diff options
Diffstat (limited to 'fs/nfs')
68 files changed, 3535 insertions, 1722 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 7df2503cef6c..07932ce9246c 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -2,8 +2,10 @@ config NFS_FS tristate "NFS client support" depends on INET && FILE_LOCKING && MULTIUSER + select CRC32 select LOCKD select SUNRPC + select NFS_COMMON select NFS_ACL_SUPPORT if NFS_V3_ACL help Choose Y here if you want to access files residing on other @@ -33,12 +35,12 @@ config NFS_FS config NFS_V2 tristate "NFS client support for NFS version 2" depends on NFS_FS - default y + default n help This option enables support for version 2 of the NFS protocol (RFC 1094) in the kernel's NFS client. - If unsure, say Y. + If unsure, say N. config NFS_V3 tristate "NFS client support for NFS version 3" @@ -125,7 +127,7 @@ config PNFS_BLOCK config PNFS_FLEXFILE_LAYOUT tristate - depends on NFS_V4_1 && NFS_V3 + depends on NFS_V4_1 default NFS_V4 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN @@ -169,8 +171,9 @@ config ROOT_NFS config NFS_FSCACHE bool "Provide NFS client caching support" - depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y + depends on NFS_FS select NETFS_SUPPORT + select FSCACHE help Say Y here if you want NFS data to be cached locally on disc through the general filesystem cache manager @@ -194,7 +197,6 @@ config NFS_USE_KERNEL_DNS config NFS_DEBUG bool depends on NFS_FS && SUNRPC_DEBUG - select CRC32 default y config NFS_DISABLE_UDP_SUPPORT diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 5f6db37f461e..9fb2f2cac87e 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -13,6 +13,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o +nfs-$(CONFIG_NFS_LOCALIO) += localio.o obj-$(CONFIG_NFS_V2) += nfsv2.o nfsv2-y := nfs2super.o proc.o nfs2xdr.o diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 6be13e0ec170..5d6edafbed20 100644 --- a/fs/nfs/blocklayout/blocklayout.c 
+++ b/fs/nfs/blocklayout/blocklayout.c @@ -149,8 +149,8 @@ do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect, /* limit length to what the device mapping allows */ end = disk_addr + *len; - if (end >= map->start + map->len) - *len = map->start + map->len - disk_addr; + if (end >= map->disk_offset + map->len) + *len = map->disk_offset + map->len - disk_addr; retry: if (!bio) { @@ -564,25 +564,45 @@ bl_find_get_deviceid(struct nfs_server *server, gfp_t gfp_mask) { struct nfs4_deviceid_node *node; - unsigned long start, end; + int err = -ENODEV; retry: node = nfs4_find_get_deviceid(server, id, cred, gfp_mask); if (!node) return ERR_PTR(-ENODEV); - if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) - return node; + /* + * Devices that are marked unavailable are left in the cache with a + * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or + * constantly attempting to register the device. Once marked as + * unavailable they must be deleted and never reused. 
+ */ + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { + unsigned long end = jiffies; + unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT; + + if (!time_in_range(node->timestamp_unavailable, start, end)) { + /* Uncork subsequent GETDEVINFO operations for this device */ + nfs4_delete_deviceid(node->ld, node->nfs_client, id); + goto retry; + } + goto out_put; + } - end = jiffies; - start = end - PNFS_DEVICE_RETRY_TIMEOUT; - if (!time_in_range(node->timestamp_unavailable, start, end)) { - nfs4_delete_deviceid(node->ld, node->nfs_client, id); - goto retry; + if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) { + /* + * If we cannot register, treat this device as transient: + * Make a negative cache entry for the device + */ + nfs4_mark_deviceid_unavailable(node); + goto out_put; } + return node; + +out_put: nfs4_put_deviceid_node(node); - return ERR_PTR(-ENODEV); + return ERR_PTR(err); } static int diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 716bc75e9ed2..6da40ca19570 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -104,20 +104,26 @@ struct pnfs_block_dev { u64 start; u64 len; + enum pnfs_block_volume_type type; u32 nr_children; struct pnfs_block_dev *children; u64 chunk_size; - struct block_device *bdev; + struct file *bdev_file; u64 disk_offset; + unsigned long flags; u64 pr_key; - bool pr_registered; bool (*map)(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev_map *map); }; +/* pnfs_block_dev flag bits */ +enum { + PNFS_BDEV_REGISTERED = 0, +}; + /* sector_t fields are all in 512-byte sectors */ struct pnfs_block_extent { union { @@ -172,6 +178,7 @@ struct bl_msg_hdr { #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ /* dev.c */ +bool bl_register_dev(struct pnfs_block_dev *d); struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, gfp_t gfp_mask); void 
bl_free_deviceid_node(struct nfs4_deviceid_node *d); diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 65cbb5607a5f..44306ac22353 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -10,12 +10,81 @@ #include <linux/pr.h> #include "blocklayout.h" +#include "../nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD +static void bl_unregister_scsi(struct pnfs_block_dev *dev) +{ + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int status; + + status = ops->pr_register(bdev, dev->pr_key, 0, false); + if (status) + trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status); + else + trace_bl_pr_key_unreg(bdev, dev->pr_key); +} + +static bool bl_register_scsi(struct pnfs_block_dev *dev) +{ + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int status; + + if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags)) + return true; + + status = ops->pr_register(bdev, 0, dev->pr_key, true); + if (status) { + trace_bl_pr_key_reg_err(bdev, dev->pr_key, status); + return false; + } + trace_bl_pr_key_reg(bdev, dev->pr_key); + return true; +} + +static void bl_unregister_dev(struct pnfs_block_dev *dev) +{ + u32 i; + + if (dev->nr_children) { + for (i = 0; i < dev->nr_children; i++) + bl_unregister_dev(&dev->children[i]); + return; + } + + if (dev->type == PNFS_BLOCK_VOLUME_SCSI && + test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags)) + bl_unregister_scsi(dev); +} + +bool bl_register_dev(struct pnfs_block_dev *dev) +{ + u32 i; + + if (dev->nr_children) { + for (i = 0; i < dev->nr_children; i++) { + if (!bl_register_dev(&dev->children[i])) { + while (i > 0) + bl_unregister_dev(&dev->children[--i]); + return false; + } + } + return true; + } + + if (dev->type == PNFS_BLOCK_VOLUME_SCSI) + return bl_register_scsi(dev); + return true; +} + static void bl_free_device(struct pnfs_block_dev *dev) { + bl_unregister_dev(dev); + 
if (dev->nr_children) { int i; @@ -23,19 +92,8 @@ bl_free_device(struct pnfs_block_dev *dev) bl_free_device(&dev->children[i]); kfree(dev->children); } else { - if (dev->pr_registered) { - const struct pr_ops *ops = - dev->bdev->bd_disk->fops->pr_ops; - int error; - - error = ops->pr_register(dev->bdev, dev->pr_key, 0, - false); - if (error) - pr_err("failed to unregister PR key.\n"); - } - - if (dev->bdev) - blkdev_put(dev->bdev, NULL); + if (dev->bdev_file) + fput(dev->bdev_file); } } @@ -169,7 +227,7 @@ static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, map->start = dev->start; map->len = dev->len; map->disk_offset = dev->disk_offset; - map->bdev = dev->bdev; + map->bdev = file_bdev(dev->bdev_file); return true; } @@ -199,10 +257,11 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev *child; u64 chunk; u32 chunk_idx; + u64 disk_chunk; u64 disk_offset; chunk = div_u64(offset, dev->chunk_size); - div_u64_rem(chunk, dev->nr_children, &chunk_idx); + disk_chunk = div_u64_rem(chunk, dev->nr_children, &chunk_idx); if (chunk_idx >= dev->nr_children) { dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", @@ -215,7 +274,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, offset = chunk * dev->chunk_size; /* disk offset of the stripe */ - disk_offset = div_u64(offset, dev->nr_children); + disk_offset = disk_chunk * dev->chunk_size; child = &dev->children[chunk_idx]; child->map(child, disk_offset, map); @@ -236,28 +295,26 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) { struct pnfs_block_volume *v = &volumes[idx]; - struct block_device *bdev; + struct file *bdev_file; dev_t dev; dev = bl_resolve_deviceid(server, v, gfp_mask); if (!dev) return -EIO; - bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, - NULL); - if (IS_ERR(bdev)) { + bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, + 
NULL, NULL); + if (IS_ERR(bdev_file)) { printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", - MAJOR(dev), MINOR(dev), PTR_ERR(bdev)); - return PTR_ERR(bdev); + MAJOR(dev), MINOR(dev), PTR_ERR(bdev_file)); + return PTR_ERR(bdev_file); } - d->bdev = bdev; - - - d->len = bdev_nr_bytes(d->bdev); + d->bdev_file = bdev_file; + d->len = bdev_nr_bytes(file_bdev(bdev_file)); d->map = bl_map_simple; printk(KERN_INFO "pNFS: using block device %s\n", - d->bdev->bd_disk->disk_name); + file_bdev(bdev_file)->bd_disk->disk_name); return 0; } @@ -302,10 +359,10 @@ bl_validate_designator(struct pnfs_block_volume *v) } } -static struct block_device * +static struct file * bl_open_path(struct pnfs_block_volume *v, const char *prefix) { - struct block_device *bdev; + struct file *bdev_file; const char *devname; devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN", @@ -313,15 +370,15 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix) if (!devname) return ERR_PTR(-ENOMEM); - bdev = blkdev_get_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, - NULL); - if (IS_ERR(bdev)) { - pr_warn("pNFS: failed to open device %s (%ld)\n", - devname, PTR_ERR(bdev)); + bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE, + NULL, NULL); + if (IS_ERR(bdev_file)) { + dprintk("failed to open device %s (%ld)\n", + devname, PTR_ERR(bdev_file)); } kfree(devname); - return bdev; + return bdev_file; } static int @@ -331,6 +388,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *v = &volumes[idx]; struct block_device *bdev; const struct pr_ops *ops; + struct file *bdev_file; int error; if (!bl_validate_designator(v)) @@ -342,40 +400,38 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, * On other distributions like Debian, the default SCSI by-id path will * point to the dm-multipath device if one exists. 
*/ - bdev = bl_open_path(v, "dm-uuid-mpath-0x"); - if (IS_ERR(bdev)) - bdev = bl_open_path(v, "wwn-0x"); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - d->bdev = bdev; - - d->len = bdev_nr_bytes(d->bdev); + bdev_file = bl_open_path(v, "dm-uuid-mpath-0x"); + if (IS_ERR(bdev_file)) + bdev_file = bl_open_path(v, "wwn-0x"); + if (IS_ERR(bdev_file)) + bdev_file = bl_open_path(v, "nvme-eui."); + if (IS_ERR(bdev_file)) { + pr_warn("pNFS: no device found for volume %*phN\n", + v->scsi.designator_len, v->scsi.designator); + return PTR_ERR(bdev_file); + } + d->bdev_file = bdev_file; + bdev = file_bdev(bdev_file); + + d->len = bdev_nr_bytes(bdev); d->map = bl_map_simple; d->pr_key = v->scsi.pr_key; - pr_info("pNFS: using block device %s (reservation key 0x%llx)\n", - d->bdev->bd_disk->disk_name, d->pr_key); + if (d->len == 0) + return -ENODEV; - ops = d->bdev->bd_disk->fops->pr_ops; + ops = bdev->bd_disk->fops->pr_ops; if (!ops) { pr_err("pNFS: block device %s does not support reservations.", - d->bdev->bd_disk->disk_name); + bdev->bd_disk->disk_name); error = -EINVAL; goto out_blkdev_put; } - error = ops->pr_register(d->bdev, 0, d->pr_key, true); - if (error) { - pr_err("pNFS: failed to register key for block device %s.", - d->bdev->bd_disk->disk_name); - goto out_blkdev_put; - } - - d->pr_registered = true; return 0; out_blkdev_put: - blkdev_put(d->bdev, NULL); + fput(d->bdev_file); return error; } @@ -457,7 +513,9 @@ static int bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) { - switch (volumes[idx].type) { + d->type = volumes[idx].type; + + switch (d->type) { case PNFS_BLOCK_VOLUME_SIMPLE: return bl_parse_simple(server, d, volumes, idx, gfp_mask); case PNFS_BLOCK_VOLUME_SLICE: @@ -469,7 +527,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, case PNFS_BLOCK_VOLUME_SCSI: return bl_parse_scsi(server, d, volumes, idx, gfp_mask); default: - dprintk("unsupported volume 
type: %d\n", volumes[idx].type); + dprintk("unsupported volume type: %d\n", d->type); return -EIO; } } diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 8f7cff7a4293..0add0f329816 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -552,6 +552,15 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, return ret; } +/** + * ext_tree_prepare_commit - encode extents that need to be committed + * @arg: layout commit data + * + * Return values: + * %0: Success, all required extents are encoded + * %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit + * %-ENOMEM: Out of memory, extents not encoded + */ int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) { @@ -568,12 +577,12 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) start_p = page_address(arg->layoutupdate_page); arg->layoutupdate_pages = &arg->layoutupdate_page; -retry: - ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten); + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + &count, &arg->lastbytewritten); if (unlikely(ret)) { ext_tree_free_commitdata(arg, buffer_size); - buffer_size = ext_tree_layoutupdate_size(bl, count); + buffer_size = NFS_SERVER(arg->inode)->wsize; count = 0; arg->layoutupdate_pages = @@ -588,7 +597,8 @@ retry: return -ENOMEM; } - goto retry; + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + &count, &arg->lastbytewritten); } *start_p = cpu_to_be32(count); @@ -608,7 +618,7 @@ retry: } dprintk("%s found %zu ranges\n", __func__, count); - return 0; + return ret; } void diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index 6c977288cc28..d8d50a88de04 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -75,7 +75,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, msg->len = sizeof(*bl_msg) + b->simple.len; 
msg->data = kzalloc(msg->len, gfp_mask); if (!msg->data) - goto out_free_data; + goto out_unlock; bl_msg = msg->data; bl_msg->type = BL_DEVICE_MOUNT; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 869c88978899..6cf92498a5ac 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -76,9 +76,11 @@ nfs4_callback_svc(void *vrqstp) { struct svc_rqst *rqstp = vrqstp; + svc_thread_init_status(rqstp, 0); + set_freezable(); - while (!kthread_freezable_should_stop(NULL)) + while (!svc_thread_should_stop(rqstp)) svc_recv(rqstp); svc_exit_thread(rqstp); @@ -86,45 +88,6 @@ nfs4_callback_svc(void *vrqstp) } #if defined(CONFIG_NFS_V4_1) -/* - * The callback service for NFSv4.1 callbacks - */ -static int -nfs41_callback_svc(void *vrqstp) -{ - struct svc_rqst *rqstp = vrqstp; - struct svc_serv *serv = rqstp->rq_server; - struct rpc_rqst *req; - int error; - DEFINE_WAIT(wq); - - set_freezable(); - - while (!kthread_freezable_should_stop(NULL)) { - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE); - spin_lock_bh(&serv->sv_cb_lock); - if (!list_empty(&serv->sv_cb_list)) { - req = list_first_entry(&serv->sv_cb_list, - struct rpc_rqst, rq_bc_list); - list_del(&req->rq_bc_list); - spin_unlock_bh(&serv->sv_cb_lock); - finish_wait(&serv->sv_cb_waitq, &wq); - dprintk("Invoking bc_svc_process()\n"); - error = bc_svc_process(serv, req, rqstp); - dprintk("bc_svc_process() returned w/ error code= %d\n", - error); - } else { - spin_unlock_bh(&serv->sv_cb_lock); - if (!kthread_should_stop()) - schedule(); - finish_wait(&serv->sv_cb_waitq, &wq); - } - } - - svc_exit_thread(rqstp); - return 0; -} - static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { @@ -226,7 +189,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) * Check whether we're already up and running. 
*/ if (cb_info->serv) - return svc_get(cb_info->serv); + return cb_info->serv; /* * Sanity check: if there's no task, @@ -237,10 +200,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) cb_info->users); threadfn = nfs4_callback_svc; -#if defined(CONFIG_NFS_V4_1) - if (minorversion) - threadfn = nfs41_callback_svc; -#else +#if !defined(CONFIG_NFS_V4_1) if (minorversion) return ERR_PTR(-ENOTSUPP); #endif @@ -287,9 +247,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) cb_info->users++; err_net: - if (!cb_info->users) - cb_info->serv = NULL; - svc_put(serv); + if (!cb_info->users) { + svc_set_num_threads(cb_info->serv, NULL, 0); + svc_destroy(&cb_info->serv); + } err_create: mutex_unlock(&nfs_callback_mutex); return ret; @@ -313,11 +274,9 @@ void nfs_callback_down(int minorversion, struct net *net) nfs_callback_down_net(minorversion, serv, net); cb_info->users--; if (cb_info->users == 0) { - svc_get(serv); svc_set_num_threads(serv, NULL, 0); - svc_put(serv); dprintk("nfs_callback_down: service destroyed\n"); - cb_info->serv = NULL; + svc_destroy(&cb_info->serv); } mutex_unlock(&nfs_callback_mutex); } diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index ccd4f245cae2..154a6ed1299f 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -19,32 +19,14 @@ enum nfs4_callback_procnum { CB_COMPOUND = 1, }; -enum nfs4_callback_opnum { - OP_CB_GETATTR = 3, - OP_CB_RECALL = 4, -/* Callback operations new to NFSv4.1 */ - OP_CB_LAYOUTRECALL = 5, - OP_CB_NOTIFY = 6, - OP_CB_PUSH_DELEG = 7, - OP_CB_RECALL_ANY = 8, - OP_CB_RECALLABLE_OBJ_AVAIL = 9, - OP_CB_RECALL_SLOT = 10, - OP_CB_SEQUENCE = 11, - OP_CB_WANTS_CANCELLED = 12, - OP_CB_NOTIFY_LOCK = 13, - OP_CB_NOTIFY_DEVICEID = 14, -/* Callback operations new to NFSv4.2 */ - OP_CB_OFFLOAD = 15, - OP_CB_ILLEGAL = 10044, -}; - struct nfs4_slot; struct cb_process_state { - __be32 drc_status; struct nfs_client *clp; struct nfs4_slot *slot; - u32 minorversion; struct net *net; + u32 minorversion; + 
__be32 drc_status; + unsigned int referring_calls; }; struct cb_compound_hdr_arg { @@ -64,14 +46,15 @@ struct cb_compound_hdr_res { struct cb_getattrargs { struct nfs_fh fh; - uint32_t bitmap[2]; + uint32_t bitmap[3]; }; struct cb_getattrres { __be32 status; - uint32_t bitmap[2]; + uint32_t bitmap[3]; uint64_t size; uint64_t change_attr; + struct timespec64 atime; struct timespec64 ctime; struct timespec64 mtime; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 6bed1394d748..7832fb0369a1 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -37,7 +37,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ goto out; - res->bitmap[0] = res->bitmap[1] = 0; + memset(res->bitmap, 0, sizeof(res->bitmap)); res->status = htonl(NFS4ERR_BADHANDLE); dprintk_rcu("NFS: GETATTR callback request from %s\n", @@ -59,12 +59,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, res->change_attr = delegation->change_attr; if (nfs_have_writebacks(inode)) res->change_attr++; + res->atime = inode_get_atime(inode); res->ctime = inode_get_ctime(inode); - res->mtime = inode->i_mtime; - res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & - args->bitmap[0]; - res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) & - args->bitmap[1]; + res->mtime = inode_get_mtime(inode); + res->bitmap[0] = (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE) & + args->bitmap[0]; + res->bitmap[1] = (FATTR4_WORD1_TIME_ACCESS | + FATTR4_WORD1_TIME_METADATA | + FATTR4_WORD1_TIME_MODIFY) & args->bitmap[1]; + res->bitmap[2] = (FATTR4_WORD2_TIME_DELEG_ACCESS | + FATTR4_WORD2_TIME_DELEG_MODIFY) & args->bitmap[2]; res->status = 0; out_iput: rcu_read_unlock(); @@ -207,7 +211,8 @@ static struct inode *nfs_layout_find_inode(struct nfs_client *clp, * Enforce RFC5661 section 12.5.5.2.1. 
(Layout Recall and Return Sequencing) */ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo, - const nfs4_stateid *new) + const nfs4_stateid *new, + struct cb_process_state *cps) { u32 oldseq, newseq; @@ -221,28 +226,29 @@ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo, newseq = be32_to_cpu(new->seqid); /* Are we already in a layout recall situation? */ - if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && - lo->plh_return_seq != 0) { - if (newseq < lo->plh_return_seq) - return NFS4ERR_OLD_STATEID; - if (newseq > lo->plh_return_seq) - return NFS4ERR_DELAY; - goto out; - } + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return NFS4ERR_DELAY; - /* Check that the stateid matches what we think it should be. */ + /* + * Check that the stateid matches what we think it should be. + * Note that if the server sent us a list of referring calls, + * and we know that those have completed, then we trust the + * stateid argument is correct. + */ oldseq = be32_to_cpu(lo->plh_stateid.seqid); - if (newseq > oldseq + 1) + if (newseq > oldseq + 1 && !cps->referring_calls) return NFS4ERR_DELAY; + /* Crazy server! 
*/ if (newseq <= oldseq) return NFS4ERR_OLD_STATEID; -out: + return NFS_OK; } static u32 initiate_file_draining(struct nfs_client *clp, - struct cb_layoutrecallargs *args) + struct cb_layoutrecallargs *args, + struct cb_process_state *cps) { struct inode *ino; struct pnfs_layout_hdr *lo; @@ -266,7 +272,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, goto out; } pnfs_get_layout_hdr(lo); - rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid); + rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid, cps); if (rv != NFS_OK) goto unlock; @@ -317,19 +323,21 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, int stat; if (args->cbl_recall_type == RETURN_FSID) - stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true); + stat = pnfs_layout_destroy_byfsid(clp, &args->cbl_fsid, + PNFS_LAYOUT_BULK_RETURN); else - stat = pnfs_destroy_layouts_byclid(clp, true); + stat = pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_BULK_RETURN); if (stat != 0) return NFS4ERR_DELAY; return NFS4ERR_NOMATCHING_LAYOUT; } static u32 do_callback_layoutrecall(struct nfs_client *clp, - struct cb_layoutrecallargs *args) + struct cb_layoutrecallargs *args, + struct cb_process_state *cps) { if (args->cbl_recall_type == RETURN_FILE) - return initiate_file_draining(clp, args); + return initiate_file_draining(clp, args, cps); return initiate_bulk_draining(clp, args); } @@ -340,11 +348,12 @@ __be32 nfs4_callback_layoutrecall(void *argp, void *resp, u32 res = NFS4ERR_OP_NOT_IN_SESSION; if (cps->clp) - res = do_callback_layoutrecall(cps->clp, args); + res = do_callback_layoutrecall(cps->clp, args, cps); return cpu_to_be32(res); } -static void pnfs_recall_all_layouts(struct nfs_client *clp) +static void pnfs_recall_all_layouts(struct nfs_client *clp, + struct cb_process_state *cps) { struct cb_layoutrecallargs args; @@ -352,7 +361,7 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp) memset(&args, 0, sizeof(args)); args.cbl_recall_type = RETURN_ALL; /* FIXME we 
ignore errors, what should we do? */ - do_callback_layoutrecall(clp, &args); + do_callback_layoutrecall(clp, &args, cps); } __be32 nfs4_callback_devicenotify(void *argp, void *resp, @@ -450,6 +459,7 @@ static int referring_call_exists(struct nfs_client *clp, __acquires(lock) { int status = 0; + int found = 0; int i, j; struct nfs4_session *session; struct nfs4_slot_table *tbl; @@ -478,11 +488,12 @@ static int referring_call_exists(struct nfs_client *clp, spin_lock(lock); if (status) goto out; + found++; } } out: - return status; + return status < 0 ? status : found; } __be32 nfs4_callback_sequence(void *argp, void *resp, @@ -493,6 +504,7 @@ __be32 nfs4_callback_sequence(void *argp, void *resp, struct nfs4_slot_table *tbl; struct nfs4_slot *slot; struct nfs_client *clp; + int ret; int i; __be32 status = htonl(NFS4ERR_BADSESSION); @@ -552,11 +564,13 @@ __be32 nfs4_callback_sequence(void *argp, void *resp, * related callback was received before the response to the original * call. */ - if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists, - &tbl->slot_tbl_lock) < 0) { + ret = referring_call_exists(clp, args->csa_nrclists, args->csa_rclists, + &tbl->slot_tbl_lock); + if (ret < 0) { status = htonl(NFS4ERR_DELAY); goto out_unlock; } + cps->referring_calls = ret; /* * RFC5661 20.9.3 @@ -617,7 +631,7 @@ __be32 nfs4_callback_recallany(void *argp, void *resp, nfs_expire_unused_delegation_types(cps->clp, flags); if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT)) - pnfs_recall_all_layouts(cps->clp); + pnfs_recall_all_layouts(cps->clp, cps); if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) { set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state); diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d5f6437da352..fdeb0b34a3d3 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -25,8 +25,9 @@ #define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps #define CB_OP_GETATTR_RES_MAXSZ 
(CB_OP_HDR_RES_MAXSZ + \ CB_OP_GETATTR_BITMAP_MAXSZ + \ - /* change, size, ctime, mtime */\ - (2 + 2 + 3 + 3) * 4) + /* change, size, atime, ctime, + * mtime, deleg_atime, deleg_mtime */\ + (2 + 2 + 3 + 3 + 3 + 3 + 3) * 4) #define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #if defined(CONFIG_NFS_V4_1) @@ -117,7 +118,9 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) if (likely(attrlen > 0)) bitmap[0] = ntohl(*p++); if (attrlen > 1) - bitmap[1] = ntohl(*p); + bitmap[1] = ntohl(*p++); + if (attrlen > 2) + bitmap[2] = ntohl(*p); return 0; } @@ -447,7 +450,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, void *argp) { struct cb_recallanyargs *args = argp; - uint32_t bitmap[2]; + uint32_t bitmap[3]; __be32 *p, status; p = xdr_inline_decode(xdr, 4); @@ -637,6 +640,13 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 * return 0; } +static __be32 encode_attr_atime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) + return 0; + return encode_attr_time(xdr,time); +} + static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) { if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) @@ -651,6 +661,24 @@ static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, return encode_attr_time(xdr,time); } +static __be32 encode_attr_delegatime(struct xdr_stream *xdr, + const uint32_t *bitmap, + const struct timespec64 *time) +{ + if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)) + return 0; + return encode_attr_time(xdr,time); +} + +static __be32 encode_attr_delegmtime(struct xdr_stream *xdr, + const uint32_t *bitmap, + const struct timespec64 *time) +{ + if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY)) + return 0; + return encode_attr_time(xdr,time); +} + static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr) { __be32 status; 
@@ -701,10 +729,19 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, status = encode_attr_size(xdr, res->bitmap, res->size); if (unlikely(status != 0)) goto out; + status = encode_attr_atime(xdr, res->bitmap, &res->atime); + if (unlikely(status != 0)) + goto out; status = encode_attr_ctime(xdr, res->bitmap, &res->ctime); if (unlikely(status != 0)) goto out; status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_delegatime(xdr, res->bitmap, &res->atime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_delegmtime(xdr, res->bitmap, &res->mtime); *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); out: return status; @@ -969,6 +1006,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) nops--; } + if (svc_is_backchannel(rqstp) && cps.clp) { + rqstp->bc_to_initval = cps.clp->cl_rpcclient->cl_timeout->to_initval; + rqstp->bc_to_retries = cps.clp->cl_rpcclient->cl_timeout->to_retries; + } + *hdr_res.status = status; *hdr_res.nops = htonl(nops); nfs4_cb_free_slot(&cps); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 62607d52bfa5..17edc124d03f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -76,10 +76,6 @@ const struct rpc_program nfs_program = { .pipe_dir_name = NFS_PIPE_DIRNAME, }; -struct rpc_stat nfs_rpcstat = { - .program = &nfs_program -}; - static struct nfs_subversion *find_nfs_version(unsigned int version) { struct nfs_subversion *nfs; @@ -182,6 +178,13 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_max_connect = cl_init->max_connect ? 
cl_init->max_connect : 1; clp->cl_net = get_net(cl_init->net); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + seqlock_init(&clp->cl_boot_lock); + ktime_get_real_ts64(&clp->cl_nfssvc_boot); + nfs_uuid_init(&clp->cl_uuid); + spin_lock_init(&clp->cl_localio_lock); +#endif /* CONFIG_NFS_LOCALIO */ + clp->cl_principal = "*"; clp->cl_xprtsec = cl_init->xprtsec; return clp; @@ -237,6 +240,8 @@ static void pnfs_init_server(struct nfs_server *server) */ void nfs_free_client(struct nfs_client *clp) { + nfs_local_disable(clp); + /* -EIO all pending I/O */ if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); @@ -245,7 +250,7 @@ void nfs_free_client(struct nfs_client *clp) put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); kfree(clp->cl_acceptor); - kfree(clp); + kfree_rcu(clp, rcu); } EXPORT_SYMBOL_GPL(nfs_free_client); @@ -428,7 +433,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) list_add_tail(&new->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); - return rpc_ops->init_client(new, cl_init); + new = rpc_ops->init_client(new, cl_init); + if (!IS_ERR(new)) + nfs_local_probe(new); + return new; } spin_unlock(&nn->nfs_client_lock); @@ -668,6 +676,44 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp, } EXPORT_SYMBOL_GPL(nfs_init_client); +static void nfs4_server_set_init_caps(struct nfs_server *server) +{ +#if IS_ENABLED(CONFIG_NFS_V4) + /* Set the basic capabilities */ + server->caps = server->nfs_client->cl_mvops->init_caps; + if (server->flags & NFS_MOUNT_NORDIRPLUS) + server->caps &= ~NFS_CAP_READDIRPLUS; + if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) + server->caps &= ~NFS_CAP_READ_PLUS; + + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. 
+ */ + if (nfs4_disable_idmapping && + server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; +#endif +} + +void nfs_server_set_init_caps(struct nfs_server *server) +{ + switch (server->nfs_client->rpc_ops->version) { + case 2: + server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; + break; + case 3: + server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; + if (!(server->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; + break; + default: + nfs4_server_set_init_caps(server); + break; + } +} +EXPORT_SYMBOL_GPL(nfs_server_set_init_caps); + /* * Create a version 2 or 3 client */ @@ -709,7 +755,6 @@ static int nfs_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = ctx->flags; server->options = ctx->options; - server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; switch (clp->rpc_ops->version) { case 2: @@ -745,6 +790,8 @@ static int nfs_init_server(struct nfs_server *server, if (error < 0) goto error; + nfs_server_set_init_caps(server); + /* Preserve the values of mount_server-related mount options */ if (ctx->mount_server.addrlen) { memcpy(&server->mountd_address, &ctx->mount_server.address, @@ -919,7 +966,6 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->acregmax = source->acregmax; target->acdirmin = source->acdirmin; target->acdirmax = source->acdirmax; - target->caps = source->caps; target->options = source->options; target->auth_info = source->auth_info; target->port = source->port; @@ -999,8 +1045,11 @@ struct nfs_server *nfs_alloc_server(void) server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; - ida_init(&server->openowner_id); - ida_init(&server->lockowner_id); + init_waitqueue_head(&server->write_congestion_wait); + atomic_long_set(&server->writeback, 0); + + atomic64_set(&server->owner_ctr, 0); + pnfs_init_server(server); rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC"); @@ 
-1008,6 +1057,14 @@ struct nfs_server *nfs_alloc_server(void) } EXPORT_SYMBOL_GPL(nfs_alloc_server); +static void delayed_free(struct rcu_head *p) +{ + struct nfs_server *server = container_of(p, struct nfs_server, rcu); + + nfs_free_iostats(server->io_stats); + kfree(server); +} + /* * Free up a server record */ @@ -1031,12 +1088,9 @@ void nfs_free_server(struct nfs_server *server) } ida_free(&s_sysfs_ids, server->s_sysfs_id); - ida_destroy(&server->lockowner_id); - ida_destroy(&server->openowner_id); - nfs_free_iostats(server->io_stats); put_cred(server->cred); - kfree(server); nfs_release_automount_timer(); + call_rcu(&server->rcu, delayed_free); } EXPORT_SYMBOL_GPL(nfs_free_server); @@ -1080,6 +1134,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc) if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) server->namelen = NFS2_MAXNAMLEN; } + /* Linux 'subtree_check' borkenness mandates this setting */ + server->fh_expire_type = NFS_FH_VOL_RENAME; if (!(fattr->valid & NFS_ATTR_FATTR)) { error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, @@ -1143,6 +1199,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, if (error < 0) goto out_free_server; + nfs_server_set_init_caps(server); + /* probe the filesystem info for this server filesystem */ error = nfs_probe_server(server, fh); if (error < 0) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 4bf2526a3a18..8bdbc4dca89c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -79,14 +79,14 @@ static void nfs_mark_return_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags); set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } -static bool -nfs4_is_valid_delegation(const struct nfs_delegation *delegation, - fmode_t flags) +static bool nfs4_is_valid_delegation(const struct nfs_delegation *delegation, + fmode_t type) { - 
if (delegation != NULL && (delegation->type & flags) == flags && + if (delegation != NULL && (delegation->type & type) == type && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) return true; @@ -103,19 +103,22 @@ struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode) return NULL; } -static int -nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) +static int nfs4_do_check_delegation(struct inode *inode, fmode_t type, + int flags, bool mark) { struct nfs_delegation *delegation; int ret = 0; - flags &= FMODE_READ|FMODE_WRITE; + type &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (nfs4_is_valid_delegation(delegation, flags)) { + if (nfs4_is_valid_delegation(delegation, type)) { if (mark) nfs_mark_delegation_referenced(delegation); ret = 1; + if ((flags & NFS_DELEGATION_FLAG_TIME) && + !test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + ret = 0; } rcu_read_unlock(); return ret; @@ -124,22 +127,23 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) * nfs4_have_delegation - check if inode has a delegation, mark it * NFS_DELEGATION_REFERENCED if there is one. * @inode: inode to check - * @flags: delegation types to check for + * @type: delegation types to check for + * @flags: various modifiers * * Returns one if inode has the indicated delegation, otherwise zero. */ -int nfs4_have_delegation(struct inode *inode, fmode_t flags) +int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags) { - return nfs4_do_check_delegation(inode, flags, true); + return nfs4_do_check_delegation(inode, type, flags, true); } /* * nfs4_check_delegation - check if inode has a delegation, do not mark * NFS_DELEGATION_REFERENCED if it has one. 
*/ -int nfs4_check_delegation(struct inode *inode, fmode_t flags) +int nfs4_check_delegation(struct inode *inode, fmode_t type) { - return nfs4_do_check_delegation(inode, flags, false); + return nfs4_do_check_delegation(inode, type, 0, false); } static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid) @@ -156,8 +160,8 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state list = &flctx->flc_posix; spin_lock(&flctx->flc_lock); restart: - list_for_each_entry(fl, list, fl_list) { - if (nfs_file_open_context(fl->fl_file)->state != state) + for_each_file_lock(fl, list) { + if (nfs_file_open_context(fl->c.flc_file)->state != state) continue; spin_unlock(&flctx->flc_lock); status = nfs4_lock_delegation_recall(fl, state, stateid); @@ -181,7 +185,6 @@ static int nfs_delegation_claim_opens(struct inode *inode, struct nfs_open_context *ctx; struct nfs4_state_owner *sp; struct nfs4_state *state; - unsigned int seq; int err; again: @@ -202,12 +205,9 @@ again: sp = state->owner; /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); err = nfs4_open_delegation_recall(ctx, state, stateid); if (!err) err = nfs_delegation_claim_locks(state, stateid); - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) - err = -EAGAIN; mutex_unlock(&sp->so_delegreturn_mutex); put_nfs_open_context(ctx); if (err != 0) @@ -225,11 +225,12 @@ again: * @type: delegation type * @stateid: delegation stateid * @pagemod_limit: write delegation "space_limit" + * @deleg_type: raw delegation type * */ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, - unsigned long pagemod_limit) + unsigned long pagemod_limit, u32 deleg_type) { struct nfs_delegation *delegation; const struct cred *oldcred = NULL; @@ -243,6 +244,14 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, 
delegation->pagemod_limit = pagemod_limit; oldcred = delegation->cred; delegation->cred = get_cred(cred); + switch (deleg_type) { + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + break; + default: + clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + } clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) @@ -254,11 +263,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, } else { rcu_read_unlock(); nfs_inode_set_delegation(inode, cred, type, stateid, - pagemod_limit); + pagemod_limit, deleg_type); } } -static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) +static int nfs_do_return_delegation(struct inode *inode, + struct nfs_delegation *delegation, + int issync) { const struct cred *cred; int res = 0; @@ -267,9 +278,8 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * spin_lock(&delegation->lock); cred = get_cred(delegation->cred); spin_unlock(&delegation->lock); - res = nfs4_proc_delegreturn(inode, cred, - &delegation->stateid, - issync); + res = nfs4_proc_delegreturn(inode, cred, &delegation->stateid, + delegation, issync); put_cred(cred); } return res; @@ -297,7 +307,8 @@ nfs_start_delegation_return_locked(struct nfs_inode *nfsi) if (delegation == NULL) goto out; spin_lock(&delegation->lock); - if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + if (delegation->inode && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags); /* Refcount matched in nfs_end_delegation_return() */ ret = nfs_get_delegation(delegation); @@ -321,14 +332,16 @@ nfs_start_delegation_return(struct nfs_inode *nfsi) } static void nfs_abort_delegation_return(struct nfs_delegation *delegation, - struct 
nfs_client *clp, int err) + struct nfs_server *server, int err) { - spin_lock(&delegation->lock); clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); if (err == -EAGAIN) { set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state); + set_bit(NFS4SERV_DELEGRETURN_DELAYED, + &server->delegation_flags); + set_bit(NFS4CLNT_DELEGRETURN_DELAYED, + &server->nfs_client->cl_state); } spin_unlock(&delegation->lock); } @@ -422,13 +435,13 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, * @type: delegation type * @stateid: delegation stateid * @pagemod_limit: write delegation "space_limit" + * @deleg_type: raw delegation type * * Returns zero on success, or a negative errno value. */ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, - const nfs4_stateid *stateid, - unsigned long pagemod_limit) + fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit, u32 deleg_type) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_client *clp = server->nfs_client; @@ -448,6 +461,12 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, delegation->cred = get_cred(cred); delegation->inode = inode; delegation->flags = 1<<NFS_DELEGATION_REFERENCED; + switch (deleg_type) { + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + delegation->flags |= BIT(NFS_DELEGATION_DELEGTIME); + } + delegation->test_gen = 0; spin_lock_init(&delegation->lock); spin_lock(&clp->cl_lock); @@ -511,6 +530,11 @@ add_new: atomic_long_inc(&nfs_active_delegations); trace_nfs4_set_delegation(inode, type); + + /* If we hold writebacks and have delegated mtime then update */ + if (deleg_type == NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG && + nfs_have_writebacks(inode)) + nfs_update_delegated_mtime(inode); out: spin_unlock(&clp->cl_lock); if (delegation != NULL) @@ -527,7 +551,7 @@ out: */ static int 
nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); unsigned int mode = O_WRONLY | O_RDWR; int err = 0; @@ -549,11 +573,11 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation /* * Guard against state recovery */ - err = nfs4_wait_clnt_recover(clp); + err = nfs4_wait_clnt_recover(server->nfs_client); } if (err) { - nfs_abort_delegation_return(delegation, clp, err); + nfs_abort_delegation_return(delegation, server, err); goto out; } @@ -570,17 +594,6 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; - else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) { - struct inode *inode; - - spin_lock(&delegation->lock); - inode = delegation->inode; - if (inode && list_empty(&NFS_I(inode)->open_files)) - ret = true; - spin_unlock(&delegation->lock); - } - if (ret) - clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) || test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) @@ -599,6 +612,9 @@ static int nfs_server_return_marked_delegations(struct nfs_server *server, struct nfs_delegation *place_holder_deleg = NULL; int err = 0; + if (!test_and_clear_bit(NFS4SERV_DELEGRETURN, + &server->delegation_flags)) + return 0; restart: /* * To avoid quadratic looping we hold a reference @@ -650,6 +666,7 @@ restart: cond_resched(); if (!err) goto restart; + set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags); set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); goto out; } @@ -664,6 +681,9 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server) struct nfs_delegation *d; bool ret = false; + if 
(!test_and_clear_bit(NFS4SERV_DELEGRETURN_DELAYED, + &server->delegation_flags)) + goto out; list_for_each_entry_rcu (d, &server->delegations, super_list) { if (!test_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags)) continue; @@ -671,6 +691,7 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server) clear_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags); ret = true; } +out: return ret; } @@ -761,6 +782,43 @@ int nfs4_inode_return_delegation(struct inode *inode) } /** + * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation + * @inode: inode to process + * + * This routine is called to request that the delegation be returned as soon + * as the file is closed. If the file is already closed, the delegation is + * immediately returned. + */ +void nfs4_inode_set_return_delegation_on_close(struct inode *inode) +{ + struct nfs_delegation *delegation; + struct nfs_delegation *ret = NULL; + + if (!inode) + return; + rcu_read_lock(); + delegation = nfs4_get_valid_delegation(inode); + if (!delegation) + goto out; + spin_lock(&delegation->lock); + if (!delegation->inode) + goto out_unlock; + if (list_empty(&NFS_I(inode)->open_files) && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + /* Refcount matched in nfs_end_delegation_return() */ + ret = nfs_get_delegation(delegation); + } else + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); +out_unlock: + spin_unlock(&delegation->lock); + if (ret) + nfs_clear_verifier_delegated(inode); +out: + rcu_read_unlock(); + nfs_end_delegation_return(inode, ret, 0); +} + +/** * nfs4_inode_return_delegation_on_close - asynchronously return a delegation * @inode: inode to process * @@ -821,11 +879,25 @@ int nfs4_inode_make_writeable(struct inode *inode) return nfs4_inode_return_delegation(inode); } -static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, - struct nfs_delegation *delegation) +static void +nfs_mark_return_if_closed_delegation(struct 
nfs_server *server, + struct nfs_delegation *delegation) { - set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); + struct inode *inode; + + if (test_bit(NFS_DELEGATION_RETURN, &delegation->flags) || + test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) + return; + spin_lock(&delegation->lock); + inode = delegation->inode; + if (!inode) + goto out; + if (list_empty(&NFS_I(inode)->open_files)) + nfs_mark_return_delegation(server, delegation); + else + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); +out: + spin_unlock(&delegation->lock); } static bool nfs_server_mark_return_all_delegations(struct nfs_server *server) @@ -1219,6 +1291,7 @@ static void nfs_mark_test_expired_delegation(struct nfs_server *server, return; clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); + set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags); set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state); } @@ -1295,6 +1368,11 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, struct inode *inode; const struct cred *cred; nfs4_stateid stateid; + unsigned long gen = ++server->delegation_gen; + + if (!test_and_clear_bit(NFS4SERV_DELEGATION_EXPIRED, + &server->delegation_flags)) + return 0; restart: rcu_read_lock(); list_for_each_entry_rcu(delegation, &server->delegations, super_list) { @@ -1303,7 +1381,8 @@ restart: test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_TEST_EXPIRED, - &delegation->flags) == 0) + &delegation->flags) == 0 || + delegation->test_gen == gen) continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) @@ -1312,6 +1391,7 @@ restart: cred = get_cred_rcu(delegation->cred); nfs4_stateid_copy(&stateid, &delegation->stateid); spin_unlock(&delegation->lock); + delegation->test_gen = gen; 
clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); rcu_read_unlock(); nfs_delegation_test_free_expired(inode, &stateid, cred); @@ -1322,6 +1402,9 @@ restart: goto restart; } nfs_inode_mark_test_expired_delegation(server,inode); + set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags); + set_bit(NFS4CLNT_DELEGATION_EXPIRED, + &server->nfs_client->cl_state); iput(inode); return -EAGAIN; } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 1c378992b7c0..8ff5ab9c5c25 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -21,6 +21,7 @@ struct nfs_delegation { fmode_t type; unsigned long pagemod_limit; __u64 change_attr; + unsigned long test_gen; unsigned long flags; refcount_t refcount; spinlock_t lock; @@ -37,14 +38,18 @@ enum { NFS_DELEGATION_TEST_EXPIRED, NFS_DELEGATION_INODE_FREEING, NFS_DELEGATION_RETURN_DELAYED, + NFS_DELEGATION_DELEGTIME, }; int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); + fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit, u32 deleg_type); void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); + fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit, u32 deleg_type); int nfs4_inode_return_delegation(struct inode *inode); void nfs4_inode_return_delegation_on_close(struct inode *inode); +void nfs4_inode_set_return_delegation_on_close(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_evict_delegation(struct inode *inode); @@ -66,7 +71,9 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp); void nfs_reap_expired_delegations(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ -int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, 
int issync); +int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred); @@ -74,8 +81,8 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); -int nfs4_have_delegation(struct inode *inode, fmode_t flags); -int nfs4_check_delegation(struct inode *inode, fmode_t flags); +int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags); +int nfs4_check_delegation(struct inode *inode, fmode_t type); bool nfs4_delegation_flush_on_close(const struct inode *inode); void nfs_inode_find_delegation_state_and_recover(struct inode *inode, const nfs4_stateid *stateid); @@ -83,9 +90,37 @@ int nfs4_inode_make_writeable(struct inode *inode); #endif +#define NFS_DELEGATION_FLAG_TIME BIT(1) + +void nfs_update_delegated_atime(struct inode *inode); +void nfs_update_delegated_mtime(struct inode *inode); +void nfs_update_delegated_mtime_locked(struct inode *inode); + +static inline int nfs_have_read_or_write_delegation(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); +} + +static inline int nfs_have_write_delegation(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, 0); +} + static inline int nfs_have_delegated_attributes(struct inode *inode) { - return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); +} + +static inline int 
nfs_have_delegated_atime(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, + NFS_DELEGATION_FLAG_TIME); +} + +static inline int nfs_have_delegated_mtime(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, + NFS_DELEGATION_FLAG_TIME); } #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 39f7549afcf5..bbc625e742aa 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -56,6 +56,8 @@ static int nfs_readdir(struct file *, struct dir_context *); static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); static void nfs_readdir_clear_array(struct folio *); +static int nfs_do_create(struct inode *dir, struct dentry *dentry, + umode_t mode, int open_flags); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, @@ -149,7 +151,7 @@ struct nfs_cache_array { unsigned char folio_full : 1, folio_is_eof : 1, cookies_are_ordered : 1; - struct nfs_cache_array_entry array[]; + struct nfs_cache_array_entry array[] __counted_by(size); }; struct nfs_readdir_descriptor { @@ -326,7 +328,8 @@ static int nfs_readdir_folio_array_append(struct folio *folio, goto out; } - cache_entry = &array->array[array->size]; + array->size++; + cache_entry = &array->array[array->size - 1]; cache_entry->cookie = array->last_cookie; cache_entry->ino = entry->ino; cache_entry->d_type = entry->d_type; @@ -335,7 +338,6 @@ static int nfs_readdir_folio_array_append(struct folio *folio, array->last_cookie = entry->cookie; if (array->last_cookie <= cache_entry->cookie) array->cookies_are_ordered = 0; - array->size++; if (entry->eof != 0) nfs_readdir_array_set_eof(array); out: @@ -1431,11 +1433,11 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry) static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) { struct inode *inode = d_inode(dentry); - struct inode *dir = d_inode(dentry->d_parent); + struct inode *dir = 
d_inode_rcu(dentry->d_parent); - if (!nfs_verify_change_attribute(dir, verf)) + if (!dir || !nfs_verify_change_attribute(dir, verf)) return; - if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0)) nfs_set_verifier_delegated(&verf); dentry->d_time = verf; } @@ -1835,9 +1837,7 @@ static void block_revalidate(struct dentry *dentry) static void unblock_revalidate(struct dentry *dentry) { - /* store_release ensures wait_var_event() sees the update */ - smp_store_release(&dentry->d_fsdata, NULL); - wake_up_var(&dentry->d_fsdata); + store_release_wake_up(&dentry->d_fsdata, NULL); } /* @@ -2217,6 +2217,8 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, { struct inode *inode; + trace_nfs_lookup_revalidate_enter(dir, dentry, flags); + if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) goto full_reval; if (d_mountpoint(dentry)) @@ -2264,6 +2266,44 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) #endif /* CONFIG_NFSV4 */ +int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned int open_flags, + umode_t mode) +{ + + /* Same as look+open from lookup_open(), but with different O_TRUNC + * handling. + */ + int error = 0; + + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) + return -ENAMETOOLONG; + + if (open_flags & O_CREAT) { + file->f_mode |= FMODE_CREATED; + error = nfs_do_create(dir, dentry, mode, open_flags); + if (error) + return error; + return finish_open(file, dentry, NULL); + } else if (d_in_lookup(dentry)) { + /* The only flags nfs_lookup considers are + * LOOKUP_EXCL and LOOKUP_RENAME_TARGET, and + * we want those to be zero so the lookup isn't skipped. 
+ */ + struct dentry *res = nfs_lookup(dir, dentry, 0); + + d_lookup_done(dentry); + if (unlikely(res)) { + if (IS_ERR(res)) + return PTR_ERR(res); + return finish_no_open(file, res); + } + } + return finish_no_open(file, NULL); + +} +EXPORT_SYMBOL_GPL(nfs_atomic_open_v23); + struct dentry * nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) @@ -2324,18 +2364,23 @@ EXPORT_SYMBOL_GPL(nfs_instantiate); * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -int nfs_create(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, bool excl) +static int nfs_do_create(struct inode *dir, struct dentry *dentry, + umode_t mode, int open_flags) { struct iattr attr; - int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; + open_flags |= O_CREAT; + dfprintk(VFS, "NFS: create(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; + if (open_flags & O_TRUNC) { + attr.ia_size = 0; + attr.ia_valid |= ATTR_SIZE; + } trace_nfs_create_enter(dir, dentry, open_flags); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); @@ -2347,6 +2392,12 @@ out_err: d_drop(dentry); return error; } + +int nfs_create(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) +{ + return nfs_do_create(dir, dentry, mode, excl ? 
O_EXCL : 0); +} EXPORT_SYMBOL_GPL(nfs_create); /* @@ -2552,7 +2603,7 @@ EXPORT_SYMBOL_GPL(nfs_unlink); int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { - struct page *page; + struct folio *folio; char *kaddr; struct iattr attr; unsigned int pathlen = strlen(symname); @@ -2567,24 +2618,24 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, attr.ia_mode = S_IFLNK | S_IRWXUGO; attr.ia_valid = ATTR_MODE; - page = alloc_page(GFP_USER); - if (!page) + folio = folio_alloc(GFP_USER, 0); + if (!folio) return -ENOMEM; - kaddr = page_address(page); + kaddr = folio_address(folio); memcpy(kaddr, symname, pathlen); if (pathlen < PAGE_SIZE) memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); trace_nfs_symlink_enter(dir, dentry); - error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); + error = NFS_PROTO(dir)->symlink(dir, dentry, folio, pathlen, &attr); trace_nfs_symlink_exit(dir, dentry, error); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, dentry, symname, error); d_drop(dentry); - __free_page(page); + folio_put(folio); return error; } @@ -2594,18 +2645,13 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, * No big deal if we can't add this page to the page cache here. * READLINK will get the missing page from the server if needed. */ - if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0, - GFP_KERNEL)) { - SetPageUptodate(page); - unlock_page(page); - /* - * add_to_page_cache_lru() grabs an extra page refcount. - * Drop it here to avoid leaking this page later. 
- */ - put_page(page); - } else - __free_page(page); + if (filemap_add_folio(d_inode(dentry)->i_mapping, folio, 0, + GFP_KERNEL) == 0) { + folio_mark_uptodate(folio); + folio_unlock(folio); + } + folio_put(folio); return 0; } EXPORT_SYMBOL_GPL(nfs_symlink); @@ -2642,6 +2688,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) unblock_revalidate(new_dentry); } +static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry, + struct dentry *new_dentry) +{ + struct nfs_server *server = NFS_SB(old_dentry->d_sb); + + if (old_dentry->d_parent != new_dentry->d_parent) + return false; + if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE) + return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN); + return true; +} + /* * RENAME * FIXME: Some nfsds, like the Linux user space nfsd, may generate a @@ -2729,7 +2787,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } - if (S_ISREG(old_inode->i_mode)) + if (S_ISREG(old_inode->i_mode) && + nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry)) nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, must_unblock ? 
nfs_unblock_rename : NULL); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 258521d5125e..c1f1b826888c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -56,6 +56,7 @@ #include <linux/uaccess.h> #include <linux/atomic.h> +#include "delegation.h" #include "internal.h" #include "iostat.h" #include "pnfs.h" @@ -130,6 +131,20 @@ static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, dreq->count = req_start; } +static void nfs_direct_file_adjust_size_locked(struct inode *inode, + loff_t offset, size_t count) +{ + loff_t newsize = offset + (loff_t)count; + loff_t oldsize = i_size_read(inode); + + if (newsize > oldsize) { + i_size_write(inode, newsize); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; + trace_nfs_size_grow(inode, newsize); + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); + } +} + /** * nfs_swap_rw - NFS address space operation for swap I/O * @iocb: target I/O control block @@ -272,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) nfs_direct_count_bytes(dreq, hdr); spin_unlock(&dreq->lock); + nfs_update_delegated_atime(dreq->inode); + while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; @@ -367,7 +384,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, bytes -= req_len; requested_bytes += req_len; pos += req_len; - dreq->bytes_left -= req_len; } nfs_direct_release_pages(pagevec, npages); kvfree(pagevec); @@ -439,7 +455,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, goto out; dreq->inode = inode; - dreq->bytes_left = dreq->max_count = count; + dreq->max_count = count; dreq->io_start = iocb->ki_pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); @@ -605,6 +621,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) trace_nfs_direct_commit_complete(dreq); + spin_lock(&dreq->lock); if (status < 
0) { /* Errors in commit are fatal */ dreq->error = status; @@ -612,6 +629,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) } else { status = dreq->error; } + spin_unlock(&dreq->lock); nfs_init_cinfo_from_dreq(&cinfo, dreq); @@ -624,7 +642,10 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) spin_unlock(&dreq->lock); nfs_release_request(req); } else if (!nfs_write_match_verf(verf, req)) { - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_lock(&dreq->lock); + if (dreq->flags == 0) + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&dreq->lock); /* * Despite the reboot, the write was successful, * so reset wb_nio. @@ -728,6 +749,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct inode *inode = dreq->inode; int flags = NFS_ODIRECT_DONE; trace_nfs_direct_write_completion(dreq); @@ -749,6 +771,11 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } spin_unlock(&dreq->lock); + spin_lock(&inode->i_lock); + nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count); + nfs_update_delegated_mtime_locked(dreq->inode); + spin_unlock(&inode->i_lock); + while (!list_empty(&hdr->pages)) { req = nfs_list_entry(hdr->pages.next); @@ -879,7 +906,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, bytes -= req_len; requested_bytes += req_len; pos += req_len; - dreq->bytes_left -= req_len; if (defer) { nfs_mark_request_commit(req, NULL, &cinfo, 0); @@ -986,7 +1012,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, goto out; dreq->inode = inode; - dreq->bytes_left = dreq->max_count = count; + dreq->max_count = count; dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); @@ -1044,8 +1070,7 @@ int __init 
nfs_init_directcache(void) { nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", sizeof(struct nfs_direct_req), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), + 0, SLAB_RECLAIM_ACCOUNT, NULL); if (nfs_direct_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/export.c b/fs/nfs/export.c index be686b8e0c54..aeb17adcb2b6 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -66,14 +66,21 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, { struct nfs_fattr *fattr = NULL; struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw); - size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size; + size_t fh_size = offsetof(struct nfs_fh, data); const struct nfs_rpc_ops *rpc_ops; struct dentry *dentry; struct inode *inode; - int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size); + int len = EMBED_FH_OFF; u32 *p = fid->raw; int ret; + /* Initial check of bounds */ + if (fh_len < len + XDR_QUADLEN(fh_size) || + fh_len > XDR_QUADLEN(NFS_MAXFHSIZE)) + return NULL; + /* Calculate embedded filehandle size */ + fh_size += server_fh->size; + len += XDR_QUADLEN(fh_size); /* NULL translates to ESTALE */ if (fh_len < len || fh_type != len) return NULL; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3f9768810427..153d25d4b810 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -29,6 +29,7 @@ #include <linux/pagemap.h> #include <linux/gfp.h> #include <linux/swap.h> +#include <linux/compaction.h> #include <linux/uaccess.h> #include <linux/filelock.h> @@ -336,9 +337,10 @@ static bool nfs_want_read_modify_write(struct file *file, struct folio *folio, * increment the page use counts until he is done with the page. 
*/ static int nfs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, struct page **pagep, + loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { + fgf_t fgp = FGP_WRITEBEGIN; struct folio *folio; int once_thru = 0; int ret; @@ -346,12 +348,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n", file, mapping->host->i_ino, len, (long long) pos); + fgp |= fgf_set_order(len); start: - folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN, + folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) return PTR_ERR(folio); - *pagep = &folio->page; + *foliop = folio; ret = nfs_flush_incompatible(file, folio); if (ret) { @@ -370,10 +373,9 @@ start: static int nfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct folio *folio = page_folio(page); unsigned offset = offset_in_folio(folio, pos); int status; @@ -425,7 +427,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, static void nfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n", folio->index, offset, length); @@ -433,8 +435,8 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset, return; /* Cancel any unstarted writes on this page */ nfs_wb_folio_cancel(inode, folio); - folio_wait_fscache(folio); - trace_nfs_invalidate_folio(inode, folio); + folio_wait_private_2(folio); /* [DEPRECATED] */ + trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length); } /* @@ -450,9 +452,9 @@ static bool 
nfs_release_folio(struct folio *folio, gfp_t gfp) /* If the private flag is set, then the folio is not freeable */ if (folio_test_private(folio)) { if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL || - current_is_kswapd()) + current_is_kswapd() || current_is_kcompactd()) return false; - if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0) + if (nfs_wb_folio(folio->mapping->host, folio) < 0) return false; } return nfs_fscache_release_folio(folio, gfp); @@ -500,9 +502,10 @@ static int nfs_launder_folio(struct folio *folio) dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n", inode->i_ino, folio_pos(folio)); - folio_wait_fscache(folio); + folio_wait_private_2(folio); /* [DEPRECATED] */ ret = nfs_wb_folio(inode, folio); - trace_nfs_launder_folio_done(inode, folio, ret); + trace_nfs_launder_folio_done(inode, folio_pos(folio), + folio_size(folio), ret); return ret; } @@ -558,7 +561,6 @@ const struct address_space_operations nfs_file_aops = { .read_folio = nfs_read_folio, .readahead = nfs_readahead, .dirty_folio = filemap_dirty_folio, - .writepage = nfs_writepage, .writepages = nfs_writepages, .write_begin = nfs_write_begin, .write_end = nfs_write_end, @@ -567,7 +569,7 @@ const struct address_space_operations nfs_file_aops = { .migrate_folio = nfs_migrate_folio, .launder_folio = nfs_launder_folio, .is_dirty_writeback = nfs_check_dirty_writeback, - .error_remove_page = generic_error_remove_page, + .error_remove_folio = generic_error_remove_folio, .swap_activate = nfs_swap_activate, .swap_deactivate = nfs_swap_deactivate, .swap_rw = nfs_swap_rw, @@ -589,13 +591,13 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n", filp, filp->f_mapping->host->i_ino, - (long long)folio_file_pos(folio)); + (long long)folio_pos(folio)); sb_start_pagefault(inode->i_sb); /* make sure the cache has finished storing the page */ - if (folio_test_fscache(folio) && - 
folio_wait_fscache_killable(folio) < 0) { + if (folio_test_private_2(folio) && /* [DEPRECATED] */ + folio_wait_private_2_killable(folio) < 0) { ret = VM_FAULT_RETRY; goto out; } @@ -605,7 +607,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); folio_lock(folio); - mapping = folio_file_mapping(folio); + mapping = folio->mapping; if (mapping != inode->i_mapping) goto out_unlock; @@ -721,17 +723,17 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status = 0; - unsigned int saved_type = fl->fl_type; + unsigned int saved_type = fl->c.flc_type; /* Try local locking first */ posix_test_lock(filp, fl); - if (fl->fl_type != F_UNLCK) { + if (fl->c.flc_type != F_UNLCK) { /* found a conflict */ goto out; } - fl->fl_type = saved_type; + fl->c.flc_type = saved_type; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) goto out_noconflict; if (is_local) @@ -741,7 +743,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) out: return status; out_noconflict: - fl->fl_type = F_UNLCK; + fl->c.flc_type = F_UNLCK; goto out; } @@ -766,7 +768,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * If we're signalled while cleaning up locks on process exit, we * still need to complete the unlock. */ - if (status < 0 && !(fl->fl_flags & FL_CLOSE)) + if (status < 0 && !(fl->c.flc_flags & FL_CLOSE)) return status; } @@ -814,7 +816,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * This makes locking act as a cache coherency point. 
*/ nfs_sync_mapping(filp->f_mapping); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { + if (!nfs_have_read_or_write_delegation(inode)) { nfs_zap_caches(inode); if (mapping_mapped(filp->f_mapping)) nfs_revalidate_mapping(inode, filp->f_mapping); @@ -833,12 +835,12 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) int is_local = 0; dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n", - filp, fl->fl_type, fl->fl_flags, + filp, fl->c.flc_type, fl->c.flc_flags, (long long)fl->fl_start, (long long)fl->fl_end); nfs_inc_stats(inode, NFSIOS_VFSLOCK); - if (fl->fl_flags & FL_RECLAIM) + if (fl->c.flc_flags & FL_RECLAIM) return -ENOGRACE; if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL) @@ -852,7 +854,7 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) if (IS_GETLK(cmd)) ret = do_getlk(filp, cmd, fl, is_local); - else if (fl->fl_type == F_UNLCK) + else if (lock_is_unlock(fl)) ret = do_unlk(filp, cmd, fl, is_local); else ret = do_setlk(filp, cmd, fl, is_local); @@ -870,16 +872,16 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) int is_local = 0; dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n", - filp, fl->fl_type, fl->fl_flags); + filp, fl->c.flc_type, fl->c.flc_flags); - if (!(fl->fl_flags & FL_FLOCK)) + if (!(fl->c.flc_flags & FL_FLOCK)) return -ENOLCK; if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; /* We're simulating flock() locks using posix locks on the server */ - if (fl->fl_type == F_UNLCK) + if (lock_is_unlock(fl)) return do_unlk(filp, cmd, fl, is_local); return do_setlk(filp, cmd, fl, is_local); } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 569ae4ec6084..d39a1f58e18d 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -488,7 +488,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) /* Perform an asynchronous read to ds */ nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, NFS_PROTO(hdr->inode), 
&filelayout_read_call_ops, - 0, RPC_TASK_SOFTCONN); + 0, RPC_TASK_SOFTCONN, NULL); return PNFS_ATTEMPTED; } @@ -530,7 +530,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) /* Perform an asynchronous write */ nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, NFS_PROTO(hdr->inode), &filelayout_write_call_ops, - sync, RPC_TASK_SOFTCONN); + sync, RPC_TASK_SOFTCONN, NULL); return PNFS_ATTEMPTED; } @@ -605,14 +605,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); - /* FIXME: remove this check when layout segment support is added */ - if (lgr->range.offset != 0 || - lgr->range.length != NFS4_MAX_UINT64) { - dprintk("%s Only whole file layouts supported. Use MDS i/o\n", - __func__); - goto out; - } - if (fl->pattern_offset > lgr->range.offset) { dprintk("%s pattern_offset %lld too large\n", __func__, fl->pattern_offset); @@ -875,12 +867,12 @@ static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_layout(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), - 0, - NFS4_MAX_UINT64, + req_offset(req), + req->wb_bytes, IOMODE_READ, false, nfs_io_gfp_mask()); @@ -899,12 +891,12 @@ static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_layout(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), - 0, - NFS4_MAX_UINT64, + req_offset(req), + req->wb_bytes, IOMODE_RW, false, nfs_io_gfp_mask()); @@ -1019,7 +1011,7 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) data->args.fh = fh; return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode), &filelayout_commit_call_ops, how, - RPC_TASK_SOFTCONN); + RPC_TASK_SOFTCONN, NULL); out_err: pnfs_generic_prepare_to_resend_writes(data); 
pnfs_generic_commit_release(data); @@ -1118,7 +1110,6 @@ static const struct pnfs_commit_ops filelayout_commit_ops = { .clear_request_commit = pnfs_generic_clear_request_commit, .scan_commit_lists = pnfs_generic_scan_commit_lists, .recover_commit_reqs = pnfs_generic_recover_commit_reqs, - .search_commit_reqs = pnfs_generic_search_commit_reqs, .commit_pagelist = filelayout_commit_pagelist, }; diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index aed0748fd6ec..c7bb5da93307 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -51,7 +51,7 @@ struct nfs4_file_layout_dsaddr { u32 stripe_count; u8 *stripe_indices; u32 ds_num; - struct nfs4_pnfs_ds *ds_list[]; + struct nfs4_pnfs_ds *ds_list[] __counted_by(ds_num); }; struct nfs4_filelayout_segment { diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index acf4b88889dc..29d9234d5c08 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -35,6 +35,7 @@ #include "../internal.h" #include "../nfs4session.h" #include "filelayout.h" +#include "../nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -75,6 +76,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct page *scratch; struct list_head dsaddrs; struct nfs4_pnfs_ds_addr *da; + struct net *net = server->nfs_client->cl_net; /* set up xdr stream */ scratch = alloc_page(gfp_flags); @@ -158,8 +160,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, mp_count = be32_to_cpup(p); /* multipath count */ for (j = 0; j < mp_count; j++) { - da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, - &stream, gfp_flags); + da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } @@ -169,9 +170,10 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out_err_free_deviceid; } - dsaddr->ds_list[i] = 
nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags); if (!dsaddr->ds_list[i]) goto out_err_drain_dsaddrs; + trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr); /* If DS was already in cache, free ds addrs */ while (!list_empty(&dsaddrs)) { diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 3e724cb7ef01..b685e763ef11 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -11,6 +11,7 @@ #include <linux/nfs_mount.h> #include <linux/nfs_page.h> #include <linux/module.h> +#include <linux/file.h> #include <linux/sched/mm.h> #include <linux/sunrpc/metrics.h> @@ -162,6 +163,21 @@ decode_name(struct xdr_stream *xdr, u32 *id) return 0; } +static struct nfsd_file * +ff_local_open_fh(struct nfs_client *clp, const struct cred *cred, + struct nfs_fh *fh, fmode_t mode) +{ + if (mode & FMODE_WRITE) { + /* + * Always request read and write access since this corresponds + * to a rw layout. 
+ */ + mode |= FMODE_READ; + } + + return nfs_local_open_fh(clp, cred, fh, mode); +} + static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, const struct nfs4_ff_layout_mirror *m2) { @@ -237,7 +253,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) { - const struct cred *cred; + const struct cred *cred; ff_layout_remove_mirror(mirror); kfree(mirror->fh_versions); @@ -745,14 +761,14 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_ff_layout_mirror *mirror; - struct nfs4_pnfs_ds *ds; + struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN); u32 idx; /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); - if (!ds) + if (IS_ERR(ds)) continue; if (check_device && @@ -760,10 +776,10 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, continue; *best_idx = idx; - return ds; + break; } - return NULL; + return ds; } static struct nfs4_pnfs_ds * @@ -823,14 +839,6 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, } static void -ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio, - struct nfs_page *req) -{ - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); -} - -static void ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { @@ -839,8 +847,11 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs4_pnfs_ds *ds; u32 ds_idx; + if (NFS_SERVER(pgio->pg_inode)->flags & + (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) + pgio->pg_maxretrans = io_maxretrans; retry: - ff_layout_pg_check_layout(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); /* Use full layout for now */ if (!pgio->pg_lseg) { ff_layout_pg_get_read(pgio, req, false); @@ -852,6 
+863,8 @@ retry: if (!pgio->pg_lseg) goto out_nolseg; } + /* Reset wb_nio, since getting layout segment was successful */ + req->wb_nio = 0; ds = ff_layout_get_ds_for_read(pgio, &ds_idx); if (!ds) { @@ -868,14 +881,24 @@ retry: pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; pgio->pg_mirror_idx = ds_idx; - - if (NFS_SERVER(pgio->pg_inode)->flags & - (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) - pgio->pg_maxretrans = io_maxretrans; return; out_nolseg: - if (pgio->pg_error < 0) - return; + if (pgio->pg_error < 0) { + if (pgio->pg_error != -EAGAIN) + return; + /* Retry getting layout segment if lower layer returned -EAGAIN */ + if (pgio->pg_maxretrans && req->wb_nio++ > pgio->pg_maxretrans) { + if (NFS_SERVER(pgio->pg_inode)->flags & NFS_MOUNT_SOFTERR) + pgio->pg_error = -ETIMEDOUT; + else + pgio->pg_error = -EIO; + return; + } + pgio->pg_error = 0; + /* Sleep for 1 second before retrying */ + ssleep(1); + goto retry; + } out_mds: trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode, 0, NFS4_MAX_UINT64, IOMODE_READ, @@ -895,7 +918,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, u32 i; retry: - ff_layout_pg_check_layout(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), @@ -918,7 +941,7 @@ retry: for (i = 0; i < pgio->pg_mirror_count; i++) { mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true); - if (!ds) { + if (IS_ERR(ds)) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; pnfs_generic_pg_cleanup(pgio); @@ -1081,6 +1104,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr) } static int ff_layout_async_handle_error_v4(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1091,32 +1115,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_deviceid_node *devid = 
FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - switch (task->tk_status) { - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: + switch (op_status) { + case NFS4_OK: + case NFS4ERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFS4ERR_BADSESSION: + case NFS4ERR_BADSLOT: + case NFS4ERR_BAD_HIGH_SLOT: + case NFS4ERR_DEADSESSION: + case NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case NFS4ERR_SEQ_FALSE_RETRY: + case NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session. Exchangeid " "flags 0x%x\n", __func__, task->tk_status, clp->cl_exchange_flags); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - break; - case -NFS4ERR_DELAY: - case -NFS4ERR_GRACE: + goto out_retry; + case NFS4ERR_DELAY: + nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); + fallthrough; + case NFS4ERR_GRACE: rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); - break; - case -NFS4ERR_RETRY_UNCACHED_REP: - break; + goto out_retry; + case NFS4ERR_RETRY_UNCACHED_REP: + goto out_retry; /* Invalidate Layout errors */ - case -NFS4ERR_PNFS_NO_LAYOUT: - case -ESTALE: /* mapped NFS4ERR_STALE */ - case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ - case -EISDIR: /* mapped NFS4ERR_ISDIR */ - case -NFS4ERR_FHEXPIRED: - case -NFS4ERR_WRONG_TYPE: + case NFS4ERR_PNFS_NO_LAYOUT: + case NFS4ERR_STALE: + case NFS4ERR_BADHANDLE: + case NFS4ERR_ISDIR: + case NFS4ERR_FHEXPIRED: + case NFS4ERR_WRONG_TYPE: dprintk("%s Invalid layout error %d\n", __func__, task->tk_status); /* @@ -1129,6 +1163,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, pnfs_destroy_layout(NFS_I(inode)); rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; + default: + break; + } + + switch (task->tk_status) { /* RPC 
connection errors */ case -ECONNREFUSED: case -EHOSTDOWN: @@ -1144,26 +1183,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); - fallthrough; + break; default: - if (ff_layout_avoid_mds_available_ds(lseg)) - return -NFS4ERR_RESET_TO_PNFS; -reset: - dprintk("%s Retry through MDS. Error %d\n", __func__, - task->tk_status); - return -NFS4ERR_RESET_TO_MDS; + break; } + + if (ff_layout_avoid_mds_available_ds(lseg)) + return -NFS4ERR_RESET_TO_PNFS; +reset: + dprintk("%s Retry through MDS. Error %d\n", __func__, + task->tk_status); + return -NFS4ERR_RESET_TO_MDS; + +out_retry: task->tk_status = 0; return -EAGAIN; } /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, + u32 op_status, + struct nfs_client *clp, struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + switch (op_status) { + case NFS_OK: + case NFSERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFSERR_ACCES: + case NFSERR_BADHANDLE: + case NFSERR_FBIG: + case NFSERR_IO: + case NFSERR_NOSPC: + case NFSERR_ROFS: + case NFSERR_STALE: + goto out_reset_to_pnfs; + case NFSERR_JUKEBOX: + nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); + goto out_retry; + default: + break; + } + switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1182,6 +1251,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); } +out_reset_to_pnfs: /* FIXME: Need to prevent infinite looping here. 
*/ return -NFS4ERR_RESET_TO_PNFS; out_retry: @@ -1192,6 +1262,7 @@ out_retry: } static int ff_layout_async_handle_error(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1210,10 +1281,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: - return ff_layout_async_handle_error_v3(task, lseg, idx); - case 4: - return ff_layout_async_handle_error_v4(task, state, clp, + return ff_layout_async_handle_error_v3(task, op_status, clp, lseg, idx); + case 4: + return ff_layout_async_handle_error_v4(task, op_status, state, + clp, lseg, idx); default: /* should never happen */ WARN_ON_ONCE(1); @@ -1240,6 +1312,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -ECONNRESET: case -EHOSTDOWN: case -EHOSTUNREACH: + case -ENETDOWN: case -ENETUNREACH: case -EADDRINUSE: case -ENOBUFS: @@ -1265,6 +1338,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, switch (status) { case NFS4ERR_DELAY: case NFS4ERR_GRACE: + case NFS4ERR_PERM: break; case NFS4ERR_NXIO: ff_layout_mark_ds_unreachable(lseg, idx); @@ -1297,7 +1371,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, trace_ff_layout_read_error(hdr); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1467,7 +1542,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task, trace_ff_layout_write_error(hdr); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1513,8 +1589,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, trace_ff_layout_commit_error(data); } - err = ff_layout_async_handle_error(task, NULL, data->ds_clp, - 
data->lseg, data->ds_commit_index); + err = ff_layout_async_handle_error(task, data->res.op_status, + NULL, data->ds_clp, data->lseg, + data->ds_commit_index); trace_nfs4_pnfs_commit_ds(data, err); switch (err) { @@ -1764,12 +1841,14 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct nfsd_file *localio; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; loff_t offset = hdr->args.offset; u32 idx = hdr->pgio_mirror_idx; int vers; struct nfs_fh *fh; + bool ds_fatal_error = false; dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", __func__, hdr->inode->i_ino, @@ -1777,8 +1856,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); - if (!ds) + if (IS_ERR(ds)) { + ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); goto out_failed; + } ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, hdr->inode); @@ -1810,16 +1891,23 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) hdr->args.offset = offset; hdr->mds_offset = offset; + /* Start IO accounting for local read */ + localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh, FMODE_READ); + if (localio) { + hdr->task.tk_start = ktime_get(); + ff_layout_read_record_layoutstats_start(&hdr->task, hdr); + } + /* Perform an asynchronous read to ds */ nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, vers == 3 ? 
&ff_layout_read_call_ops_v3 : &ff_layout_read_call_ops_v4, - 0, RPC_TASK_SOFTCONN); + 0, RPC_TASK_SOFTCONN, localio); put_cred(ds_cred); return PNFS_ATTEMPTED; out_failed: - if (ff_layout_avoid_mds_available_ds(lseg)) + if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error) return PNFS_TRY_AGAIN; trace_pnfs_mds_fallback_read_pagelist(hdr->inode, hdr->args.offset, hdr->args.count, @@ -1834,17 +1922,21 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct nfsd_file *localio; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; loff_t offset = hdr->args.offset; int vers; struct nfs_fh *fh; u32 idx = hdr->pgio_mirror_idx; + bool ds_fatal_error = false; mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); - if (!ds) + if (IS_ERR(ds)) { + ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); goto out_failed; + } ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, hdr->inode); @@ -1878,16 +1970,24 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) */ hdr->args.offset = offset; + /* Start IO accounting for local write */ + localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh, + FMODE_READ|FMODE_WRITE); + if (localio) { + hdr->task.tk_start = ktime_get(); + ff_layout_write_record_layoutstats_start(&hdr->task, hdr); + } + /* Perform an asynchronous write */ nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, vers == 3 ? 
&ff_layout_write_call_ops_v3 : &ff_layout_write_call_ops_v4, - sync, RPC_TASK_SOFTCONN); + sync, RPC_TASK_SOFTCONN, localio); put_cred(ds_cred); return PNFS_ATTEMPTED; out_failed: - if (ff_layout_avoid_mds_available_ds(lseg)) + if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error) return PNFS_TRY_AGAIN; trace_pnfs_mds_fallback_write_pagelist(hdr->inode, hdr->args.offset, hdr->args.count, @@ -1916,6 +2016,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) struct pnfs_layout_segment *lseg = data->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct nfsd_file *localio; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; u32 idx; @@ -1929,7 +2030,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); - if (!ds) + if (IS_ERR(ds)) goto out_err; ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, @@ -1954,10 +2055,18 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) if (fh) data->args.fh = fh; + /* Start IO accounting for local commit */ + localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh, + FMODE_READ|FMODE_WRITE); + if (localio) { + data->task.tk_start = ktime_get(); + ff_layout_commit_record_layoutstats_start(&data->task, data); + } + ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, vers == 3 ? 
&ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, - how, RPC_TASK_SOFTCONN); + how, RPC_TASK_SOFTCONN, localio); put_cred(ds_cred); return ret; out_err: @@ -2095,12 +2204,6 @@ static int ff_layout_encode_ioerr(struct xdr_stream *xdr, } static void -encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) -{ - WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); -} - -static void ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr, const nfs4_stateid *stateid, const struct nfs42_layoutstat_devinfo *devinfo) @@ -2556,7 +2659,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *dummy) { #if IS_ENABLED(CONFIG_NFS_V4_2) - server->caps |= NFS_CAP_LAYOUTSTATS; + server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN; #endif return 0; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 354a031c69b1..f84b3fb0dddd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -99,7 +99,7 @@ struct nfs4_ff_layout_segment { u64 stripe_unit; u32 flags; u32 mirror_array_cnt; - struct nfs4_ff_layout_mirror *mirror_array[]; + struct nfs4_ff_layout_mirror *mirror_array[] __counted_by(mirror_array_cnt); }; struct nfs4_flexfile_layout { diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e028f5a0ef5f..ef535baeefb6 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -49,6 +49,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct nfs4_pnfs_ds_addr *da; struct nfs4_ff_layout_ds *new_ds = NULL; struct nfs4_ff_ds_version *ds_versions = NULL; + struct net *net = server->nfs_client->cl_net; u32 mp_count; u32 version_count; __be32 *p; @@ -80,8 +81,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, for (i = 0; i < mp_count; i++) { /* multipath ds */ 
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, - &stream, gfp_flags); + da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } @@ -149,7 +149,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, new_ds->ds_versions = ds_versions; new_ds->ds_versions_cnt = version_count; - new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags); if (!new_ds->ds) goto out_err_drain_dsaddrs; @@ -370,11 +370,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, struct nfs4_ff_layout_mirror *mirror, bool fail_return) { - struct nfs4_pnfs_ds *ds = NULL; + struct nfs4_pnfs_ds *ds; struct inode *ino = lseg->pls_layout->plh_inode; struct nfs_server *s = NFS_SERVER(ino); unsigned int max_payload; - int status; + int status = -EAGAIN; if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror)) goto noconnect; @@ -395,6 +395,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, /* connect success, check rsize/wsize limit */ if (!status) { + /* + * ds_clp is put in destroy_ds(). + * keep ds_clp even if DS is local, so that if local IO cannot + * proceed somehow, we can fall back to NFS whenever we want. 
+ */ + nfs_local_probe(ds->ds_clp); max_payload = nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), NULL); @@ -412,7 +418,7 @@ noconnect: ff_layout_send_layouterror(lseg); if (fail_return || !ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(ino, lseg); - ds = NULL; + ds = ERR_PTR(status); out: return ds; } diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 41126d6dcd76..7e000d782e28 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -49,6 +49,7 @@ enum nfs_param { Opt_bsize, Opt_clientaddr, Opt_cto, + Opt_alignwrite, Opt_fg, Opt_fscache, Opt_fscache_flag, @@ -149,6 +150,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_u32 ("bsize", Opt_bsize), fsparam_string("clientaddr", Opt_clientaddr), fsparam_flag_no("cto", Opt_cto), + fsparam_flag_no("alignwrite", Opt_alignwrite), fsparam_flag ("fg", Opt_fg), fsparam_flag_no("fsc", Opt_fscache_flag), fsparam_string("fsc", Opt_fscache), @@ -592,6 +594,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, else ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY; break; + case Opt_alignwrite: + if (result.negated) + ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE; + else + ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE; + break; case Opt_ac: if (result.negated) ctx->flags |= NFS_MOUNT_NOAC; @@ -600,9 +608,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_lock: if (result.negated) { + ctx->lock_status = NFS_LOCK_NOLOCK; ctx->flags |= NFS_MOUNT_NONLM; ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } else { + ctx->lock_status = NFS_LOCK_LOCK; ctx->flags &= ~NFS_MOUNT_NONLM; ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } @@ -652,6 +662,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->fscache_uniq = NULL; break; case Opt_fscache: + trace_nfs_mount_assign(param->key, param->string); ctx->options |= NFS_OPTION_FSCACHE; kfree(ctx->fscache_uniq); ctx->fscache_uniq = param->string; diff --git 
a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 60a3c28784e0..d49e4ce27999 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -263,21 +263,25 @@ int nfs_netfs_readahead(struct readahead_control *ractl) static atomic_t nfs_netfs_debug_id; static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *file) { + if (!file) { + if (WARN_ON_ONCE(rreq->origin != NETFS_PGPRIV2_COPY_TO_CACHE)) + return -EIO; + return 0; + } + rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file)); rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id); + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); + rreq->io_streams[0].sreq_max_len = NFS_SB(rreq->inode->i_sb)->rsize; return 0; } static void nfs_netfs_free_request(struct netfs_io_request *rreq) { - put_nfs_open_context(rreq->netfs_priv); -} - -static inline int nfs_netfs_begin_cache_operation(struct netfs_io_request *rreq) -{ - return fscache_begin_read_operation(&rreq->cache_resources, - netfs_i_cookie(netfs_inode(rreq->inode))); + if (rreq->netfs_priv) + put_nfs_open_context(rreq->netfs_priv); } static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq) @@ -292,14 +296,6 @@ static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sre return netfs; } -static bool nfs_netfs_clamp_length(struct netfs_io_subrequest *sreq) -{ - size_t rsize = NFS_SB(sreq->rreq->inode->i_sb)->rsize; - - sreq->len = min(sreq->len, rsize); - return true; -} - static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq) { struct nfs_netfs_io_data *netfs; @@ -308,17 +304,18 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq) struct nfs_open_context *ctx = sreq->rreq->netfs_priv; struct page *page; unsigned long idx; + pgoff_t start, last; int err; - pgoff_t start = (sreq->start + sreq->transferred) >> PAGE_SHIFT; - pgoff_t last = ((sreq->start + sreq->len - - sreq->transferred - 1) >> 
PAGE_SHIFT); + + start = (sreq->start + sreq->transferred) >> PAGE_SHIFT; + last = ((sreq->start + sreq->len - sreq->transferred - 1) >> PAGE_SHIFT); nfs_pageio_init_read(&pgio, inode, false, &nfs_async_read_completion_ops); netfs = nfs_netfs_alloc(sreq); if (!netfs) - return netfs_subreq_terminated(sreq, -ENOMEM, false); + return netfs_read_subreq_terminated(sreq, -ENOMEM, false); pgio.pg_netfs = netfs; /* used in completion */ @@ -347,7 +344,7 @@ void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr) int nfs_netfs_folio_unlock(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; /* * If fscache is enabled, netfs will unlock pages. @@ -367,7 +364,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) return; sreq = netfs->sreq; - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) + if (test_bit(NFS_IOHDR_EOF, &hdr->flags) && + sreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); if (hdr->error) @@ -382,7 +380,5 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) const struct netfs_request_ops nfs_netfs_ops = { .init_request = nfs_netfs_init_request, .free_request = nfs_netfs_free_request, - .begin_cache_operation = nfs_netfs_begin_cache_operation, .issue_read = nfs_netfs_issue_read, - .clamp_length = nfs_netfs_clamp_length }; diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 2dc64454492b..772d485e96d3 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -60,8 +60,6 @@ static inline void nfs_netfs_get(struct nfs_netfs_io_data *netfs) static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) { - ssize_t final_len; - /* Only the last RPC completion should call netfs_subreq_terminated() */ if (!refcount_dec_and_test(&netfs->refcount)) return; @@ -74,13 +72,14 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) * Correct the final length here to be no larger than the netfs subrequest * length, and thus avoid netfs's 
"Subreq overread" warning message. */ - final_len = min_t(s64, netfs->sreq->len, atomic64_read(&netfs->transferred)); - netfs_subreq_terminated(netfs->sreq, netfs->error ?: final_len, false); + netfs->sreq->transferred = min_t(s64, netfs->sreq->len, + atomic64_read(&netfs->transferred)); + netfs_read_subreq_terminated(netfs->sreq, netfs->error, false); kfree(netfs); } static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) { - netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops); + netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false); } extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr); @@ -101,10 +100,10 @@ extern int nfs_netfs_read_folio(struct file *file, struct folio *folio); static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp) { - if (folio_test_fscache(folio)) { + if (folio_test_private_2(folio)) { /* [DEPRECATED] */ if (current_is_kswapd() || !(gfp & __GFP_FS)) return false; - folio_wait_fscache(folio); + folio_wait_private_2(folio); } fscache_note_page_release(netfs_i_cookie(netfs_inode(folio->mapping->host))); return true; @@ -114,8 +113,8 @@ static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata * struct inode *inode) { memset(auxdata, 0, sizeof(*auxdata)); - auxdata->mtime_sec = inode->i_mtime.tv_sec; - auxdata->mtime_nsec = inode->i_mtime.tv_nsec; + auxdata->mtime_sec = inode_get_mtime(inode).tv_sec; + auxdata->mtime_nsec = inode_get_mtime(inode).tv_nsec; auxdata->ctime_sec = inode_get_ctime(inode).tv_sec; auxdata->ctime_nsec = inode_get_ctime(inode).tv_nsec; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 11ff2b2e060f..f13d25d95b85 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i } /* - * get an NFS2/NFS3 root dentry from the root filehandle + * get a root dentry from the root filehandle */ int 
nfs_get_root(struct super_block *s, struct fs_context *fc) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 56bbf59bda3c..8827cb00f86d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -74,6 +74,8 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) { + if (unlikely(nfs_current_task_exiting())) + return -EINTR; schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; @@ -190,9 +192,8 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); - bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); - if (have_delegation) { + if (nfs_have_delegated_attributes(inode)) { if (!(flags & NFS_INO_REVAL_FORCED)) flags &= ~(NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER | @@ -279,6 +280,8 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { + if (nfs_have_delegated_atime(inode)) + return; spin_lock(&inode->i_lock); nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); @@ -494,6 +497,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; inode->i_data.a_ops = &nfs_file_aops; nfs_inode_init_regular(nfsi); + mapping_set_large_folios(inode->i_mapping); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; @@ -515,8 +519,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) } else init_special_inode(inode, inode->i_mode, fattr->rdev); - memset(&inode->i_atime, 0, sizeof(inode->i_atime)); - memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); + inode_set_atime(inode, 0, 0); + inode_set_mtime(inode, 0, 0); inode_set_ctime(inode, 0, 0); inode_set_iversion_raw(inode, 0); inode->i_size = 0; @@ -530,11 +534,11 @@ 
nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = fattr->atime; + inode_set_atime_to_ts(inode, fattr->atime); else if (fattr_supported & NFS_ATTR_FATTR_ATIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = fattr->mtime; + inode_set_mtime_to_ts(inode, fattr->mtime); else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) @@ -553,6 +557,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) set_nlink(inode, fattr->nlink); else if (fattr_supported & NFS_ATTR_FATTR_NLINK) nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK); + else + set_nlink(inode, 1); if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; else if (fattr_supported & NFS_ATTR_FATTR_OWNER) @@ -607,6 +613,95 @@ out_no_inode: } EXPORT_SYMBOL_GPL(nfs_fhget); +static void +nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr) +{ + unsigned long cache_validity = NFS_I(inode)->cache_validity; + + if (nfs_have_delegated_mtime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME | + NFS_ATTR_FATTR_CTIME); + + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME | + NFS_ATTR_FATTR_MTIME); + + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } else if (nfs_have_delegated_atime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } +} + +static void nfs_set_timestamps_to_ts(struct inode *inode, struct iattr *attr) +{ + unsigned int cache_flags = 0; + + if (attr->ia_valid & ATTR_MTIME_SET) { + struct timespec64 ctime = inode_get_ctime(inode); + struct timespec64 
mtime = inode_get_mtime(inode); + struct timespec64 now; + int updated = 0; + + now = inode_set_ctime_current(inode); + if (!timespec64_equal(&now, &ctime)) + updated |= S_CTIME; + + inode_set_mtime_to_ts(inode, attr->ia_mtime); + if (!timespec64_equal(&now, &mtime)) + updated |= S_MTIME; + + inode_maybe_inc_iversion(inode, updated); + cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + } + if (attr->ia_valid & ATTR_ATIME_SET) { + inode_set_atime_to_ts(inode, attr->ia_atime); + cache_flags |= NFS_INO_INVALID_ATIME; + } + NFS_I(inode)->cache_validity &= ~cache_flags; +} + +static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid) +{ + enum file_time_flags time_flags = 0; + unsigned int cache_flags = 0; + + if (ia_valid & ATTR_MTIME) { + time_flags |= S_MTIME | S_CTIME; + cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + } + if (ia_valid & ATTR_ATIME) { + time_flags |= S_ATIME; + cache_flags |= NFS_INO_INVALID_ATIME; + } + inode_update_timestamps(inode, time_flags); + NFS_I(inode)->cache_validity &= ~cache_flags; +} + +void nfs_update_delegated_atime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (nfs_have_delegated_atime(inode)) + nfs_update_timestamps(inode, ATTR_ATIME); + spin_unlock(&inode->i_lock); +} + +void nfs_update_delegated_mtime_locked(struct inode *inode) +{ + if (nfs_have_delegated_mtime(inode)) + nfs_update_timestamps(inode, ATTR_MTIME); +} + +void nfs_update_delegated_mtime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_update_delegated_mtime_locked(inode); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime); + #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int @@ -634,6 +729,31 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, attr->ia_valid &= ~ATTR_SIZE; } + if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) { + 
spin_lock(&inode->i_lock); + if (attr->ia_valid & ATTR_MTIME_SET) { + nfs_set_timestamps_to_ts(inode, attr); + attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET| + ATTR_ATIME|ATTR_ATIME_SET); + } else { + nfs_update_timestamps(inode, attr->ia_valid); + attr->ia_valid &= ~(ATTR_MTIME|ATTR_ATIME); + } + spin_unlock(&inode->i_lock); + } else if (nfs_have_delegated_atime(inode) && + attr->ia_valid & ATTR_ATIME && + !(attr->ia_valid & ATTR_MTIME)) { + if (attr->ia_valid & ATTR_ATIME_SET) { + spin_lock(&inode->i_lock); + nfs_set_timestamps_to_ts(inode, attr); + spin_unlock(&inode->i_lock); + attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET); + } else { + nfs_update_delegated_atime(inode); + attr->ia_valid &= ~ATTR_ATIME; + } + } + /* Optimization: if the end result is no change, don't RPC */ if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; @@ -689,6 +809,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); + nfs_update_delegated_mtime_locked(inode); spin_lock(&inode->i_lock); out: return err; @@ -712,8 +833,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; if ((attr->ia_valid & ATTR_SIZE) != 0) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME | - NFS_INO_INVALID_BLOCKS); + if (!nfs_have_delegated_mtime(inode)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } @@ -745,9 +867,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = fattr->atime; + inode_set_atime_to_ts(inode, fattr->atime); else if (attr->ia_valid & ATTR_ATIME_SET) - inode->i_atime = attr->ia_atime; 
+ inode_set_atime_to_ts(inode, attr->ia_atime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); @@ -761,9 +883,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = fattr->mtime; + inode_set_mtime_to_ts(inode, fattr->mtime); else if (attr->ia_valid & ATTR_MTIME_SET) - inode->i_mtime = attr->ia_mtime; + inode_set_mtime_to_ts(inode, attr->ia_mtime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); @@ -859,8 +981,12 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, /* Flush out writes to the server in order to update c/mtime/version. */ if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) && - S_ISREG(inode->i_mode)) - filemap_write_and_wait(inode->i_mapping); + S_ISREG(inode->i_mode)) { + if (nfs_have_delegated_mtime(inode)) + filemap_fdatawrite(inode->i_mapping); + else + filemap_write_and_wait(inode->i_mapping); + } /* * We may force a getattr if the user cares about atime. 
@@ -1015,7 +1141,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) if (!is_sync) return; inode = d_inode(ctx->dentry); - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) @@ -1461,11 +1587,11 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode_set_ctime_to_ts(inode, fattr->ctime); } - ts = inode->i_mtime; + ts = inode_get_mtime(inode); if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) && timespec64_equal(&ts, &fattr->pre_mtime)) { - inode->i_mtime = fattr->mtime; + inode_set_mtime_to_ts(inode, fattr->mtime); } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) @@ -1492,7 +1618,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat unsigned long invalid = 0; struct timespec64 ts; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_delegated_attributes(inode)) return 0; if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { @@ -1516,7 +1642,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr)) invalid |= NFS_INO_INVALID_CHANGE; - ts = inode->i_mtime; + ts = inode_get_mtime(inode); if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime)) invalid |= NFS_INO_INVALID_MTIME; @@ -1544,7 +1670,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) invalid |= NFS_INO_INVALID_NLINK; - ts = inode->i_atime; + ts = inode_get_atime(inode); if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; @@ -2013,7 +2139,7 @@ int nfs_post_op_update_inode_force_wcc_locked(struct 
inode *inode, struct nfs_fa } if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { - fattr->pre_mtime = inode->i_mtime; + fattr->pre_mtime = inode_get_mtime(inode); fattr->valid |= NFS_ATTR_FATTR_PREMTIME; } if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && @@ -2129,6 +2255,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; + /* Fix up any delegated attributes in the struct nfs_fattr */ + nfs_fattr_fixup_delegated(inode, fattr); + save_cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME @@ -2195,7 +2324,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = fattr->mtime; + inode_set_mtime_to_ts(inode, fattr->mtime); else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_MTIME; @@ -2231,7 +2360,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) save_cache_validity & NFS_INO_INVALID_SIZE; if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = fattr->atime; + inode_set_atime_to_ts(inode, fattr->atime); else if (fattr_supported & NFS_ATTR_FATTR_ATIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATIME; @@ -2383,7 +2512,7 @@ static int __init nfs_init_inodecache(void) nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), + SLAB_ACCOUNT), init_once); if (nfs_inode_cachep == NULL) return -ENOMEM; @@ -2401,35 +2530,54 @@ static void nfs_destroy_inodecache(void) kmem_cache_destroy(nfs_inode_cachep); } +struct workqueue_struct *nfslocaliod_workqueue; struct workqueue_struct *nfsiod_workqueue; EXPORT_SYMBOL_GPL(nfsiod_workqueue); /* - * start up the nfsiod workqueue + * Destroy the nfsiod workqueues */ -static int 
nfsiod_start(void) +static void nfsiod_stop(void) { struct workqueue_struct *wq; - dprintk("RPC: creating workqueue nfsiod\n"); - wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); - if (wq == NULL) - return -ENOMEM; - nfsiod_workqueue = wq; - return 0; + + wq = nfsiod_workqueue; + if (wq != NULL) { + nfsiod_workqueue = NULL; + destroy_workqueue(wq); + } +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + wq = nfslocaliod_workqueue; + if (wq != NULL) { + nfslocaliod_workqueue = NULL; + destroy_workqueue(wq); + } +#endif /* CONFIG_NFS_LOCALIO */ } /* - * Destroy the nfsiod workqueue + * Start the nfsiod workqueues */ -static void nfsiod_stop(void) +static int nfsiod_start(void) { - struct workqueue_struct *wq; - - wq = nfsiod_workqueue; - if (wq == NULL) - return; - nfsiod_workqueue = NULL; - destroy_workqueue(wq); + dprintk("RPC: creating workqueue nfsiod\n"); + nfsiod_workqueue = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (nfsiod_workqueue == NULL) + return -ENOMEM; +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + /* + * localio writes need to use a normal (non-memreclaim) workqueue. + * When we start getting low on space, XFS goes and calls flush_work() on + * a non-memreclaim work queue, which causes a priority inversion problem. 
+ */ + dprintk("RPC: creating workqueue nfslocaliod\n"); + nfslocaliod_workqueue = alloc_workqueue("nfslocaliod", WQ_UNBOUND, 0); + if (unlikely(nfslocaliod_workqueue == NULL)) { + nfsiod_stop(); + return -ENOMEM; + } +#endif /* CONFIG_NFS_LOCALIO */ + return 0; } unsigned int nfs_net_id; @@ -2438,15 +2586,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); + int err; nfs_clients_init(net); if (!rpc_proc_register(net, &nn->rpcstats)) { - nfs_clients_exit(net); - return -ENOMEM; + err = -ENOMEM; + goto err_proc_rpc; } - return nfs_fs_proc_net_init(net); + err = nfs_fs_proc_net_init(net); + if (err) + goto err_proc_nfs; + + return 0; + +err_proc_nfs: + rpc_proc_unregister(net, "nfs"); +err_proc_rpc: + nfs_clients_exit(net); + return err; } static void nfs_net_exit(struct net *net) @@ -2549,6 +2708,7 @@ static void __exit exit_nfs_fs(void) /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +MODULE_DESCRIPTION("NFS client support"); MODULE_LICENSE("GPL"); module_param(enable_ino64, bool, 0644); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a92b234ae087..882d804089ad 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -9,6 +9,7 @@ #include <linux/crc32.h> #include <linux/sunrpc/addr.h> #include <linux/nfs_page.h> +#include <linux/nfslocalio.h> #include <linux/wait_bit.h> #define NFS_SB_MASK (SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) @@ -112,6 +113,7 @@ struct nfs_fs_context { unsigned short protofamily; unsigned short mountfamily; bool has_sec_mnt_opts; + int lock_status; struct { union { @@ -153,6 +155,12 @@ struct nfs_fs_context { } clone_data; }; +enum nfs_lock_status { + NFS_LOCK_NOT_SET = 0, + NFS_LOCK_LOCK = 1, + NFS_LOCK_NOLOCK = 2, +}; + #define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? 
\ errorf(fc, fmt, ## __VA_ARGS__) : \ ({ dprintk(fmt "\n", ## __VA_ARGS__); })) @@ -223,7 +231,7 @@ extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); extern struct nfs_server *nfs_create_server(struct fs_context *); -extern void nfs4_server_set_init_caps(struct nfs_server *); +extern void nfs_server_set_init_caps(struct nfs_server *); extern struct nfs_server *nfs4_create_server(struct fs_context *); extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, @@ -301,7 +309,8 @@ void nfs_pgio_header_free(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct cred *cred, const struct nfs_rpc_ops *rpc_ops, - const struct rpc_call_ops *call_ops, int how, int flags); + const struct rpc_call_ops *call_ops, int how, int flags, + struct nfsd_file *localio); void nfs_free_request(struct nfs_page *req); struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); @@ -431,6 +440,7 @@ int nfs_check_flags(int); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; +extern struct workqueue_struct *nfslocaliod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_free_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); @@ -442,6 +452,51 @@ extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags); extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) +/* localio.c */ +extern void nfs_local_disable(struct nfs_client *); +extern void nfs_local_probe(struct nfs_client *); +extern struct nfsd_file *nfs_local_open_fh(struct nfs_client 
*, + const struct cred *, + struct nfs_fh *, + const fmode_t); +extern int nfs_local_doio(struct nfs_client *, + struct nfsd_file *, + struct nfs_pgio_header *, + const struct rpc_call_ops *); +extern int nfs_local_commit(struct nfsd_file *, + struct nfs_commit_data *, + const struct rpc_call_ops *, int); +extern bool nfs_server_is_local(const struct nfs_client *clp); + +#else /* CONFIG_NFS_LOCALIO */ +static inline void nfs_local_disable(struct nfs_client *clp) {} +static inline void nfs_local_probe(struct nfs_client *clp) {} +static inline struct nfsd_file * +nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, + struct nfs_fh *fh, const fmode_t mode) +{ + return NULL; +} +static inline int nfs_local_doio(struct nfs_client *clp, + struct nfsd_file *localio, + struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) +{ + return -EINVAL; +} +static inline int nfs_local_commit(struct nfsd_file *localio, + struct nfs_commit_data *data, + const struct rpc_call_ops *call_ops, int how) +{ + return -EINVAL; +} +static inline bool nfs_server_is_local(const struct nfs_client *clp) +{ + return false; +} +#endif /* CONFIG_NFS_LOCALIO */ + /* super.c */ extern const struct super_operations nfs_sops; bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); @@ -498,7 +553,6 @@ extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, struct nfs_open_context *ctx, struct folio *folio); extern void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio); -extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ @@ -521,7 +575,8 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, - int how, int flags); + int how, int flags, + struct nfsd_file *localio); extern void nfs_init_commit(struct nfs_commit_data *data, struct 
list_head *head, struct pnfs_layout_segment *lseg, @@ -613,9 +668,12 @@ nfs_write_match_verf(const struct nfs_writeverf *verf, static inline gfp_t nfs_io_gfp_mask(void) { - if (current->flags & PF_WQ_WORKER) - return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; - return GFP_KERNEL; + gfp_t ret = current_gfp_context(GFP_KERNEL); + + /* For workers __GFP_NORETRY only with __GFP_IO or __GFP_FS */ + if ((current->flags & PF_WQ_WORKER) && ret == GFP_KERNEL) + ret |= __GFP_NORETRY | __GFP_NOWARN; + return ret; } /* @@ -778,7 +836,7 @@ static inline void nfs_folio_mark_unstable(struct folio *folio, struct nfs_commit_info *cinfo) { if (folio && !cinfo->dreq) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; long nr = folio_nr_pages(folio); /* This page is really still in write-back - just that the @@ -793,31 +851,12 @@ static inline void nfs_folio_mark_unstable(struct folio *folio, /* * Determine the number of bytes of data the page contains */ -static inline -unsigned int nfs_page_length(struct page *page) -{ - loff_t i_size = i_size_read(page_file_mapping(page)->host); - - if (i_size > 0) { - pgoff_t index = page_index(page); - pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT; - if (index < end_index) - return PAGE_SIZE; - if (index == end_index) - return ((i_size - 1) & ~PAGE_MASK) + 1; - } - return 0; -} - -/* - * Determine the number of bytes of data the page contains - */ static inline size_t nfs_folio_length(struct folio *folio) { - loff_t i_size = i_size_read(folio_file_mapping(folio)->host); + loff_t i_size = i_size_read(folio->mapping->host); if (i_size > 0) { - pgoff_t index = folio_index(folio) >> folio_order(folio); + pgoff_t index = folio->index >> folio_order(folio); pgoff_t end_index = (i_size - 1) >> folio_shift(folio); if (index < end_index) return folio_size(folio); @@ -859,18 +898,16 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } -#ifdef 
CONFIG_CRC32 static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid) { return ~crc32_le(0xFFFFFFFF, &stateid->other[0], NFS4_STATEID_OTHER_SIZE); } -#else -static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) + +static inline bool nfs_current_task_exiting(void) { - return 0; + return (current->flags & PF_EXITING) != 0; } -#endif static inline bool nfs_error_is_fatal(int err) { @@ -934,7 +971,6 @@ struct nfs_direct_req { loff_t io_start; /* Start offset for I/O */ ssize_t count, /* bytes actually processed */ max_count, /* max expected count */ - bytes_left, /* bytes left to be sent */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 5aa776b5a3e7..49862c95b224 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -46,10 +46,11 @@ static inline void nfs_add_stats(const struct inode *inode, nfs_add_server_stats(NFS_SERVER(inode), stat, addend); } -static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) -{ - return alloc_percpu(struct nfs_iostats); -} +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). 
+ */ +#define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats) static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) { diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c new file mode 100644 index 000000000000..21b2b38fae9f --- /dev/null +++ b/fs/nfs/localio.c @@ -0,0 +1,763 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * NFS client support for local clients to bypass network stack + * + * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com> + * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com> + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + * Copyright (C) 2024 NeilBrown <neilb@suse.de> + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/vfs.h> +#include <linux/file.h> +#include <linux/inet.h> +#include <linux/sunrpc/addr.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> +#include <linux/nfs_common.h> +#include <linux/nfslocalio.h> +#include <linux/bvec.h> + +#include <linux/nfs.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_xdr.h> + +#include "internal.h" +#include "pnfs.h" +#include "nfstrace.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +struct nfs_local_kiocb { + struct kiocb kiocb; + struct bio_vec *bvec; + struct nfs_pgio_header *hdr; + struct work_struct work; + struct nfsd_file *localio; +}; + +struct nfs_local_fsync_ctx { + struct nfsd_file *localio; + struct nfs_commit_data *data; + struct work_struct work; + struct kref kref; + struct completion *done; +}; +static void nfs_local_fsync_work(struct work_struct *work); + +static bool localio_enabled __read_mostly = true; +module_param(localio_enabled, bool, 0644); + +static inline bool nfs_client_is_local(const struct nfs_client *clp) +{ + return !!test_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); +} + +bool nfs_server_is_local(const struct nfs_client *clp) +{ + return nfs_client_is_local(clp) && localio_enabled; +} +EXPORT_SYMBOL_GPL(nfs_server_is_local); + +/* + * UUID_IS_LOCAL XDR functions + */ + 
+static void localio_xdr_enc_uuidargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const u8 *uuid = data; + + encode_opaque_fixed(xdr, uuid, UUID_SIZE); +} + +static int localio_xdr_dec_uuidres(struct rpc_rqst *req, + struct xdr_stream *xdr, + void *result) +{ + /* void return */ + return 0; +} + +static const struct rpc_procinfo nfs_localio_procedures[] = { + [LOCALIOPROC_UUID_IS_LOCAL] = { + .p_proc = LOCALIOPROC_UUID_IS_LOCAL, + .p_encode = localio_xdr_enc_uuidargs, + .p_decode = localio_xdr_dec_uuidres, + .p_arglen = XDR_QUADLEN(UUID_SIZE), + .p_replen = 0, + .p_statidx = LOCALIOPROC_UUID_IS_LOCAL, + .p_name = "UUID_IS_LOCAL", + }, +}; + +static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)]; +static const struct rpc_version nfslocalio_version1 = { + .number = 1, + .nrprocs = ARRAY_SIZE(nfs_localio_procedures), + .procs = nfs_localio_procedures, + .counts = nfs_localio_counts, +}; + +static const struct rpc_version *nfslocalio_version[] = { + [1] = &nfslocalio_version1, +}; + +extern const struct rpc_program nfslocalio_program; +static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program }; + +const struct rpc_program nfslocalio_program = { + .name = "nfslocalio", + .number = NFS_LOCALIO_PROGRAM, + .nrvers = ARRAY_SIZE(nfslocalio_version), + .version = nfslocalio_version, + .stats = &nfslocalio_rpcstat, +}; + +/* + * nfs_local_enable - enable local i/o for an nfs_client + */ +static void nfs_local_enable(struct nfs_client *clp) +{ + spin_lock(&clp->cl_localio_lock); + set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); + trace_nfs_local_enable(clp); + spin_unlock(&clp->cl_localio_lock); +} + +/* + * nfs_local_disable - disable local i/o for an nfs_client + */ +void nfs_local_disable(struct nfs_client *clp) +{ + spin_lock(&clp->cl_localio_lock); + if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) { + trace_nfs_local_disable(clp); + nfs_uuid_invalidate_one_client(&clp->cl_uuid); + } + 
spin_unlock(&clp->cl_localio_lock); +} + +/* + * nfs_init_localioclient - Initialise an NFS localio client connection + */ +static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp) +{ + struct rpc_clnt *rpcclient_localio; + + rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient, + &nfslocalio_program, 1); + + dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n", + __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + (IS_ERR(rpcclient_localio) ? "does not support" : "supports")); + + return rpcclient_localio; +} + +static bool nfs_server_uuid_is_local(struct nfs_client *clp) +{ + u8 uuid[UUID_SIZE]; + struct rpc_message msg = { + .rpc_argp = &uuid, + }; + struct rpc_clnt *rpcclient_localio; + int status; + + rpcclient_localio = nfs_init_localioclient(clp); + if (IS_ERR(rpcclient_localio)) + return false; + + export_uuid(uuid, &clp->cl_uuid.uuid); + + msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL]; + status = rpc_call_sync(rpcclient_localio, &msg, 0); + dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n", + __func__, status); + rpc_shutdown_client(rpcclient_localio); + + /* Server is only local if it initialized required struct members */ + if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom) + return false; + + return true; +} + +/* + * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client + * - called after alloc_client and init_client (so cl_rpcclient exists) + * - this function is idempotent, it can be called for old or new clients + */ +void nfs_local_probe(struct nfs_client *clp) +{ + /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */ + if (!localio_enabled || + clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) { + nfs_local_disable(clp); + return; + } + + if (nfs_client_is_local(clp)) { + /* If already enabled, disable and re-enable */ + nfs_local_disable(clp); + } + + if (!nfs_uuid_begin(&clp->cl_uuid)) + return; + if (nfs_server_uuid_is_local(clp)) + 
nfs_local_enable(clp); + nfs_uuid_end(&clp->cl_uuid); +} +EXPORT_SYMBOL_GPL(nfs_local_probe); + +/* + * nfs_local_open_fh - open a local filehandle in terms of nfsd_file + * + * Returns a pointer to a struct nfsd_file or NULL + */ +struct nfsd_file * +nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, + struct nfs_fh *fh, const fmode_t mode) +{ + struct nfsd_file *localio; + int status; + + if (!nfs_server_is_local(clp)) + return NULL; + if (mode & ~(FMODE_READ | FMODE_WRITE)) + return NULL; + + localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient, + cred, fh, mode); + if (IS_ERR(localio)) { + status = PTR_ERR(localio); + trace_nfs_local_open_fh(fh, mode, status); + switch (status) { + case -ENOMEM: + case -ENXIO: + case -ENOENT: + /* Revalidate localio, will disable if unsupported */ + nfs_local_probe(clp); + } + return NULL; + } + return localio; +} +EXPORT_SYMBOL_GPL(nfs_local_open_fh); + +static struct bio_vec * +nfs_bvec_alloc_and_import_pagevec(struct page **pagevec, + unsigned int npages, gfp_t flags) +{ + struct bio_vec *bvec, *p; + + bvec = kmalloc_array(npages, sizeof(*bvec), flags); + if (bvec != NULL) { + for (p = bvec; npages > 0; p++, pagevec++, npages--) { + p->bv_page = *pagevec; + p->bv_len = PAGE_SIZE; + p->bv_offset = 0; + } + } + return bvec; +} + +static void +nfs_local_iocb_free(struct nfs_local_kiocb *iocb) +{ + kfree(iocb->bvec); + kfree(iocb); +} + +static struct nfs_local_kiocb * +nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, + struct nfsd_file *localio, gfp_t flags) +{ + struct nfs_local_kiocb *iocb; + + iocb = kmalloc(sizeof(*iocb), flags); + if (iocb == NULL) + return NULL; + iocb->bvec = nfs_bvec_alloc_and_import_pagevec(hdr->page_array.pagevec, + hdr->page_array.npages, flags); + if (iocb->bvec == NULL) { + kfree(iocb); + return NULL; + } + init_sync_kiocb(&iocb->kiocb, nfs_to->nfsd_file_file(localio)); + iocb->kiocb.ki_pos = hdr->args.offset; + iocb->localio = localio; + iocb->hdr = hdr; + 
iocb->kiocb.ki_flags &= ~IOCB_APPEND; + return iocb; +} + +static void +nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir) +{ + struct nfs_pgio_header *hdr = iocb->hdr; + + iov_iter_bvec(i, dir, iocb->bvec, hdr->page_array.npages, + hdr->args.count + hdr->args.pgbase); + if (hdr->args.pgbase != 0) + iov_iter_advance(i, hdr->args.pgbase); +} + +static void +nfs_local_hdr_release(struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) +{ + call_ops->rpc_call_done(&hdr->task, hdr); + call_ops->rpc_release(hdr); +} + +static void +nfs_local_pgio_init(struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) +{ + hdr->task.tk_ops = call_ops; + if (!hdr->task.tk_start) + hdr->task.tk_start = ktime_get(); +} + +static void +nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) +{ + if (status >= 0) { + hdr->res.count = status; + hdr->res.op_status = NFS4_OK; + hdr->task.tk_status = 0; + } else { + hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status); + hdr->task.tk_status = status; + } +} + +static void +nfs_local_pgio_release(struct nfs_local_kiocb *iocb) +{ + struct nfs_pgio_header *hdr = iocb->hdr; + + nfs_to_nfsd_file_put_local(iocb->localio); + nfs_local_iocb_free(iocb); + nfs_local_hdr_release(hdr, hdr->task.tk_ops); +} + +static void +nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) +{ + struct nfs_pgio_header *hdr = iocb->hdr; + struct file *filp = iocb->kiocb.ki_filp; + + nfs_local_pgio_done(hdr, status); + + /* + * Must clear replen otherwise NFSv3 data corruption will occur + * if/when switching from LOCALIO back to using normal RPC. + */ + hdr->res.replen = 0; + + if (hdr->res.count != hdr->args.count || + hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp))) + hdr->res.eof = true; + + dprintk("%s: read %ld bytes eof %d.\n", __func__, + status > 0 ? 
status : 0, hdr->res.eof); +} + +static void nfs_local_call_read(struct work_struct *work) +{ + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + struct file *filp = iocb->kiocb.ki_filp; + const struct cred *save_cred; + struct iov_iter iter; + ssize_t status; + + save_cred = override_creds(filp->f_cred); + + nfs_local_iter_init(&iter, iocb, READ); + + status = filp->f_op->read_iter(&iocb->kiocb, &iter); + WARN_ON_ONCE(status == -EIOCBQUEUED); + + nfs_local_read_done(iocb, status); + nfs_local_pgio_release(iocb); + + revert_creds(save_cred); +} + +static int +nfs_do_local_read(struct nfs_pgio_header *hdr, + struct nfsd_file *localio, + const struct rpc_call_ops *call_ops) +{ + struct nfs_local_kiocb *iocb; + + dprintk("%s: vfs_read count=%u pos=%llu\n", + __func__, hdr->args.count, hdr->args.offset); + + iocb = nfs_local_iocb_alloc(hdr, localio, GFP_KERNEL); + if (iocb == NULL) + return -ENOMEM; + + nfs_local_pgio_init(hdr, call_ops); + hdr->res.eof = false; + + INIT_WORK(&iocb->work, nfs_local_call_read); + queue_work(nfslocaliod_workqueue, &iocb->work); + + return 0; +} + +static void +nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + u32 *verf = (u32 *)verifier->data; + int seq = 0; + + do { + read_seqbegin_or_lock(&clp->cl_boot_lock, &seq); + verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec; + verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec; + } while (need_seqretry(&clp->cl_boot_lock, seq)); + done_seqretry(&clp->cl_boot_lock, seq); +} + +static void +nfs_reset_boot_verifier(struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + + write_seqlock(&clp->cl_boot_lock); + ktime_get_real_ts64(&clp->cl_nfssvc_boot); + write_sequnlock(&clp->cl_boot_lock); +} + +static void +nfs_set_local_verifier(struct inode *inode, + struct nfs_writeverf *verf, + enum nfs3_stable_how how) +{ + nfs_copy_boot_verifier(&verf->verifier, 
inode); + verf->committed = how; +} + +/* Factored out from fs/nfsd/vfs.h:fh_getattr() */ +static int __vfs_getattr(struct path *p, struct kstat *stat, int version) +{ + u32 request_mask = STATX_BASIC_STATS; + + if (version == 4) + request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE); + return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT); +} + +/* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */ +static u64 __nfsd4_change_attribute(const struct kstat *stat, + const struct inode *inode) +{ + u64 chattr; + + if (stat->result_mask & STATX_CHANGE_COOKIE) { + chattr = stat->change_cookie; + if (S_ISREG(inode->i_mode) && + !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) { + chattr += (u64)stat->ctime.tv_sec << 30; + chattr += stat->ctime.tv_nsec; + } + } else { + chattr = time_to_chattr(&stat->ctime); + } + return chattr; +} + +static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb) +{ + struct kstat stat; + struct file *filp = iocb->kiocb.ki_filp; + struct nfs_pgio_header *hdr = iocb->hdr; + struct nfs_fattr *fattr = hdr->res.fattr; + int version = NFS_PROTO(hdr->inode)->version; + + if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version)) + return; + + fattr->valid = (NFS_ATTR_FATTR_FILEID | + NFS_ATTR_FATTR_CHANGE | + NFS_ATTR_FATTR_SIZE | + NFS_ATTR_FATTR_ATIME | + NFS_ATTR_FATTR_MTIME | + NFS_ATTR_FATTR_CTIME | + NFS_ATTR_FATTR_SPACE_USED); + + fattr->fileid = stat.ino; + fattr->size = stat.size; + fattr->atime = stat.atime; + fattr->mtime = stat.mtime; + fattr->ctime = stat.ctime; + if (version == 4) { + fattr->change_attr = + __nfsd4_change_attribute(&stat, file_inode(filp)); + } else + fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); + fattr->du.nfs3.used = stat.blocks << 9; +} + +static void +nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) +{ + struct nfs_pgio_header *hdr = iocb->hdr; + struct inode *inode = hdr->inode; + + dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? 
status : 0); + + /* Handle short writes as if they are ENOSPC */ + if (status > 0 && status < hdr->args.count) { + hdr->mds_offset += status; + hdr->args.offset += status; + hdr->args.pgbase += status; + hdr->args.count -= status; + nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset); + status = -ENOSPC; + } + if (status < 0) + nfs_reset_boot_verifier(inode); + else if (nfs_should_remove_suid(inode)) { + /* Deal with the suid/sgid bit corner case */ + spin_lock(&inode->i_lock); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); + spin_unlock(&inode->i_lock); + } + nfs_local_pgio_done(hdr, status); +} + +static void nfs_local_call_write(struct work_struct *work) +{ + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + struct file *filp = iocb->kiocb.ki_filp; + unsigned long old_flags = current->flags; + const struct cred *save_cred; + struct iov_iter iter; + ssize_t status; + + current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; + save_cred = override_creds(filp->f_cred); + + nfs_local_iter_init(&iter, iocb, WRITE); + + file_start_write(filp); + status = filp->f_op->write_iter(&iocb->kiocb, &iter); + file_end_write(filp); + WARN_ON_ONCE(status == -EIOCBQUEUED); + + nfs_local_write_done(iocb, status); + nfs_local_vfs_getattr(iocb); + nfs_local_pgio_release(iocb); + + revert_creds(save_cred); + current->flags = old_flags; +} + +static int +nfs_do_local_write(struct nfs_pgio_header *hdr, + struct nfsd_file *localio, + const struct rpc_call_ops *call_ops) +{ + struct nfs_local_kiocb *iocb; + + dprintk("%s: vfs_write count=%u pos=%llu %s\n", + __func__, hdr->args.count, hdr->args.offset, + (hdr->args.stable == NFS_UNSTABLE) ? 
"unstable" : "stable"); + + iocb = nfs_local_iocb_alloc(hdr, localio, GFP_NOIO); + if (iocb == NULL) + return -ENOMEM; + + switch (hdr->args.stable) { + default: + break; + case NFS_DATA_SYNC: + iocb->kiocb.ki_flags |= IOCB_DSYNC; + break; + case NFS_FILE_SYNC: + iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC; + } + nfs_local_pgio_init(hdr, call_ops); + + nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable); + + INIT_WORK(&iocb->work, nfs_local_call_write); + queue_work(nfslocaliod_workqueue, &iocb->work); + + return 0; +} + +int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio, + struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) +{ + int status = 0; + struct file *filp = nfs_to->nfsd_file_file(localio); + + if (!hdr->args.count) + return 0; + /* Don't support filesystems without read_iter/write_iter */ + if (!filp->f_op->read_iter || !filp->f_op->write_iter) { + nfs_local_disable(clp); + status = -EAGAIN; + goto out; + } + + switch (hdr->rw_mode) { + case FMODE_READ: + status = nfs_do_local_read(hdr, localio, call_ops); + break; + case FMODE_WRITE: + status = nfs_do_local_write(hdr, localio, call_ops); + break; + default: + dprintk("%s: invalid mode: %d\n", __func__, + hdr->rw_mode); + status = -EINVAL; + } +out: + if (status != 0) { + nfs_to_nfsd_file_put_local(localio); + hdr->task.tk_status = status; + nfs_local_hdr_release(hdr, call_ops); + } + return status; +} + +static void +nfs_local_init_commit(struct nfs_commit_data *data, + const struct rpc_call_ops *call_ops) +{ + data->task.tk_ops = call_ops; +} + +static int +nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data) +{ + loff_t start = data->args.offset; + loff_t end = LLONG_MAX; + + if (data->args.count > 0) { + end = start + data->args.count - 1; + if (end < start) + end = LLONG_MAX; + } + + dprintk("%s: commit %llu - %llu\n", __func__, start, end); + return vfs_fsync_range(filp, start, end, 0); +} + +static void 
+nfs_local_commit_done(struct nfs_commit_data *data, int status) +{ + if (status >= 0) { + nfs_set_local_verifier(data->inode, + data->res.verf, + NFS_FILE_SYNC); + data->res.op_status = NFS4_OK; + data->task.tk_status = 0; + } else { + nfs_reset_boot_verifier(data->inode); + data->res.op_status = nfs_localio_errno_to_nfs4_stat(status); + data->task.tk_status = status; + } +} + +static void +nfs_local_release_commit_data(struct nfsd_file *localio, + struct nfs_commit_data *data, + const struct rpc_call_ops *call_ops) +{ + nfs_to_nfsd_file_put_local(localio); + call_ops->rpc_call_done(&data->task, data); + call_ops->rpc_release(data); +} + +static struct nfs_local_fsync_ctx * +nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data, + struct nfsd_file *localio, gfp_t flags) +{ + struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags); + + if (ctx != NULL) { + ctx->localio = localio; + ctx->data = data; + INIT_WORK(&ctx->work, nfs_local_fsync_work); + kref_init(&ctx->kref); + ctx->done = NULL; + } + return ctx; +} + +static void +nfs_local_fsync_ctx_kref_free(struct kref *kref) +{ + kfree(container_of(kref, struct nfs_local_fsync_ctx, kref)); +} + +static void +nfs_local_fsync_ctx_put(struct nfs_local_fsync_ctx *ctx) +{ + kref_put(&ctx->kref, nfs_local_fsync_ctx_kref_free); +} + +static void +nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx) +{ + nfs_local_release_commit_data(ctx->localio, ctx->data, + ctx->data->task.tk_ops); + nfs_local_fsync_ctx_put(ctx); +} + +static void +nfs_local_fsync_work(struct work_struct *work) +{ + struct nfs_local_fsync_ctx *ctx; + int status; + + ctx = container_of(work, struct nfs_local_fsync_ctx, work); + + status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio), + ctx->data); + nfs_local_commit_done(ctx->data, status); + if (ctx->done != NULL) + complete(ctx->done); + nfs_local_fsync_ctx_free(ctx); +} + +int nfs_local_commit(struct nfsd_file *localio, + struct nfs_commit_data *data, + const struct 
rpc_call_ops *call_ops, int how) +{ + struct nfs_local_fsync_ctx *ctx; + + ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL); + if (!ctx) { + nfs_local_commit_done(data, -ENOMEM); + nfs_local_release_commit_data(localio, data, call_ops); + return -ENOMEM; + } + + nfs_local_init_commit(data, call_ops); + kref_get(&ctx->kref); + if (how & FLUSH_SYNC) { + DECLARE_COMPLETION_ONSTACK(done); + ctx->done = &done; + queue_work(nfsiod_workqueue, &ctx->work); + wait_for_completion(&done); + } else + queue_work(nfsiod_workqueue, &ctx->work); + nfs_local_fsync_ctx_put(ctx); + return 0; +} diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 68e76b626371..57c9dd700b58 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -128,11 +128,6 @@ struct mountres { rpc_authflavor_t *auth_flavors; }; -struct mnt_fhstatus { - u32 status; - struct nfs_fh *fh; -}; - /** * nfs_mount - Obtain an NFS file handle for the given host and path * @info: pointer to mount request arguments diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index 5ba00610aede..0d3ce0460e35 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -18,7 +18,7 @@ struct nfs_subversion { const struct rpc_version *rpc_vers; /* NFS version information */ const struct nfs_rpc_ops *rpc_ops; /* NFS operations */ const struct super_operations *sops; /* NFS Super operations */ - const struct xattr_handler **xattr; /* NFS xattr handlers */ + const struct xattr_handler * const *xattr; /* NFS xattr handlers */ struct list_head list; /* List of NFS versions */ }; diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c index 467f21ee6a35..b1badc70bd71 100644 --- a/fs/nfs/nfs2super.c +++ b/fs/nfs/nfs2super.c @@ -26,6 +26,7 @@ static void __exit exit_nfs_v2(void) unregister_nfs_version(&nfs_v2); } +MODULE_DESCRIPTION("NFSv2 client support"); MODULE_LICENSE("GPL"); module_init(init_nfs_v2); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index c19093814296..6e75c6c2d234 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -22,14 
+22,12 @@ #include <linux/nfs.h> #include <linux/nfs2.h> #include <linux/nfs_fs.h> +#include <linux/nfs_common.h> #include "nfstrace.h" #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR -/* Mapping from NFS error code to "errno" error code. */ -#define errno_NFSERR_IO EIO - /* * Declare the space requirements for NFS arguments and replies as * number of 32bit-words @@ -64,8 +62,6 @@ #define NFS_readdirres_sz (1+NFS_pagepad_sz) #define NFS_statfsres_sz (1+NFS_info_sz) -static int nfs_stat_to_errno(enum nfs_stat); - /* * Encode/decode NFSv2 basic data types * @@ -1054,70 +1050,6 @@ out_default: return nfs_stat_to_errno(status); } - -/* - * We need to translate between nfs status return values and - * the local errno values which may not be the same. - */ -static const struct { - int stat; - int errno; -} nfs_errtbl[] = { - { NFS_OK, 0 }, - { NFSERR_PERM, -EPERM }, - { NFSERR_NOENT, -ENOENT }, - { NFSERR_IO, -errno_NFSERR_IO}, - { NFSERR_NXIO, -ENXIO }, -/* { NFSERR_EAGAIN, -EAGAIN }, */ - { NFSERR_ACCES, -EACCES }, - { NFSERR_EXIST, -EEXIST }, - { NFSERR_XDEV, -EXDEV }, - { NFSERR_NODEV, -ENODEV }, - { NFSERR_NOTDIR, -ENOTDIR }, - { NFSERR_ISDIR, -EISDIR }, - { NFSERR_INVAL, -EINVAL }, - { NFSERR_FBIG, -EFBIG }, - { NFSERR_NOSPC, -ENOSPC }, - { NFSERR_ROFS, -EROFS }, - { NFSERR_MLINK, -EMLINK }, - { NFSERR_NAMETOOLONG, -ENAMETOOLONG }, - { NFSERR_NOTEMPTY, -ENOTEMPTY }, - { NFSERR_DQUOT, -EDQUOT }, - { NFSERR_STALE, -ESTALE }, - { NFSERR_REMOTE, -EREMOTE }, -#ifdef EWFLUSH - { NFSERR_WFLUSH, -EWFLUSH }, -#endif - { NFSERR_BADHANDLE, -EBADHANDLE }, - { NFSERR_NOT_SYNC, -ENOTSYNC }, - { NFSERR_BAD_COOKIE, -EBADCOOKIE }, - { NFSERR_NOTSUPP, -ENOTSUPP }, - { NFSERR_TOOSMALL, -ETOOSMALL }, - { NFSERR_SERVERFAULT, -EREMOTEIO }, - { NFSERR_BADTYPE, -EBADTYPE }, - { NFSERR_JUKEBOX, -EJUKEBOX }, - { -1, -EIO } -}; - -/** - * nfs_stat_to_errno - convert an NFS status code to a local errno - * @status: NFS status code to convert - * - * Returns a local errno value, or 
-EIO if the NFS status code is - * not recognized. This function is used jointly by NFSv2 and NFSv3. - */ -static int nfs_stat_to_errno(enum nfs_stat status) -{ - int i; - - for (i = 0; nfs_errtbl[i].stat != -1; i++) { - if (nfs_errtbl[i].stat == (int)status) - return nfs_errtbl[i].errno; - } - dprintk("NFS: Unrecognized nfs status value: %u\n", status); - return nfs_errtbl[i].errno; -} - #define PROC(proc, argtype, restype, timer) \ [NFSPROC_##proc] = { \ .p_proc = NFSPROC_##proc, \ diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 674c012868b1..b0c8a39c2bbd 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -111,6 +111,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, cl_init.hostname = buf; switch (ds_proto) { + case XPRT_TRANSPORT_RDMA: case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_TCP_TLS: if (mds_clp->cl_nconnect > 1) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4bf208a0a8e9..88b0fb343ae0 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -39,7 +39,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); schedule_timeout(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; - } while (!fatal_signal_pending(current)); + } while (!fatal_signal_pending(current) && !nfs_current_task_exiting()); return res; } @@ -543,9 +543,10 @@ out: } static int -nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, +nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio, unsigned int len, struct iattr *sattr) { + struct page *page = &folio->page; struct nfs3_createdata *data; struct dentry *d_alias; int status = -ENOMEM; @@ -962,7 +963,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) struct nfs_open_context *ctx = nfs_file_open_context(filp); int status; - if (fl->fl_flags & FL_CLOSE) { + if (fl->c.flc_flags & FL_CLOSE) { l_ctx = nfs_get_lock_context(ctx); if (IS_ERR(l_ctx)) 
l_ctx = NULL; @@ -978,13 +979,21 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return status; } -static int nfs3_have_delegation(struct inode *inode, fmode_t flags) +static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags) { return 0; } +static int nfs3_return_delegation(struct inode *inode) +{ + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); + return 0; +} + static const struct inode_operations nfs3_dir_inode_operations = { .create = nfs_create, + .atomic_open = nfs_atomic_open_v23, .lookup = nfs_lookup, .link = nfs_link, .unlink = nfs_unlink, @@ -1060,6 +1069,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .clear_acl_cache = forget_all_cached_acls, .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, + .return_delegation = nfs3_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index 8a9be9e47f76..20a80478449e 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -27,6 +27,7 @@ static void __exit exit_nfs_v3(void) unregister_nfs_version(&nfs_v3); } +MODULE_DESCRIPTION("NFSv3 client support"); MODULE_LICENSE("GPL"); module_init(init_nfs_v3); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 60f032be805a..4ae01c10b7e2 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -21,14 +21,13 @@ #include <linux/nfs3.h> #include <linux/nfs_fs.h> #include <linux/nfsacl.h> +#include <linux/nfs_common.h> + #include "nfstrace.h" #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR -/* Mapping from NFS error code to "errno" error code. 
*/ -#define errno_NFSERR_IO EIO - /* * Declare the space requirements for NFS arguments and replies as * number of 32bit-words @@ -91,8 +90,6 @@ NFS3_pagepad_sz) #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) -static int nfs3_stat_to_errno(enum nfs_stat); - /* * Map file type to S_IFMT bits */ @@ -1406,7 +1403,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1445,7 +1442,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1495,7 +1492,7 @@ out_default: error = decode_post_op_attr(xdr, result->dir_attr, userns); if (unlikely(error)) goto out; - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1537,7 +1534,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1578,7 +1575,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1658,7 +1655,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1728,7 +1725,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1795,7 +1792,7 @@ out_default: error = decode_wcc_data(xdr, result->dir_attr, userns); if (unlikely(error)) goto out; - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1835,7 +1832,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req, out: return error; out_status: - 
return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1881,7 +1878,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1926,7 +1923,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /** @@ -2101,7 +2098,7 @@ out_default: error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req)); if (unlikely(error)) goto out; - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -2167,7 +2164,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -2243,7 +2240,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -2304,7 +2301,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -2350,7 +2347,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } #ifdef CONFIG_NFS_V3_ACL @@ -2416,7 +2413,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, @@ -2435,76 +2432,11 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } #endif /* CONFIG_NFS_V3_ACL */ - -/* - * We need to translate between nfs status return values and 
- * the local errno values which may not be the same. - */ -static const struct { - int stat; - int errno; -} nfs_errtbl[] = { - { NFS_OK, 0 }, - { NFSERR_PERM, -EPERM }, - { NFSERR_NOENT, -ENOENT }, - { NFSERR_IO, -errno_NFSERR_IO}, - { NFSERR_NXIO, -ENXIO }, -/* { NFSERR_EAGAIN, -EAGAIN }, */ - { NFSERR_ACCES, -EACCES }, - { NFSERR_EXIST, -EEXIST }, - { NFSERR_XDEV, -EXDEV }, - { NFSERR_NODEV, -ENODEV }, - { NFSERR_NOTDIR, -ENOTDIR }, - { NFSERR_ISDIR, -EISDIR }, - { NFSERR_INVAL, -EINVAL }, - { NFSERR_FBIG, -EFBIG }, - { NFSERR_NOSPC, -ENOSPC }, - { NFSERR_ROFS, -EROFS }, - { NFSERR_MLINK, -EMLINK }, - { NFSERR_NAMETOOLONG, -ENAMETOOLONG }, - { NFSERR_NOTEMPTY, -ENOTEMPTY }, - { NFSERR_DQUOT, -EDQUOT }, - { NFSERR_STALE, -ESTALE }, - { NFSERR_REMOTE, -EREMOTE }, -#ifdef EWFLUSH - { NFSERR_WFLUSH, -EWFLUSH }, -#endif - { NFSERR_BADHANDLE, -EBADHANDLE }, - { NFSERR_NOT_SYNC, -ENOTSYNC }, - { NFSERR_BAD_COOKIE, -EBADCOOKIE }, - { NFSERR_NOTSUPP, -ENOTSUPP }, - { NFSERR_TOOSMALL, -ETOOSMALL }, - { NFSERR_SERVERFAULT, -EREMOTEIO }, - { NFSERR_BADTYPE, -EBADTYPE }, - { NFSERR_JUKEBOX, -EJUKEBOX }, - { -1, -EIO } -}; - -/** - * nfs3_stat_to_errno - convert an NFS status code to a local errno - * @status: NFS status code to convert - * - * Returns a local errno value, or -EIO if the NFS status code is - * not recognized. This function is used jointly by NFSv2 and NFSv3. 
- */ -static int nfs3_stat_to_errno(enum nfs_stat status) -{ - int i; - - for (i = 0; nfs_errtbl[i].stat != -1; i++) { - if (nfs_errtbl[i].stat == (int)status) - return nfs_errtbl[i].errno; - } - dprintk("NFS: Unrecognized nfs status value: %u\n", status); - return nfs_errtbl[i].errno; -} - - #define PROC(proc, argtype, restype, timer) \ [NFS3PROC_##proc] = { \ .p_proc = NFS3PROC_##proc, \ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 531c9c20ef1d..9f0d69e65264 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -552,7 +552,7 @@ static int nfs42_do_offload_cancel_async(struct file *dst, .rpc_message = &msg, .callback_ops = &nfs42_offload_cancel_ops, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; int status; diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 911f634ba3da..b6e3d8f77b91 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -132,7 +132,7 @@ nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry) lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ? &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; - return list_lru_add(lru, &entry->lru); + return list_lru_add_obj(lru, &entry->lru); } static bool @@ -143,7 +143,7 @@ nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry) lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ? 
&nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; - return list_lru_del(lru, &entry->lru); + return list_lru_del_obj(lru, &entry->lru); } /* @@ -349,7 +349,7 @@ nfs4_xattr_cache_unlink(struct inode *inode) oldcache = nfsi->xattr_cache; if (oldcache != NULL) { - list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru); + list_lru_del_obj(&nfs4_xattr_cache_lru, &oldcache->lru); oldcache->inode = NULL; } nfsi->xattr_cache = NULL; @@ -474,7 +474,7 @@ nfs4_xattr_get_cache(struct inode *inode, int add) kref_get(&cache->ref); nfsi->xattr_cache = cache; cache->inode = inode; - list_lru_add(&nfs4_xattr_cache_lru, &cache->lru); + list_lru_add_obj(&nfs4_xattr_cache_lru, &cache->lru); } spin_unlock(&inode->i_lock); @@ -796,28 +796,9 @@ static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink, static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc); -static struct shrinker nfs4_xattr_cache_shrinker = { - .count_objects = nfs4_xattr_cache_count, - .scan_objects = nfs4_xattr_cache_scan, - .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_MEMCG_AWARE, -}; - -static struct shrinker nfs4_xattr_entry_shrinker = { - .count_objects = nfs4_xattr_entry_count, - .scan_objects = nfs4_xattr_entry_scan, - .seeks = DEFAULT_SEEKS, - .batch = 512, - .flags = SHRINKER_MEMCG_AWARE, -}; - -static struct shrinker nfs4_xattr_large_entry_shrinker = { - .count_objects = nfs4_xattr_entry_count, - .scan_objects = nfs4_xattr_entry_scan, - .seeks = 1, - .batch = 512, - .flags = SHRINKER_MEMCG_AWARE, -}; +static struct shrinker *nfs4_xattr_cache_shrinker; +static struct shrinker *nfs4_xattr_entry_shrinker; +static struct shrinker *nfs4_xattr_large_entry_shrinker; static enum lru_status cache_lru_isolate(struct list_head *item, @@ -943,7 +924,7 @@ nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc) struct nfs4_xattr_entry *entry; struct list_lru *lru; - lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? 
+ lru = (shrink == nfs4_xattr_large_entry_shrinker) ? &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose); @@ -971,7 +952,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) unsigned long count; struct list_lru *lru; - lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? + lru = (shrink == nfs4_xattr_large_entry_shrinker) ? &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; count = list_lru_shrink_count(lru, sc); @@ -991,18 +972,34 @@ static void nfs4_xattr_cache_init_once(void *p) INIT_LIST_HEAD(&cache->dispose); } -static int nfs4_xattr_shrinker_init(struct shrinker *shrinker, - struct list_lru *lru, const char *name) +typedef unsigned long (*count_objects_cb)(struct shrinker *s, + struct shrink_control *sc); +typedef unsigned long (*scan_objects_cb)(struct shrinker *s, + struct shrink_control *sc); + +static int __init nfs4_xattr_shrinker_init(struct shrinker **shrinker, + struct list_lru *lru, const char *name, + count_objects_cb count, + scan_objects_cb scan, long batch, int seeks) { - int ret = 0; + int ret; - ret = register_shrinker(shrinker, name); - if (ret) + *shrinker = shrinker_alloc(SHRINKER_MEMCG_AWARE, name); + if (!*shrinker) + return -ENOMEM; + + ret = list_lru_init_memcg(lru, *shrinker); + if (ret) { + shrinker_free(*shrinker); return ret; + } - ret = list_lru_init_memcg(lru, shrinker); - if (ret) - unregister_shrinker(shrinker); + (*shrinker)->count_objects = count; + (*shrinker)->scan_objects = scan; + (*shrinker)->batch = batch; + (*shrinker)->seeks = seeks; + + shrinker_register(*shrinker); return ret; } @@ -1010,7 +1007,7 @@ static int nfs4_xattr_shrinker_init(struct shrinker *shrinker, static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker, struct list_lru *lru) { - unregister_shrinker(shrinker); + shrinker_free(shrinker); list_lru_destroy(lru); } @@ -1020,33 +1017,37 @@ int __init nfs4_xattr_cache_init(void) nfs4_xattr_cache_cachep = 
kmem_cache_create("nfs4_xattr_cache_cache", sizeof(struct nfs4_xattr_cache), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), + (SLAB_RECLAIM_ACCOUNT), nfs4_xattr_cache_init_once); if (nfs4_xattr_cache_cachep == NULL) return -ENOMEM; ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker, - &nfs4_xattr_cache_lru, - "nfs-xattr_cache"); + &nfs4_xattr_cache_lru, "nfs-xattr_cache", + nfs4_xattr_cache_count, + nfs4_xattr_cache_scan, 0, DEFAULT_SEEKS); if (ret) goto out1; ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker, - &nfs4_xattr_entry_lru, - "nfs-xattr_entry"); + &nfs4_xattr_entry_lru, "nfs-xattr_entry", + nfs4_xattr_entry_count, + nfs4_xattr_entry_scan, 512, DEFAULT_SEEKS); if (ret) goto out2; ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker, &nfs4_xattr_large_entry_lru, - "nfs-xattr_large_entry"); + "nfs-xattr_large_entry", + nfs4_xattr_entry_count, + nfs4_xattr_entry_scan, 512, 1); if (!ret) return 0; - nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker, &nfs4_xattr_entry_lru); out2: - nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker, &nfs4_xattr_cache_lru); out1: kmem_cache_destroy(nfs4_xattr_cache_cachep); @@ -1056,11 +1057,11 @@ out1: void nfs4_xattr_cache_exit(void) { - nfs4_xattr_shrinker_destroy(&nfs4_xattr_large_entry_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_large_entry_shrinker, &nfs4_xattr_large_entry_lru); - nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker, &nfs4_xattr_entry_lru); - nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker, &nfs4_xattr_cache_lru); kmem_cache_destroy(nfs4_xattr_cache_cachep); } diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 9e3ae53e2205..becc3149aa9e 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -144,9 +144,11 @@ 
decode_putfh_maxsz + \ decode_offload_cancel_maxsz) #define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_copy_notify_maxsz) #define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_copy_notify_maxsz) #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 47c5c1f86d66..7d383d29a995 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -67,7 +67,8 @@ struct nfs4_minor_version_ops { void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); int (*test_and_free_expired)(struct nfs_server *, - nfs4_stateid *, const struct cred *); + const nfs4_stateid *, + const struct cred *); struct nfs_seqid * (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); void (*session_trunk)(struct rpc_clnt *clnt, @@ -82,7 +83,7 @@ struct nfs4_minor_version_ops { #define NFS_SEQID_CONFIRMED 1 struct nfs_seqid_counter { ktime_t create_time; - int owner_id; + u64 owner_id; int flags; u32 counter; spinlock_t lock; /* Protects the list */ @@ -120,7 +121,6 @@ struct nfs4_state_owner { unsigned long so_flags; struct list_head so_states; struct nfs_seqid_counter so_seqid; - seqcount_spinlock_t so_reclaim_seqcount; struct mutex so_delegreturn_mutex; }; @@ -209,6 +209,7 @@ struct nfs4_exception { struct inode *inode; nfs4_stateid *stateid; long timeout; + unsigned short retrans; unsigned char task_is_privileged : 1; unsigned char delay : 1, recovering : 1, @@ -315,7 +316,7 @@ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct nfs_fh *, struct nfs_fattr *); extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); -extern const struct xattr_handler *nfs4_xattr_handlers[]; +extern const struct xattr_handler * const nfs4_xattr_handlers[]; extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_open_context *ctx, const 
struct nfs_lock_context *l_ctx, @@ -329,7 +330,7 @@ extern int update_open_stateid(struct nfs4_state *state, const nfs4_stateid *deleg_stateid, fmode_t fmode); extern int nfs4_proc_setlease(struct file *file, int arg, - struct file_lock **lease, void **priv); + struct file_lease **lease, void **priv); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); extern void nfs4_update_changeattr(struct inode *dir, @@ -546,6 +547,7 @@ extern unsigned short max_session_slots; extern unsigned short max_session_cb_slots; extern unsigned short send_implementation_id; extern bool recover_lost_locks; +extern short nfs_delay_retrans; #define NFS4_CLIENT_ID_UNIQ_LEN (64) extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ac80f87cb9d9..37c17f70cebe 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -923,6 +923,7 @@ static int nfs4_set_client(struct nfs_server *server, else cl_init.max_connect = max_connect; switch (proto) { + case XPRT_TRANSPORT_RDMA: case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_TCP_TLS: cl_init.nconnect = nconnect; @@ -999,6 +1000,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, cl_init.hostname = buf; switch (ds_proto) { + case XPRT_TRANSPORT_RDMA: case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_TCP_TLS: if (mds_clp->cl_nconnect > 1) { @@ -1079,24 +1081,6 @@ static void nfs4_session_limit_xasize(struct nfs_server *server) #endif } -void nfs4_server_set_init_caps(struct nfs_server *server) -{ - /* Set the basic capabilities */ - server->caps |= server->nfs_client->cl_mvops->init_caps; - if (server->flags & NFS_MOUNT_NORDIRPLUS) - server->caps &= ~NFS_CAP_READDIRPLUS; - if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) - server->caps &= ~NFS_CAP_READ_PLUS; - - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. 
- */ - if (nfs4_disable_idmapping && - server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; -} - static int nfs4_server_common_setup(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { @@ -1111,7 +1095,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, if (error < 0) goto out; - nfs4_server_set_init_caps(server); + nfs_server_set_init_caps(server); /* Probe the root fh to retrieve its FSID and filehandle */ error = nfs4_get_rootfh(server, mntfh, auth_probe); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 02788c3c85e5..1cd9652f3c28 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -10,6 +10,7 @@ #include <linux/mount.h> #include <linux/nfs_fs.h> #include <linux/nfs_ssc.h> +#include <linux/splice.h> #include "delegation.h" #include "internal.h" #include "iostat.h" @@ -195,8 +196,8 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count, flags); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(file_in, pos_in, file_out, - pos_out, count, flags); + ret = splice_copy_file_range(file_in, pos_in, file_out, + pos_out, count); return ret; } @@ -438,7 +439,7 @@ void nfs42_ssc_unregister_ops(void) } #endif /* CONFIG_NFS_V4_2 */ -static int nfs4_setlease(struct file *file, int arg, struct file_lock **lease, +static int nfs4_setlease(struct file *file, int arg, struct file_lease **lease, void **priv) { return nfs4_proc_setlease(file, arg, lease, priv); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4b12e45f5753..e6b7cbc06c9c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -103,10 +103,10 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, const struct cred *cred, struct nfs4_slot *slot, bool is_privileged); -static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, - const struct cred *); +static int nfs41_test_stateid(struct 
nfs_server *, const nfs4_stateid *, + const struct cred *); static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, - const struct cred *, bool); + const struct cred *, bool); #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL @@ -293,7 +293,7 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, unsigned long cache_validity; memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst)); - if (!inode || !nfs4_have_delegation(inode, FMODE_READ)) + if (!inode || !nfs_have_read_or_write_delegation(inode)) return; cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags; @@ -310,6 +310,18 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, dst[1] &= ~FATTR4_WORD1_MODE; if (!(cache_validity & NFS_INO_INVALID_OTHER)) dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP); + + if (nfs_have_delegated_mtime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET); + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + dst[1] &= ~(FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET); + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + dst[1] &= ~(FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY_SET); + } else if (nfs_have_delegated_atime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET); + } } static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, @@ -422,6 +434,8 @@ static int nfs4_delay_killable(long *timeout) { might_sleep(); + if (unlikely(nfs_current_task_exiting())) + return -EINTR; __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); schedule_timeout(nfs4_update_delay(timeout)); if (!__fatal_signal_pending(current)) @@ -433,6 +447,8 @@ static int nfs4_delay_interruptible(long *timeout) { might_sleep(); + if (unlikely(nfs_current_task_exiting())) + return -EINTR; __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE); 
schedule_timeout(nfs4_update_delay(timeout)); if (!signal_pending(current)) @@ -587,6 +603,21 @@ wait_on_recovery: return 0; } +/* + * Track the number of NFS4ERR_DELAY related retransmissions and return + * EAGAIN if the 'softerr' mount option is set, and we've exceeded the limit + * set by 'nfs_delay_retrans'. + */ +static int nfs4_exception_should_retrans(const struct nfs_server *server, + struct nfs4_exception *exception) +{ + if (server->flags & NFS_MOUNT_SOFTERR && nfs_delay_retrans >= 0) { + if (exception->retrans++ >= (unsigned short)nfs_delay_retrans) + return -EAGAIN; + } + return 0; +} + /* This is the error handling routine for processes that are allowed * to sleep. */ @@ -597,6 +628,11 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ ret = nfs4_do_handle_exception(server, errorcode, exception); if (exception->delay) { + int ret2 = nfs4_exception_should_retrans(server, exception); + if (ret2 < 0) { + exception->retry = 0; + return ret2; + } ret = nfs4_delay(&exception->timeout, exception->interruptible); goto out_retry; @@ -625,6 +661,11 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, ret = nfs4_do_handle_exception(server, errorcode, exception); if (exception->delay) { + int ret2 = nfs4_exception_should_retrans(server, exception); + if (ret2 < 0) { + exception->retry = 0; + return ret2; + } rpc_delay(task, nfs4_update_delay(&exception->timeout)); goto out_retry; } @@ -1220,7 +1261,8 @@ nfs4_update_changeattr_locked(struct inode *inode, struct nfs_inode *nfsi = NFS_I(inode); u64 change_attr = inode_peek_iversion_raw(inode); - cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + if (!nfs_have_delegated_mtime(inode)) + cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; if (S_ISDIR(inode->i_mode)) cache_validity |= NFS_INO_INVALID_DATA; @@ -1239,7 +1281,7 @@ nfs4_update_changeattr_locked(struct inode *inode, if (S_ISDIR(inode->i_mode)) 
nfs_force_lookup_revalidate(inode); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (!nfs_have_delegated_attributes(inode)) cache_validity |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | @@ -1295,8 +1337,7 @@ static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx) } static u32 -nfs4_map_atomic_open_share(struct nfs_server *server, - fmode_t fmode, int openflags) +nfs4_fmode_to_share_access(fmode_t fmode) { u32 res = 0; @@ -1310,11 +1351,27 @@ nfs4_map_atomic_open_share(struct nfs_server *server, case FMODE_READ|FMODE_WRITE: res = NFS4_SHARE_ACCESS_BOTH; } + return res; +} + +static u32 +nfs4_map_atomic_open_share(struct nfs_server *server, + fmode_t fmode, int openflags) +{ + u32 res = nfs4_fmode_to_share_access(fmode); + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) goto out; /* Want no delegation if we're using O_DIRECT */ - if (openflags & O_DIRECT) + if (openflags & O_DIRECT) { res |= NFS4_SHARE_WANT_NO_DELEG; + goto out; + } + /* res |= NFS4_SHARE_WANT_NO_PREFERENCE; */ + if (server->caps & NFS_CAP_DELEGTIME) + res |= NFS4_SHARE_WANT_DELEG_TIMESTAMPS; + if (server->caps & NFS_CAP_OPEN_XOR) + res |= NFS4_SHARE_WANT_OPEN_XOR_DELEGATION; out: return res; } @@ -1712,7 +1769,8 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, rcu_read_unlock(); trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); - if (!fatal_signal_pending(current)) { + if (!fatal_signal_pending(current) && + !nfs_current_task_exiting()) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; else @@ -1929,44 +1987,41 @@ out_return_state: } static void -nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) -{ - struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client; - struct nfs_delegation *delegation; - int delegation_flags = 0; - - rcu_read_lock(); - delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation) - delegation_flags = 
delegation->flags; - rcu_read_unlock(); - switch (data->o_arg.claim) { - default: +nfs4_process_delegation(struct inode *inode, const struct cred *cred, + enum open_claim_type4 claim, + const struct nfs4_open_delegation *delegation) +{ + switch (delegation->open_delegation_type) { + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: break; + default: + return; + } + switch (claim) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: pr_err_ratelimited("NFS: Broken NFSv4 server %s is " "returning a delegation for " "OPEN(CLAIM_DELEGATE_CUR)\n", - clp->cl_hostname); - return; + NFS_SERVER(inode)->nfs_client->cl_hostname); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + nfs_inode_reclaim_delegation(inode, cred, delegation->type, + &delegation->stateid, + delegation->pagemod_limit, + delegation->open_delegation_type); + break; + default: + nfs_inode_set_delegation(inode, cred, delegation->type, + &delegation->stateid, + delegation->pagemod_limit, + delegation->open_delegation_type); } - if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) - nfs_inode_set_delegation(state->inode, - data->owner->so_cred, - data->o_res.delegation_type, - &data->o_res.delegation, - data->o_res.pagemod_limit); - else - nfs_inode_reclaim_delegation(state->inode, - data->owner->so_cred, - data->o_res.delegation_type, - &data->o_res.delegation, - data->o_res.pagemod_limit); - - if (data->o_res.do_recall) - nfs_async_inode_return_delegation(state->inode, - &data->o_res.delegation); + if (delegation->do_recall) + nfs_async_inode_return_delegation(inode, &delegation->stateid); } /* @@ -1990,11 +2045,16 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (ret) return ERR_PTR(ret); - if (data->o_res.delegation_type != 0) - nfs4_opendata_check_deleg(data, state); + nfs4_process_delegation(state->inode, + data->owner->so_cred, + data->o_arg.claim, + 
&data->o_res.delegation); - if (!update_open_stateid(state, &data->o_res.stateid, - NULL, data->o_arg.fmode)) + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) { + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) + return ERR_PTR(-EAGAIN); + } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) return ERR_PTR(-EAGAIN); refcount_inc(&state->count); @@ -2058,10 +2118,18 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) if (IS_ERR(state)) goto out; - if (data->o_res.delegation_type != 0) - nfs4_opendata_check_deleg(data, state); - if (!update_open_stateid(state, &data->o_res.stateid, - NULL, data->o_arg.fmode)) { + nfs4_process_delegation(state->inode, + data->owner->so_cred, + data->o_arg.claim, + &data->o_res.delegation); + + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) { + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) { + nfs4_put_open_state(state); + state = ERR_PTR(-EAGAIN); + } + } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) { nfs4_put_open_state(state); state = ERR_PTR(-EAGAIN); } @@ -2197,7 +2265,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state { struct nfs_delegation *delegation; struct nfs4_opendata *opendata; - fmode_t delegation_type = 0; + u32 delegation_type = NFS4_OPEN_DELEGATE_NONE; int status; opendata = nfs4_open_recoverdata_alloc(ctx, state, @@ -2206,8 +2274,20 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state return PTR_ERR(opendata); rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) - delegation_type = delegation->type; + if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) { + switch(delegation->type) { + case FMODE_READ: + delegation_type = NFS4_OPEN_DELEGATE_READ; + if 
(test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG; + break; + case FMODE_WRITE: + case FMODE_READ|FMODE_WRITE: + delegation_type = NFS4_OPEN_DELEGATE_WRITE; + if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG; + } + } rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); @@ -2802,16 +2882,16 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st } static int nfs40_test_and_free_expired_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { return -NFS4ERR_BAD_STATEID; } #if defined(CONFIG_NFS_V4_1) static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { int status; @@ -3046,10 +3126,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx); struct inode *dir = d_inode(opendata->dir); unsigned long dir_verifier; - unsigned int seq; int ret; - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); dir_verifier = nfs_save_change_attribute(dir); ret = _nfs4_proc_open(opendata, ctx); @@ -3090,7 +3168,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, case NFS4_OPEN_CLAIM_DELEGATE_PREV: if (!opendata->rpc_done) break; - if (opendata->o_res.delegation_type != 0) + if (opendata->o_res.delegation.type != 0) dir_verifier = nfs_save_change_attribute(dir); nfs_set_verifier(dentry, dir_verifier); } @@ -3102,11 +3180,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - if (d_inode(dentry) == state->inode) { + if (d_inode(dentry) == state->inode) nfs_inode_attach_open_context(ctx); - if 
(read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) - nfs4_schedule_stateid_recovery(server, state); - } out: if (!opendata->cancelled) { @@ -3376,13 +3451,18 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, .inode = inode, .stateid = &arg.stateid, }; - unsigned long adjust_flags = NFS_INO_INVALID_CHANGE; + unsigned long adjust_flags = NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME; int err; if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID)) adjust_flags |= NFS_INO_INVALID_MODE; if (sattr->ia_valid & (ATTR_UID | ATTR_GID)) adjust_flags |= NFS_INO_INVALID_OTHER; + if (sattr->ia_valid & ATTR_ATIME) + adjust_flags |= NFS_INO_INVALID_ATIME; + if (sattr->ia_valid & ATTR_MTIME) + adjust_flags |= NFS_INO_INVALID_MTIME; do { nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), @@ -3494,7 +3574,7 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, write_sequnlock(&state->seqlock); trace_nfs4_close_stateid_update_wait(state->inode, dst, 0); - if (fatal_signal_pending(current)) + if (fatal_signal_pending(current) || nfs_current_task_exiting()) status = -EINTR; else if (schedule_timeout(5*HZ) != 0) @@ -3682,7 +3762,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ - if (!nfs4_have_delegation(inode, FMODE_READ)) { + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { nfs4_bitmask_set(calldata->arg.bitmask_store, server->cache_consistency_bitmask, inode, 0); @@ -3692,8 +3772,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) } calldata->arg.share_access = - nfs4_map_atomic_open_share(NFS_SERVER(inode), - calldata->arg.fmode, 0); + nfs4_fmode_to_share_access(calldata->arg.fmode); if (calldata->res.fattr == NULL) calldata->arg.bitmask = NULL; @@ -3824,8 +3903,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, static void 
nfs4_close_context(struct nfs_open_context *ctx, int is_sync) { + struct dentry *dentry = ctx->dentry; if (ctx->state == NULL) return; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + nfs4_inode_set_return_delegation_on_close(d_inode(dentry)); if (is_sync) nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx)); else @@ -3834,11 +3916,26 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_OPEN_ARGUMENTS - 1UL) + +#define FATTR4_WORD2_NFS42_TIME_DELEG_MASK \ + (FATTR4_WORD2_TIME_DELEG_MODIFY|FATTR4_WORD2_TIME_DELEG_ACCESS) +static bool nfs4_server_delegtime_capable(struct nfs4_server_caps_res *res) +{ + u32 share_access_want = res->open_caps.oa_share_access_want[0]; + u32 attr_bitmask = res->attr_bitmask[2]; + + return (share_access_want & NFS4_SHARE_WANT_DELEG_TIMESTAMPS) && + ((attr_bitmask & FATTR4_WORD2_NFS42_TIME_DELEG_MASK) == + FATTR4_WORD2_NFS42_TIME_DELEG_MASK); +} static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { - u32 bitmask[3] = {}, minorversion = server->nfs_client->cl_minorversion; + u32 minorversion = server->nfs_client->cl_minorversion; + u32 bitmask[3] = { + [0] = FATTR4_WORD0_SUPPORTED_ATTRS, + }; struct nfs4_server_caps_arg args = { .fhandle = fhandle, .bitmask = bitmask, @@ -3861,9 +3958,19 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f FATTR4_WORD0_CASE_PRESERVING; if (minorversion) bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT; + if (minorversion > 1) + bitmask[2] |= FATTR4_WORD2_OPEN_ARGUMENTS; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { + bitmask[0] = (FATTR4_WORD0_SUPPORTED_ATTRS | + FATTR4_WORD0_FH_EXPIRE_TYPE | + 
FATTR4_WORD0_LINK_SUPPORT | + FATTR4_WORD0_SYMLINK_SUPPORT | + FATTR4_WORD0_ACLSUPPORT | + FATTR4_WORD0_CASE_INSENSITIVE | + FATTR4_WORD0_CASE_PRESERVING) & + res.attr_bitmask[0]; /* Sanity check the server answers */ switch (minorversion) { case 0: @@ -3872,9 +3979,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f break; case 1: res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK; + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT & + res.attr_bitmask[2]; break; case 2: res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + bitmask[2] = (FATTR4_WORD2_SUPPATTR_EXCLCREAT | + FATTR4_WORD2_OPEN_ARGUMENTS) & + res.attr_bitmask[2]; } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | @@ -3921,6 +4033,12 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f sizeof(server->attr_bitmask)); server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (res.open_caps.oa_share_access_want[0] & + NFS4_SHARE_WANT_OPEN_XOR_DELEGATION) + server->caps |= NFS_CAP_OPEN_XOR; + if (nfs4_server_delegtime_capable(&res)) + server->caps |= NFS_CAP_DELEGTIME; + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; @@ -3946,7 +4064,7 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) }; int err; - nfs4_server_set_init_caps(server); + nfs_server_set_init_caps(server); do { err = nfs4_handle_exception(server, _nfs4_server_capabilities(server, fhandle), @@ -4620,7 +4738,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry }; int status = 0; - if (!nfs4_have_delegation(inode, FMODE_READ)) { + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { res.fattr = nfs_alloc_fattr(); if 
(res.fattr == NULL) return -ENOMEM; @@ -4938,8 +5056,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct goto out; nfs4_inode_make_writeable(inode); - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode, - NFS_INO_INVALID_CHANGE); + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), + inode, + NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start, @@ -5036,9 +5155,10 @@ static void nfs4_free_createdata(struct nfs4_createdata *data) } static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct page *page, unsigned int len, struct iattr *sattr, + struct folio *folio, unsigned int len, struct iattr *sattr, struct nfs4_label *label) { + struct page *page = &folio->page; struct nfs4_createdata *data; int status = -ENAMETOOLONG; @@ -5063,7 +5183,7 @@ out: } static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct page *page, unsigned int len, struct iattr *sattr) + struct folio *folio, unsigned int len, struct iattr *sattr) { struct nfs4_exception exception = { .interruptible = true, @@ -5074,7 +5194,7 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, label = nfs4_label_init_security(dir, dentry, sattr, &l); do { - err = _nfs4_proc_symlink(dir, dentry, page, len, sattr, label); + err = _nfs4_proc_symlink(dir, dentry, folio, len, sattr, label); trace_nfs4_symlink(dir, &dentry->d_name, err); err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); @@ -5588,7 +5708,7 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) /* Otherwise, request attributes if and only if we don't hold * a delegation */ - return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; + return nfs4_have_delegation(hdr->inode, FMODE_READ, 0) == 0; } void 
nfs4_bitmask_set(__u32 bitmask[], const __u32 src[], @@ -6054,6 +6174,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen, struct nfs_server *server = NFS_SERVER(inode); int ret; + if (unlikely(NFS_FH(inode)->size == 0)) + return -ENODATA; if (!nfs4_server_supports_acls(server, type)) return -EOPNOTSUPP; ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); @@ -6128,6 +6250,9 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, { struct nfs4_exception exception = { }; int err; + + if (unlikely(NFS_FH(inode)->size == 0)) + return -ENODATA; do { err = __nfs4_proc_set_acl(inode, buf, buflen, type); trace_nfs4_set_acl(inode, err); @@ -6556,6 +6681,7 @@ struct nfs4_delegreturndata { u32 roc_barrier; bool roc; } lr; + struct nfs4_delegattr sattr; struct nfs_fattr fattr; int rpc_status; struct inode *inode; @@ -6580,6 +6706,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) &data->res.lr_ret) == -EAGAIN) goto out_restart; + if (data->args.sattr_args && task->tk_status != 0) { + switch(data->res.sattr_ret) { + case 0: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + /* Let the main handler below do stateid recovery */ + break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_delegation_stateid(&data->stateid, + data->inode)) + goto out_restart; + fallthrough; + default: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + goto out_restart; + } + } + switch (task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); @@ -6673,7 +6823,10 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = { .rpc_release = nfs4_delegreturn_release, }; -static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync) +static int _nfs4_proc_delegreturn(struct inode *inode, const struct 
cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, + int issync) { struct nfs4_delegreturndata *data; struct nfs_server *server = NFS_SERVER(inode); @@ -6725,12 +6878,27 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, } } + if (delegation && + test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) { + if (delegation->type & FMODE_READ) { + data->sattr.atime = inode_get_atime(inode); + data->sattr.atime_set = true; + } + if (delegation->type & FMODE_WRITE) { + data->sattr.mtime = inode_get_mtime(inode); + data->sattr.mtime_set = true; + } + data->args.sattr_args = &data->sattr; + data->res.sattr_res = true; + } + if (!data->inode) nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 1); else nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); + task_setup_data.callback_data = data; msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; @@ -6748,13 +6916,16 @@ out: return status; } -int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync) +int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, int issync) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); + err = _nfs4_proc_delegreturn(inode, cred, stateid, + delegation, issync); trace_nfs4_delegreturn(inode, stateid, err); switch (err) { case -NFS4ERR_STALE_STATEID: @@ -6798,7 +6969,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); switch (status) { case 0: - request->fl_type = F_UNLCK; + request->c.flc_type = F_UNLCK; break; case -NFS4ERR_DENIED: status = 0; @@ -6880,10 +7051,18 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, 
struct nfs4_unlockdata *p; struct nfs4_state *state = lsp->ls_state; struct inode *inode = state->inode; + struct nfs_lock_context *l_ctx; p = kzalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) return NULL; + l_ctx = nfs_get_lock_context(ctx); + if (!IS_ERR(l_ctx)) { + p->l_ctx = l_ctx; + } else { + kfree(p); + return NULL; + } p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.seqid = seqid; @@ -6891,7 +7070,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->lsp = lsp; /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); - p->l_ctx = nfs_get_lock_context(ctx); locks_init_lock(&p->fl); locks_copy_lock(&p->fl, fl); p->server = NFS_SERVER(inode); @@ -7016,8 +7194,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, /* Ensure this is an unlock - when canceling a lock, the * canceled lock is passed in, and it won't be an unlock. */ - fl->fl_type = F_UNLCK; - if (fl->fl_flags & FL_CLOSE) + fl->c.flc_type = F_UNLCK; + if (fl->c.flc_flags & FL_CLOSE) set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags); data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); @@ -7043,11 +7221,11 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * struct rpc_task *task; struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); int status = 0; - unsigned char fl_flags = request->fl_flags; + unsigned char saved_flags = request->c.flc_flags; status = nfs4_set_lock_state(state, request); /* Unlock _before_ we do the RPC call */ - request->fl_flags |= FL_EXISTS; + request->c.flc_flags |= FL_EXISTS; /* Exclude nfs_delegation_claim_locks() */ mutex_lock(&sp->so_delegreturn_mutex); /* Exclude nfs4_reclaim_open_stateid() - note nesting! 
*/ @@ -7071,14 +7249,16 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = -ENOMEM; if (IS_ERR(seqid)) goto out; - task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); + task = nfs4_do_unlck(request, + nfs_file_open_context(request->c.flc_file), + lsp, seqid); status = PTR_ERR(task); if (IS_ERR(task)) goto out; status = rpc_wait_for_completion_task(task); rpc_put_task(task); out: - request->fl_flags = fl_flags; + request->c.flc_flags = saved_flags; trace_nfs4_unlock(request, state, F_SETLK, status); return status; } @@ -7189,7 +7369,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), data->timestamp); if (data->arg.new_lock && !data->cancelled) { - data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); + data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) goto out_restart; } @@ -7290,7 +7470,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE)) task_setup_data.flags |= RPC_TASK_MOVEABLE; - data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), + data = nfs4_alloc_lockdata(fl, + nfs_file_open_context(fl->c.flc_file), fl->fl_u.nfs4_fl.owner, GFP_KERNEL); if (data == NULL) return -ENOMEM; @@ -7396,10 +7577,10 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock { struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs4_state_owner *sp = state->owner; - unsigned char fl_flags = request->fl_flags; + unsigned char flags = request->c.flc_flags; int status; - request->fl_flags |= FL_ACCESS; + request->c.flc_flags |= FL_ACCESS; status = locks_lock_inode_wait(state->inode, request); if (status < 0) goto out; @@ -7408,7 +7589,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock if (test_bit(NFS_DELEGATED_STATE, 
&state->flags)) { /* Yes: cache locks! */ /* ...but avoid races with delegation recall... */ - request->fl_flags = fl_flags & ~FL_SLEEP; + request->c.flc_flags = flags & ~FL_SLEEP; status = locks_lock_inode_wait(state->inode, request); up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); @@ -7418,7 +7599,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock mutex_unlock(&sp->so_delegreturn_mutex); status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); out: - request->fl_flags = fl_flags; + request->c.flc_flags = flags; return status; } @@ -7560,7 +7741,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) return -EINVAL; - if (request->fl_type == F_UNLCK) { + if (lock_is_unlock(request)) { if (state != NULL) return nfs4_proc_unlck(state, cmd, request); return 0; @@ -7569,7 +7750,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (state == NULL) return -ENOLCK; - if ((request->fl_flags & FL_POSIX) && + if ((request->c.flc_flags & FL_POSIX) && !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) return -ENOLCK; @@ -7577,7 +7758,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) * Don't rely on the VFS having checked the file open mode, * since it won't do this for flock() locks. 
*/ - switch (request->fl_type) { + switch (request->c.flc_type) { case F_RDLCK: if (!(filp->f_mode & FMODE_READ)) return -EBADF; @@ -7599,7 +7780,7 @@ static int nfs4_delete_lease(struct file *file, void **priv) return generic_setlease(file, F_UNLCK, NULL, priv); } -static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease, +static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease, void **priv) { struct inode *inode = file_inode(file); @@ -7607,17 +7788,17 @@ static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease, int ret; /* No delegation, no lease */ - if (!nfs4_have_delegation(inode, type)) + if (!nfs4_have_delegation(inode, type, 0)) return -EAGAIN; ret = generic_setlease(file, arg, lease, priv); - if (ret || nfs4_have_delegation(inode, type)) + if (ret || nfs4_have_delegation(inode, type, 0)) return ret; /* We raced with a delegation return */ nfs4_delete_lease(file, priv); return -EAGAIN; } -int nfs4_proc_setlease(struct file *file, int arg, struct file_lock **lease, +int nfs4_proc_setlease(struct file *file, int arg, struct file_lease **lease, void **priv) { switch (arg) { @@ -8968,10 +9149,12 @@ try_again: return; status = task->tk_status; - if (status == 0) + if (status == 0) { status = nfs4_detect_session_trunking(adata->clp, task->tk_msg.rpc_resp, xprt); - + trace_nfs4_trunked_exchange_id(adata->clp, + xprt->address_strings[RPC_DISPLAY_ADDR], status); + } if (status == 0) rpc_clnt_xprt_switch_add_xprt(clnt, xprt); else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt, @@ -9653,6 +9836,9 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, nfs4_sequence_free_slot(&lgp->res.seq_res); + exception->state = NULL; + exception->stateid = NULL; + switch (nfs4err) { case 0: goto out; @@ -9749,7 +9935,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { }; struct pnfs_layout_segment * -nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) 
+nfs4_proc_layoutget(struct nfs4_layoutget *lgp, + struct nfs4_exception *exception) { struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); @@ -9769,13 +9956,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) RPC_TASK_MOVEABLE, }; struct pnfs_layout_segment *lseg = NULL; - struct nfs4_exception exception = { - .inode = inode, - .timeout = *timeout, - }; int status = 0; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); + exception->retry = 0; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) @@ -9786,11 +9970,12 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) goto out; if (task->tk_status < 0) { - status = nfs4_layoutget_handle_exception(task, lgp, &exception); - *timeout = exception.timeout; + exception->retry = 1; + status = nfs4_layoutget_handle_exception(task, lgp, exception); } else if (lgp->res.layoutp->len == 0) { + exception->retry = 1; status = -EAGAIN; - *timeout = nfs4_update_delay(&exception.timeout); + nfs4_update_delay(&exception->timeout); } else lseg = pnfs_layout_process(lgp); out: @@ -9828,6 +10013,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) if (!nfs41_sequence_process(task, &lrp->res.seq_res)) return; + if (task->tk_rpc_status == -ETIMEDOUT) { + lrp->rpc_status = -EAGAIN; + lrp->res.lrs_present = 0; + return; + } /* * Was there an RPC level error? 
Assume the call succeeded, * and that we need to release the layout @@ -9851,6 +10041,15 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) fallthrough; case 0: break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + nfs4_schedule_session_recovery(server->nfs_client->cl_session, + task->tk_status); + lrp->res.lrs_present = 0; + lrp->rpc_status = -EAGAIN; + task->tk_status = 0; + break; case -NFS4ERR_DELAY: if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) @@ -9870,8 +10069,13 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; - pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range, + if (lrp->rpc_status == 0 || !lrp->inode) + pnfs_layoutreturn_free_lsegs( + lo, &lrp->args.stateid, &lrp->args.range, lrp->res.lrs_present ? &lrp->res.stateid : NULL); + else + pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid, + &lrp->args.range); nfs4_sequence_free_slot(&lrp->res.seq_res); if (lrp->ld_private.ops && lrp->ld_private.ops->free) lrp->ld_private.ops->free(&lrp->ld_private); @@ -9887,7 +10091,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { .rpc_release = nfs4_layoutreturn_release, }; -int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, unsigned int flags) { struct rpc_task *task; struct rpc_message msg = { @@ -9910,7 +10114,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) &task_setup_data.rpc_client, &msg); lrp->inode = nfs_igrab_and_active(lrp->args.inode); - if (!sync) { + if (flags & PNFS_FL_LAYOUTRETURN_ASYNC) { if (!lrp->inode) { nfs4_layoutreturn_release(lrp); return -EAGAIN; @@ -9918,6 +10122,8 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) task_setup_data.flags |= RPC_TASK_ASYNC; } if (!lrp->inode) + flags |= 
PNFS_FL_LAYOUTRETURN_PRIVILEGED; + if (flags & PNFS_FL_LAYOUTRETURN_PRIVILEGED) nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, 1); else @@ -9926,7 +10132,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - if (sync) + if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC)) status = task->tk_status; trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status); dprintk("<-- %s status=%d\n", __func__, status); @@ -10244,12 +10450,12 @@ out: } static int _nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { int status; struct nfs41_test_stateid_args args = { - .stateid = stateid, + .stateid = *stateid, }; struct nfs41_test_stateid_res res; struct rpc_message msg = { @@ -10305,8 +10511,8 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server, * failed or the state ID is not currently valid. 
*/ static int nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { struct nfs4_exception exception = { .interruptible = true, @@ -10613,7 +10819,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) { - ssize_t error, error2, error3; + ssize_t error, error2, error3, error4 = 0; size_t left = size; error = generic_listxattr(dentry, list, left); @@ -10636,8 +10842,18 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left); if (error3 < 0) return error3; + if (list) { + list += error3; + left -= error3; + } + + if (!nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) { + error4 = security_inode_listsecurity(d_inode(dentry), list, left); + if (error4 < 0) + return error4; + } - error += error2 + error3; + error += error2 + error3 + error4; if (size && error > size) return -ERANGE; return error; @@ -10736,6 +10952,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, .have_delegation = nfs4_have_delegation, + .return_delegation = nfs4_inode_return_delegation, .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, @@ -10777,7 +10994,7 @@ static const struct xattr_handler nfs4_xattr_nfs4_user_handler = { }; #endif -const struct xattr_handler *nfs4_xattr_handlers[] = { +const struct xattr_handler * const nfs4_xattr_handlers[] = { &nfs4_xattr_nfs4_acl_handler, #if defined(CONFIG_NFS_V4_1) &nfs4_xattr_nfs4_dacl_handler, diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 351616c61df5..f9c291e2165c 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -148,16 +148,12 @@ static inline void nfs4_copy_sessionid(struct nfs4_sessionid *dst, memcpy(dst->data, 
src->data, NFS4_MAX_SESSIONID_LEN); } -#ifdef CONFIG_CRC32 /* * nfs_session_id_hash - calculate the crc32 hash for the session id * @session - pointer to session */ #define nfs_session_id_hash(sess_id) \ (~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data))) -#else -#define nfs_session_id_hash(session) (0) -#endif #else /* defined(CONFIG_NFS_V4_1) */ static inline int nfs4_init_session(struct nfs_client *clp) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 794bb4aa588d..397a86011878 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -501,11 +501,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, sp = kzalloc(sizeof(*sp), gfp_flags); if (!sp) return NULL; - sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags); - if (sp->so_seqid.owner_id < 0) { - kfree(sp); - return NULL; - } + sp->so_seqid.owner_id = atomic64_inc_return(&server->owner_ctr); sp->so_server = server; sp->so_cred = get_cred(cred); spin_lock_init(&sp->so_lock); @@ -513,7 +509,6 @@ nfs4_alloc_state_owner(struct nfs_server *server, nfs4_init_seqid_counter(&sp->so_seqid); atomic_set(&sp->so_count, 1); INIT_LIST_HEAD(&sp->so_lru); - seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock); mutex_init(&sp->so_delegreturn_mutex); return sp; } @@ -537,7 +532,6 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp) { nfs4_destroy_seqid_counter(&sp->so_seqid); put_cred(sp->so_cred); - ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id); kfree(sp); } @@ -847,15 +841,15 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) */ static struct nfs4_lock_state * __nfs4_find_lock_state(struct nfs4_state *state, - fl_owner_t fl_owner, fl_owner_t fl_owner2) + fl_owner_t owner, fl_owner_t owner2) { struct nfs4_lock_state *pos, *ret = NULL; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (pos->ls_owner == fl_owner) { + if (pos->ls_owner == owner) { ret = pos; break; } - if (pos->ls_owner == fl_owner2) + if 
(pos->ls_owner == owner2) ret = pos; } if (ret) @@ -868,7 +862,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, * exists, return an uninitialized one. * */ -static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t owner) { struct nfs4_lock_state *lsp; struct nfs_server *server = state->owner->so_server; @@ -879,20 +873,14 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f nfs4_init_seqid_counter(&lsp->ls_seqid); refcount_set(&lsp->ls_count, 1); lsp->ls_state = state; - lsp->ls_owner = fl_owner; - lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT); - if (lsp->ls_seqid.owner_id < 0) - goto out_free; + lsp->ls_owner = owner; + lsp->ls_seqid.owner_id = atomic64_inc_return(&server->owner_ctr); INIT_LIST_HEAD(&lsp->ls_locks); return lsp; -out_free: - kfree(lsp); - return NULL; } void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { - ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id); nfs4_destroy_seqid_counter(&lsp->ls_seqid); kfree(lsp); } @@ -980,7 +968,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) if (fl->fl_ops != NULL) return 0; - lsp = nfs4_get_lock_state(state, fl->fl_owner); + lsp = nfs4_get_lock_state(state, fl->c.flc_owner); if (lsp == NULL) return -ENOMEM; fl->fl_u.nfs4_fl.owner = lsp; @@ -993,7 +981,7 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, const struct nfs_lock_context *l_ctx) { struct nfs4_lock_state *lsp; - fl_owner_t fl_owner, fl_flock_owner; + fl_owner_t owner, fl_flock_owner; int ret = -ENOENT; if (l_ctx == NULL) @@ -1002,11 +990,11 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) goto out; - fl_owner = l_ctx->lockowner; + owner = l_ctx->lockowner; fl_flock_owner = l_ctx->open_context->flock_owner; 
spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner, fl_flock_owner); + lsp = __nfs4_find_lock_state(state, owner, fl_flock_owner); if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) ret = -EIO; else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { @@ -1529,8 +1517,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ down_write(&nfsi->rwsem); spin_lock(&flctx->flc_lock); restart: - list_for_each_entry(fl, list, fl_list) { - if (nfs_file_open_context(fl->fl_file)->state != state) + for_each_file_lock(fl, list) { + if (nfs_file_open_context(fl->c.flc_file)->state != state) continue; spin_unlock(&flctx->flc_lock); status = ops->recover_lock(state, fl); @@ -1667,7 +1655,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, * server that doesn't support a grace period. */ spin_lock(&sp->so_lock); - raw_write_seqcount_begin(&sp->so_reclaim_seqcount); restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) @@ -1735,7 +1722,6 @@ restart: spin_lock(&sp->so_lock); goto restart; } - raw_write_seqcount_end(&sp->so_reclaim_seqcount); spin_unlock(&sp->so_lock); #ifdef CONFIG_NFS_V4_2 if (found_ssc_copy_state) @@ -1745,7 +1731,6 @@ restart: out_err: nfs4_put_open_state(state); spin_lock(&sp->so_lock); - raw_write_seqcount_end(&sp->so_reclaim_seqcount); spin_unlock(&sp->so_lock); return status; } @@ -1867,6 +1852,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) if (!nfs4_state_clear_reclaim_reboot(clp)) return; + pnfs_destroy_all_layouts(clp); ops = clp->cl_mvops->reboot_recovery_ops; cred = nfs4_get_clid_cred(clp); err = nfs4_reclaim_complete(clp, ops, cred); @@ -1928,9 +1914,12 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov struct nfs_server *server; struct rb_node *pos; LIST_HEAD(freeme); - int status = 0; int lost_locks = 0; + int status; + status = 
nfs4_begin_drain_session(clp); + if (status < 0) + return status; restart: rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { @@ -2024,6 +2013,12 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) nfs_mark_client_ready(clp, -EPERM); clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); return -EPERM; + case -ETIMEDOUT: + if (clp->cl_cons_state == NFS_CS_SESSION_INITING) { + nfs_mark_client_ready(clp, -EIO); + return -EIO; + } + fallthrough; case -EACCES: case -NFS4ERR_DELAY: case -EAGAIN: @@ -2070,7 +2065,6 @@ static int nfs4_establish_lease(struct nfs_client *clp) put_cred(cred); if (status != 0) return status; - pnfs_destroy_all_layouts(clp); return 0; } @@ -2682,6 +2676,8 @@ static void nfs4_state_manager(struct nfs_client *clp) section = "reclaim reboot"; status = nfs4_do_reclaim(clp, clp->cl_mvops->reboot_recovery_ops); + if (status == 0) + status = pnfs_layout_handle_reboot(clp); if (status == -EAGAIN) continue; if (status < 0) @@ -2693,6 +2689,9 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Detect expired delegations... 
*/ if (test_and_clear_bit(NFS4CLNT_DELEGATION_EXPIRED, &clp->cl_state)) { section = "detect expired delegations"; + status = nfs4_begin_drain_session(clp); + if (status < 0) + goto out_error; nfs_reap_expired_delegations(clp); continue; } @@ -2741,7 +2740,15 @@ out_error: pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" " with error %d\n", section_sep, section, clp->cl_hostname, -status); - ssleep(1); + switch (status) { + case -ENETDOWN: + case -ENETUNREACH: + nfs_mark_client_ready(clp, -EIO); + break; + default: + ssleep(1); + break; + } out_drain: memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index d09bcfd7db89..b29a26923ce0 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -145,6 +145,7 @@ static int do_nfs4_mount(struct nfs_server *server, const char *export_path) { struct nfs_fs_context *root_ctx; + struct nfs_fs_context *ctx; struct fs_context *root_fc; struct vfsmount *root_mnt; struct dentry *dentry; @@ -157,6 +158,12 @@ static int do_nfs4_mount(struct nfs_server *server, .dirfd = -1, }; + struct fs_parameter param_fsc = { + .key = "fsc", + .type = fs_value_is_string, + .dirfd = -1, + }; + if (IS_ERR(server)) return PTR_ERR(server); @@ -168,9 +175,26 @@ static int do_nfs4_mount(struct nfs_server *server, kfree(root_fc->source); root_fc->source = NULL; + ctx = nfs_fc2context(fc); root_ctx = nfs_fc2context(root_fc); root_ctx->internal = true; root_ctx->server = server; + + if (ctx->fscache_uniq) { + len = strlen(ctx->fscache_uniq); + param_fsc.size = len; + param_fsc.string = kmemdup_nul(ctx->fscache_uniq, len, GFP_KERNEL); + if (param_fsc.string == NULL) { + put_fs_context(root_fc); + return -ENOMEM; + } + ret = vfs_parse_fs_param(root_fc, &param_fsc); + kfree(param_fsc.string); + if (ret < 0) { + put_fs_context(root_fc); + return ret; + } + } /* We leave export_path unset as it's not used to find the root. 
*/ len = strlen(hostname) + 5; @@ -308,6 +332,7 @@ static void __exit exit_nfs_v4(void) nfs_dns_resolver_destroy(); } +MODULE_DESCRIPTION("NFSv4 client support"); MODULE_LICENSE("GPL"); module_init(init_nfs_v4); diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index e776200e9a11..886a7c4c60b3 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c @@ -34,7 +34,6 @@ static struct ctl_table nfs4_cb_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; int nfs4_register_sysctl(void) diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index d9ac556bebcf..389941ccc9c9 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -2,6 +2,8 @@ /* * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com> */ +#include <uapi/linux/pr.h> +#include <linux/blkdev.h> #include <linux/nfs_fs.h> #include "nfs4_fs.h" #include "internal.h" @@ -28,4 +30,11 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg_err); + +EXPORT_TRACEPOINT_SYMBOL_GPL(fl_getdevinfo); #endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index d27919d7241d..22c973316f0b 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -47,7 +47,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_fast_assign( __entry->error = error < 0 ? 
-error : 0; - __assign_str(dstaddr, clp->cl_hostname); + __assign_str(dstaddr); ), TP_printk( @@ -77,6 +77,36 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); +TRACE_EVENT(nfs4_trunked_exchange_id, + TP_PROTO( + const struct nfs_client *clp, + const char *addr, + int error + ), + + TP_ARGS(clp, addr, error), + + TP_STRUCT__entry( + __string(main_addr, clp->cl_hostname) + __string(trunk_addr, addr) + __field(unsigned long, error) + ), + + TP_fast_assign( + __entry->error = error < 0 ? -error : 0; + __assign_str(main_addr); + __assign_str(trunk_addr); + ), + + TP_printk( + "error=%ld (%s) main_addr=%s trunk_addr=%s", + -__entry->error, + show_nfs4_status(__entry->error), + __get_str(main_addr), + __get_str(trunk_addr) + ) +); + TRACE_EVENT(nfs4_sequence_done, TP_PROTO( const struct nfs4_session *session, @@ -335,7 +365,7 @@ TRACE_EVENT(nfs4_state_mgr, TP_fast_assign( __entry->state = clp->cl_state; - __assign_str(hostname, clp->cl_hostname); + __assign_str(hostname); ), TP_printk( @@ -363,8 +393,8 @@ TRACE_EVENT(nfs4_state_mgr_failed, TP_fast_assign( __entry->error = status < 0 ? -status : 0; __entry->state = clp->cl_state; - __assign_str(hostname, clp->cl_hostname); - __assign_str(section, section); + __assign_str(hostname); + __assign_str(section); ), TP_printk( @@ -548,7 +578,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, __entry->fhandle = 0; } __entry->dir = NFS_FILEID(d_inode(ctx->dentry->d_parent)); - __assign_str(name, ctx->dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -699,7 +729,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, __entry->error = error < 0 ? -error : 0; __entry->cmd = cmd; - __entry->type = request->fl_type; + __entry->type = request->c.flc_type; __entry->start = request->fl_start; __entry->end = request->fl_end; __entry->dev = inode->i_sb->s_dev; @@ -771,7 +801,7 @@ TRACE_EVENT(nfs4_set_lock, __entry->error = error < 0 ? 
-error : 0; __entry->cmd = cmd; - __entry->type = request->fl_type; + __entry->type = request->c.flc_type; __entry->start = request->fl_start; __entry->end = request->fl_end; __entry->dev = inode->i_sb->s_dev; @@ -1042,7 +1072,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = -error; - __assign_str(name, name->name); + __assign_str(name); ), TP_printk( @@ -1126,8 +1156,8 @@ TRACE_EVENT(nfs4_rename, __entry->olddir = NFS_FILEID(olddir); __entry->newdir = NFS_FILEID(newdir); __entry->error = error < 0 ? -error : 0; - __assign_str(oldname, oldname->name); - __assign_str(newname, newname->name); + __assign_str(oldname); + __assign_str(newname); ), TP_printk( @@ -1329,7 +1359,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown"); + __assign_str(dstaddr); ), TP_printk( @@ -1386,7 +1416,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? 
clp->cl_hostname : "unknown"); + __assign_str(dstaddr); __entry->stateid_seq = be32_to_cpu(stateid->seqid); __entry->stateid_hash = @@ -1930,7 +1960,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_event, ), TP_fast_assign( - __assign_str(dstaddr, clp->cl_hostname); + __assign_str(dstaddr); memcpy(__entry->deviceid, deviceid->data, NFS4_DEVICEID4_SIZE); ), @@ -1968,7 +1998,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_status, TP_fast_assign( __entry->dev = server->s_dev; __entry->status = status; - __assign_str(dstaddr, server->nfs_client->cl_hostname); + __assign_str(dstaddr); memcpy(__entry->deviceid, deviceid->data, NFS4_DEVICEID4_SIZE); ), @@ -1991,6 +2021,34 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_status, DEFINE_PNFS_DEVICEID_STATUS(nfs4_getdeviceinfo); DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid); +TRACE_EVENT(fl_getdevinfo, + TP_PROTO( + const struct nfs_server *server, + const struct nfs4_deviceid *deviceid, + char *ds_remotestr + ), + TP_ARGS(server, deviceid, ds_remotestr), + + TP_STRUCT__entry( + __string(mds_addr, server->nfs_client->cl_hostname) + __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE) + __string(ds_ips, ds_remotestr) + ), + + TP_fast_assign( + __assign_str(mds_addr); + __assign_str(ds_ips); + memcpy(__entry->deviceid, deviceid->data, + NFS4_DEVICEID4_SIZE); + ), + TP_printk( + "deviceid=%s, mds_addr=%s, ds_ips=%s", + __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE), + __get_str(mds_addr), + __get_str(ds_ips) + ) +); + DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_PROTO( const struct nfs_pgio_header *hdr @@ -2025,9 +2083,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, be32_to_cpu(hdr->args.stateid.seqid); __entry->stateid_hash = nfs_stateid_hash(&hdr->args.stateid); - __assign_str(dstaddr, hdr->ds_clp ? 
- rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown"); + __assign_str(dstaddr); ), TP_printk( @@ -2081,9 +2137,7 @@ TRACE_EVENT(ff_layout_commit_error, __entry->dev = inode->i_sb->s_dev; __entry->offset = data->args.offset; __entry->count = data->args.count; - __assign_str(dstaddr, data->ds_clp ? - rpc_peeraddr2str(data->ds_clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown"); + __assign_str(dstaddr); ), TP_printk( @@ -2099,6 +2153,94 @@ TRACE_EVENT(ff_layout_commit_error, ) ); +DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class, + TP_PROTO( + const struct block_device *bdev, + u64 key + ), + TP_ARGS(bdev, key), + TP_STRUCT__entry( + __field(u64, key) + __field(dev_t, dev) + __string(device, bdev->bd_disk->disk_name) + ), + TP_fast_assign( + __entry->key = key; + __entry->dev = bdev->bd_dev; + __assign_str(device); + ), + TP_printk("dev=%d,%d (%s) key=0x%016llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __get_str(device), __entry->key + ) +); + +#define DEFINE_NFS4_BLOCK_PRKEY_EVENT(name) \ + DEFINE_EVENT(pnfs_bl_pr_key_class, name, \ + TP_PROTO( \ + const struct block_device *bdev, \ + u64 key \ + ), \ + TP_ARGS(bdev, key)) +DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_reg); +DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_unreg); + +/* + * From uapi/linux/pr.h + */ +TRACE_DEFINE_ENUM(PR_STS_SUCCESS); +TRACE_DEFINE_ENUM(PR_STS_IOERR); +TRACE_DEFINE_ENUM(PR_STS_RESERVATION_CONFLICT); +TRACE_DEFINE_ENUM(PR_STS_RETRY_PATH_FAILURE); +TRACE_DEFINE_ENUM(PR_STS_PATH_FAST_FAILED); +TRACE_DEFINE_ENUM(PR_STS_PATH_FAILED); + +#define show_pr_status(x) \ + __print_symbolic(x, \ + { PR_STS_SUCCESS, "SUCCESS" }, \ + { PR_STS_IOERR, "IOERR" }, \ + { PR_STS_RESERVATION_CONFLICT, "RESERVATION_CONFLICT" }, \ + { PR_STS_RETRY_PATH_FAILURE, "RETRY_PATH_FAILURE" }, \ + { PR_STS_PATH_FAST_FAILED, "PATH_FAST_FAILED" }, \ + { PR_STS_PATH_FAILED, "PATH_FAILED" }) + +DECLARE_EVENT_CLASS(pnfs_bl_pr_key_err_class, + TP_PROTO( + const struct block_device *bdev, + u64 key, + int status + 
), + TP_ARGS(bdev, key, status), + TP_STRUCT__entry( + __field(u64, key) + __field(dev_t, dev) + __field(unsigned long, status) + __string(device, bdev->bd_disk->disk_name) + ), + TP_fast_assign( + __entry->key = key; + __entry->dev = bdev->bd_dev; + __entry->status = status; + __assign_str(device); + ), + TP_printk("dev=%d,%d (%s) key=0x%016llx status=%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __get_str(device), __entry->key, + show_pr_status(__entry->status) + ) +); + +#define DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(name) \ + DEFINE_EVENT(pnfs_bl_pr_key_err_class, name, \ + TP_PROTO( \ + const struct block_device *bdev, \ + u64 key, \ + int status \ + ), \ + TP_ARGS(bdev, key, status)) +DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_reg_err); +DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_unreg_err); + #ifdef CONFIG_NFS_V4_2 TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA); TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE); @@ -2521,7 +2663,7 @@ DECLARE_EVENT_CLASS(nfs4_xattr_event, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __assign_str(name, name); + __assign_str(name); ), TP_printk( diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index deec76cf5afe..71f45cc0ca74 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -52,6 +52,7 @@ #include <linux/nfs.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include <linux/nfs_common.h> #include "nfs4_fs.h" #include "nfs4trace.h" @@ -63,11 +64,7 @@ #define NFSDBG_FACILITY NFSDBG_XDR -/* Mapping from NFS error code to "errno" error code. 
*/ -#define errno_NFSERR_IO EIO - struct compound_hdr; -static int nfs4_stat_to_errno(int); static void encode_layoutget(struct xdr_stream *xdr, const struct nfs4_layoutget_args *args, struct compound_hdr *hdr); @@ -85,9 +82,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ #define pagepad_maxsz (1) -#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2) -#define lock_owner_id_maxsz (1 + 1 + 4) -#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define open_owner_id_maxsz (2 + 1 + 2 + 2) +#define lock_owner_id_maxsz (2 + 1 + 2) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define op_encode_hdr_maxsz (1) @@ -188,7 +184,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define encode_claim_null_maxsz (1 + nfs4_name_maxsz) #define encode_open_maxsz (op_encode_hdr_maxsz + \ 2 + encode_share_access_maxsz + 2 + \ - open_owner_id_maxsz + \ + 1 + open_owner_id_maxsz + \ encode_opentype_maxsz + \ encode_claim_null_maxsz) #define decode_space_limit_maxsz (3) @@ -224,6 +220,11 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_attrs_maxsz) #define decode_setattr_maxsz (op_decode_hdr_maxsz + \ nfs4_fattr_bitmap_maxsz) +#define encode_delegattr_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + nfs4_fattr_bitmap_maxsz + \ + 2*nfstime4_maxsz) +#define decode_delegattr_maxsz (decode_setattr_maxsz) #define encode_read_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) #define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz) @@ -253,13 +254,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define encode_link_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) #define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) -#define encode_lockowner_maxsz (7) +#define 
encode_lockowner_maxsz (2 + 1 + lock_owner_id_maxsz) + #define encode_lock_maxsz (op_encode_hdr_maxsz + \ 7 + \ 1 + encode_stateid_maxsz + 1 + \ encode_lockowner_maxsz) #define decode_lock_denied_maxsz \ - (8 + decode_lockowner_maxsz) + (2 + 2 + 1 + 2 + 1 + lock_owner_id_maxsz) #define decode_lock_maxsz (op_decode_hdr_maxsz + \ decode_lock_denied_maxsz) #define encode_lockt_maxsz (op_encode_hdr_maxsz + 5 + \ @@ -615,7 +617,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_lockowner_maxsz) #define NFS4_dec_release_lockowner_sz \ (compound_decode_hdr_maxsz + \ - decode_lockowner_maxsz) + decode_release_lockowner_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -758,12 +760,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_layoutreturn_maxsz + \ + encode_delegattr_maxsz + \ encode_delegreturn_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_layoutreturn_maxsz + \ + decode_delegattr_maxsz + \ decode_delegreturn_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ @@ -968,11 +972,6 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) return p; } -static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) -{ - WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); -} - static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { WARN_ON_ONCE(xdr_stream_encode_opaque(xdr, str, len) < 0); @@ -1060,9 +1059,10 @@ static void encode_nops(struct compound_hdr *hdr) *hdr->nops_p = htonl(hdr->nops); } -static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid) +static void encode_nfs4_stateid(struct xdr_stream *xdr, + const nfs4_stateid *stateid) { - 
encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); + encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); } static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) @@ -1305,7 +1305,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct static inline int nfs4_lock_type(struct file_lock *fl, int block) { - if (fl->fl_type == F_RDLCK) + if (lock_is_read(fl)) return block ? NFS4_READW_LT : NFS4_READ_LT; return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT; } @@ -1412,16 +1412,16 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena __be32 *p; /* * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, - * owner 4 = 32 + * owner 28 */ encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->share_access); - p = reserve_space(xdr, 36); + p = reserve_space(xdr, 40); p = xdr_encode_hyper(p, arg->clientid); - *p++ = cpu_to_be32(24); + *p++ = cpu_to_be32(28); p = xdr_encode_opaque_fixed(p, "open id:", 8); *p++ = cpu_to_be32(arg->server->s_dev); - *p++ = cpu_to_be32(arg->id.uniquifier); + p = xdr_encode_hyper(p, arg->id.uniquifier); xdr_encode_hyper(p, arg->id.create_time); } @@ -1468,20 +1468,18 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a } } -static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type) +static inline void encode_delegation_type(struct xdr_stream *xdr, u32 delegation_type) { __be32 *p; p = reserve_space(xdr, 4); switch (delegation_type) { - case 0: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE); - break; - case FMODE_READ: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ); - break; - case FMODE_WRITE|FMODE_READ: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE); + case NFS4_OPEN_DELEGATE_NONE: + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + *p = cpu_to_be32(delegation_type); break; 
default: BUG(); @@ -1497,7 +1495,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr * encode_string(xdr, name->len, name->name); } -static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type) +static inline void encode_claim_previous(struct xdr_stream *xdr, u32 type) { __be32 *p; @@ -1602,7 +1600,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { uint32_t attrs[3] = { - FATTR4_WORD0_RDATTR_ERROR, + FATTR4_WORD0_TYPE + | FATTR4_WORD0_RDATTR_ERROR, FATTR4_WORD1_MOUNTED_ON_FILEID, }; uint32_t dircount = readdir->count; @@ -1612,12 +1611,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg unsigned int i; if (readdir->plus) { - attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| - FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID; - attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| - FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| - FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| - FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + attrs[0] |= FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEHANDLE + | FATTR4_WORD0_FILEID; + attrs[1] |= FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY; attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; } /* Use mounted_on_fileid only if the server supports it */ @@ -1726,6 +1733,33 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs server->attr_bitmask); } +static void encode_delegattr(struct xdr_stream *xdr, + const nfs4_stateid *stateid, + const struct nfs4_delegattr *attr, + 
struct compound_hdr *hdr) +{ + uint32_t bitmap[3] = { 0 }; + uint32_t len = 0; + __be32 *p; + + encode_op_hdr(xdr, OP_SETATTR, encode_delegattr_maxsz, hdr); + encode_nfs4_stateid(xdr, stateid); + if (attr->atime_set) { + bitmap[2] |= FATTR4_WORD2_TIME_DELEG_ACCESS; + len += (nfstime4_maxsz << 2); + } + if (attr->mtime_set) { + bitmap[2] |= FATTR4_WORD2_TIME_DELEG_MODIFY; + len += (nfstime4_maxsz << 2); + } + xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap)); + xdr_stream_encode_opaque_inline(xdr, (void **)&p, len); + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) + p = xdr_encode_nfstime4(p, &attr->atime); + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) + p = xdr_encode_nfstime4(p, &attr->mtime); +} + static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) { __be32 *p; @@ -2096,7 +2130,7 @@ static void encode_test_stateid(struct xdr_stream *xdr, { encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr); encode_uint32(xdr, 1); - encode_nfs4_stateid(xdr, args->stateid); + encode_nfs4_stateid(xdr, &args->stateid); } static void encode_free_stateid(struct xdr_stream *xdr, @@ -2803,6 +2837,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_putfh(xdr, args->fhandle, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); + if (args->sattr_args) + encode_delegattr(xdr, args->stateid, args->sattr_args, &hdr); if (args->bitmask) encode_getfattr(xdr, args->bitmask, &hdr); encode_delegreturn(xdr, args->stateid, &hdr); @@ -3403,7 +3439,7 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; } - dprintk("%s: link support=%s\n", __func__, *res == 0 ? 
"false" : "true"); + dprintk("%s: link support=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -3421,7 +3457,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; } - dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: symlink support=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -3563,7 +3599,7 @@ static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE; } - dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: case_insensitive=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -3581,7 +3617,7 @@ static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap, *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING; } - dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: case_preserving=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -4289,8 +4325,29 @@ static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap, *res = be32_to_cpup(p); bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT; } - dprintk("%s: XATTR support=%s\n", __func__, - *res == 0 ? 
"false" : "true"); + dprintk("%s: XATTR support=%s\n", __func__, str_false_true(*res == 0)); + return 0; +} + +static int decode_attr_open_arguments(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs4_open_caps *res) +{ + memset(res, 0, sizeof(*res)); + if (unlikely(bitmap[2] & (FATTR4_WORD2_OPEN_ARGUMENTS - 1U))) + return -EIO; + if (likely(bitmap[2] & FATTR4_WORD2_OPEN_ARGUMENTS)) { + if (decode_bitmap4(xdr, res->oa_share_access, ARRAY_SIZE(res->oa_share_access)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_share_deny, ARRAY_SIZE(res->oa_share_deny)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_share_access_want, ARRAY_SIZE(res->oa_share_access_want)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_open_claim, ARRAY_SIZE(res->oa_open_claim)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_createmode, ARRAY_SIZE(res->oa_createmode)) < 0) + return -EIO; + bitmap[2] &= ~FATTR4_WORD2_OPEN_ARGUMENTS; + } return 0; } @@ -4343,14 +4400,6 @@ static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access) return 0; } -static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) -{ - ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len); - if (unlikely(ret < 0)) - return -EIO; - return 0; -} - static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) { return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); @@ -4468,6 +4517,8 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re if ((status = decode_attr_exclcreat_supported(xdr, bitmap, res->exclcreat_bitmask)) != 0) goto xdr_error; + if ((status = decode_attr_open_arguments(xdr, bitmap, &res->open_caps)) != 0) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: dprintk("%s: xdr returned %d!\n", __func__, -status); @@ -5026,7 +5077,7 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) /* * We create the owner, so we know a proper owner.id length is 4. 
*/ -static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) +static int decode_lock_denied(struct xdr_stream *xdr, struct file_lock *fl) { uint64_t offset, length, clientid; __be32 *p; @@ -5043,10 +5094,10 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) fl->fl_end = fl->fl_start + (loff_t)length - 1; if (length == ~(uint64_t)0) fl->fl_end = OFFSET_MAX; - fl->fl_type = F_WRLCK; + fl->c.flc_type = F_WRLCK; if (type & 1) - fl->fl_type = F_RDLCK; - fl->fl_pid = 0; + fl->c.flc_type = F_RDLCK; + fl->c.flc_pid = 0; } p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */ namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */ @@ -5139,13 +5190,12 @@ static int decode_space_limit(struct xdr_stream *xdr, } static int decode_rw_delegation(struct xdr_stream *xdr, - uint32_t delegation_type, - struct nfs_openres *res) + struct nfs4_open_delegation *res) { __be32 *p; int status; - status = decode_delegation_stateid(xdr, &res->delegation); + status = decode_delegation_stateid(xdr, &res->stateid); if (unlikely(status)) return status; p = xdr_inline_decode(xdr, 4); @@ -5153,52 +5203,57 @@ static int decode_rw_delegation(struct xdr_stream *xdr, return -EIO; res->do_recall = be32_to_cpup(p); - switch (delegation_type) { + switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_READ: - res->delegation_type = FMODE_READ; + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + res->type = FMODE_READ; break; case NFS4_OPEN_DELEGATE_WRITE: - res->delegation_type = FMODE_WRITE|FMODE_READ; + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + res->type = FMODE_WRITE|FMODE_READ; if (decode_space_limit(xdr, &res->pagemod_limit) < 0) return -EIO; } return decode_ace(xdr, NULL); } -static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +static int decode_no_delegation(struct xdr_stream *xdr, + struct nfs4_open_delegation *res) { __be32 *p; - uint32_t why_no_delegation; p = xdr_inline_decode(xdr, 4); if 
(unlikely(!p)) return -EIO; - why_no_delegation = be32_to_cpup(p); - switch (why_no_delegation) { + res->why_no_delegation = be32_to_cpup(p); + switch (res->why_no_delegation) { case WND4_CONTENTION: case WND4_RESOURCE: - xdr_inline_decode(xdr, 4); - /* Ignore for now */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + res->will_notify = be32_to_cpup(p); } return 0; } -static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +static int decode_delegation(struct xdr_stream *xdr, + struct nfs4_open_delegation *res) { __be32 *p; - uint32_t delegation_type; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) return -EIO; - delegation_type = be32_to_cpup(p); - res->delegation_type = 0; - switch (delegation_type) { + res->open_delegation_type = be32_to_cpup(p); + switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_NONE: return 0; case NFS4_OPEN_DELEGATE_READ: case NFS4_OPEN_DELEGATE_WRITE: - return decode_rw_delegation(xdr, delegation_type, res); + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + return decode_rw_delegation(xdr, res); case NFS4_OPEN_DELEGATE_NONE_EXT: return decode_no_delegation(xdr, res); } @@ -5239,7 +5294,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) for (; i < NFS4_BITMAP_SIZE; i++) res->attrset[i] = 0; - return decode_delegation(xdr, res); + return decode_delegation(xdr, &res->delegation); xdr_error: dprintk("%s: Bitmap too large! 
Length = %u\n", __func__, bmlen); return -EIO; @@ -5471,6 +5526,11 @@ static int decode_setattr(struct xdr_stream *xdr) return -EIO; } +static int decode_delegattr(struct xdr_stream *xdr) +{ + return decode_setattr(xdr); +} + static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res) { __be32 *p; @@ -7043,6 +7103,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, if (status) goto out; } + if (res->sattr_res) { + status = decode_delegattr(xdr); + res->sattr_ret = status; + if (status) + goto out; + } if (res->fattr) { status = decode_getfattr(xdr, res->fattr, res->server); if (status != 0) @@ -7538,72 +7604,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return 0; } -/* - * We need to translate between nfs status return values and - * the local errno values which may not be the same. - */ -static struct { - int stat; - int errno; -} nfs_errtbl[] = { - { NFS4_OK, 0 }, - { NFS4ERR_PERM, -EPERM }, - { NFS4ERR_NOENT, -ENOENT }, - { NFS4ERR_IO, -errno_NFSERR_IO}, - { NFS4ERR_NXIO, -ENXIO }, - { NFS4ERR_ACCESS, -EACCES }, - { NFS4ERR_EXIST, -EEXIST }, - { NFS4ERR_XDEV, -EXDEV }, - { NFS4ERR_NOTDIR, -ENOTDIR }, - { NFS4ERR_ISDIR, -EISDIR }, - { NFS4ERR_INVAL, -EINVAL }, - { NFS4ERR_FBIG, -EFBIG }, - { NFS4ERR_NOSPC, -ENOSPC }, - { NFS4ERR_ROFS, -EROFS }, - { NFS4ERR_MLINK, -EMLINK }, - { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG }, - { NFS4ERR_NOTEMPTY, -ENOTEMPTY }, - { NFS4ERR_DQUOT, -EDQUOT }, - { NFS4ERR_STALE, -ESTALE }, - { NFS4ERR_BADHANDLE, -EBADHANDLE }, - { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, - { NFS4ERR_NOTSUPP, -ENOTSUPP }, - { NFS4ERR_TOOSMALL, -ETOOSMALL }, - { NFS4ERR_SERVERFAULT, -EREMOTEIO }, - { NFS4ERR_BADTYPE, -EBADTYPE }, - { NFS4ERR_LOCKED, -EAGAIN }, - { NFS4ERR_SYMLINK, -ELOOP }, - { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, - { NFS4ERR_DEADLOCK, -EDEADLK }, - { NFS4ERR_NOXATTR, -ENODATA }, - { NFS4ERR_XATTR2BIG, -E2BIG }, - { -1, -EIO } -}; - -/* - * Convert an NFS error code to a local one. 
- * This one is used jointly by NFSv2 and NFSv3. - */ -static int -nfs4_stat_to_errno(int stat) -{ - int i; - for (i = 0; nfs_errtbl[i].stat != -1; i++) { - if (nfs_errtbl[i].stat == stat) - return nfs_errtbl[i].errno; - } - if (stat <= 10000 || stat > 10100) { - /* The server is looney tunes. */ - return -EREMOTEIO; - } - /* If we cannot translate the error, the recovery routines should - * handle it. - * Note: remaining NFSv4 error codes have values > 10000, so should - * not conflict with native Linux error codes. - */ - return -stat; -} - #ifdef CONFIG_NFS_V4_2 #include "nfs42xdr.c" #endif /* CONFIG_NFS_V4_2 */ diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 4e90ca531176..1eab98c277fa 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -400,6 +400,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) + __field(u64, fileid) __string(name, dentry->d_name.name) ), @@ -407,16 +408,18 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __assign_str(name, dentry->d_name.name); + __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); + __assign_str(name); ), TP_printk( - "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", + "flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, - __get_str(name) + __get_str(name), + __entry->fileid ) ); @@ -444,6 +447,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) + __field(u64, fileid) __string(name, dentry->d_name.name) ), @@ -452,17 +456,19 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; __entry->flags = flags; - __assign_str(name, dentry->d_name.name); + __entry->fileid = d_is_negative(dentry) ? 
0 : NFS_FILEID(d_inode(dentry)); + __assign_str(name); ), TP_printk( - "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", + "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, - __get_str(name) + __get_str(name), + __entry->fileid ) ); @@ -506,7 +512,7 @@ TRACE_EVENT(nfs_atomic_open_enter, __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fmode = (__force unsigned long)ctx->mode; - __assign_str(name, ctx->dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -545,7 +551,7 @@ TRACE_EVENT(nfs_atomic_open_exit, __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fmode = (__force unsigned long)ctx->mode; - __assign_str(name, ctx->dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -581,7 +587,7 @@ TRACE_EVENT(nfs_create_enter, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -617,7 +623,7 @@ TRACE_EVENT(nfs_create_exit, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -648,7 +654,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -687,7 +693,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? 
-error : 0; - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -741,7 +747,7 @@ TRACE_EVENT(nfs_link_enter, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -777,7 +783,7 @@ TRACE_EVENT(nfs_link_exit, __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -813,8 +819,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event, __entry->dev = old_dir->i_sb->s_dev; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); - __assign_str(old_name, old_dentry->d_name.name); - __assign_str(new_name, new_dentry->d_name.name); + __assign_str(old_name); + __assign_str(new_name); ), TP_printk( @@ -862,8 +868,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, __entry->error = -error; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); - __assign_str(old_name, old_dentry->d_name.name); - __assign_str(new_name, new_dentry->d_name.name); + __assign_str(old_name); + __assign_str(new_name); ), TP_printk( @@ -893,7 +899,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, DEFINE_NFS_RENAME_EVENT(nfs_rename_enter); DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit); -DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename); +DEFINE_NFS_RENAME_EVENT_DONE(nfs_async_rename_done); TRACE_EVENT(nfs_sillyrename_unlink, TP_PROTO( @@ -933,10 +939,11 @@ TRACE_EVENT(nfs_sillyrename_unlink, DECLARE_EVENT_CLASS(nfs_folio_event, TP_PROTO( const struct inode *inode, - struct folio *folio + loff_t offset, + size_t count ), - TP_ARGS(inode, folio), + TP_ARGS(inode, offset, count), TP_STRUCT__entry( __field(dev_t, dev) @@ -944,7 +951,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event, __field(u64, fileid) __field(u64, version) __field(loff_t, offset) - __field(u32, count) + __field(size_t, 
count) ), TP_fast_assign( @@ -954,13 +961,13 @@ DECLARE_EVENT_CLASS(nfs_folio_event, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = folio_file_pos(folio); - __entry->count = nfs_folio_length(folio); + __entry->offset = offset, + __entry->count = count; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " - "offset=%lld count=%u", + "offset=%lld count=%zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, @@ -972,18 +979,20 @@ DECLARE_EVENT_CLASS(nfs_folio_event, DEFINE_EVENT(nfs_folio_event, name, \ TP_PROTO( \ const struct inode *inode, \ - struct folio *folio \ + loff_t offset, \ + size_t count \ ), \ - TP_ARGS(inode, folio)) + TP_ARGS(inode, offset, count)) DECLARE_EVENT_CLASS(nfs_folio_event_done, TP_PROTO( const struct inode *inode, - struct folio *folio, + loff_t offset, + size_t count, int ret ), - TP_ARGS(inode, folio, ret), + TP_ARGS(inode, offset, count, ret), TP_STRUCT__entry( __field(dev_t, dev) @@ -992,7 +1001,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, __field(u64, fileid) __field(u64, version) __field(loff_t, offset) - __field(u32, count) + __field(size_t, count) ), TP_fast_assign( @@ -1002,14 +1011,14 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = folio_file_pos(folio); - __entry->count = nfs_folio_length(folio); + __entry->offset = offset, + __entry->count = count, __entry->ret = ret; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " - "offset=%lld count=%u ret=%d", + "offset=%lld count=%zu ret=%d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, @@ -1021,10 +1030,11 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, 
DEFINE_EVENT(nfs_folio_event_done, name, \ TP_PROTO( \ const struct inode *inode, \ - struct folio *folio, \ + loff_t offset, \ + size_t count, \ int ret \ ), \ - TP_ARGS(inode, folio, ret)) + TP_ARGS(inode, offset, count, ret)) DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done); @@ -1539,7 +1549,6 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class, __field(u32, fhandle) __field(loff_t, offset) __field(ssize_t, count) - __field(ssize_t, bytes_left) __field(ssize_t, error) __field(int, flags) ), @@ -1554,19 +1563,18 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class, __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = dreq->io_start; __entry->count = dreq->count; - __entry->bytes_left = dreq->bytes_left; __entry->error = dreq->error; __entry->flags = dreq->flags; ), TP_printk( "error=%zd fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zd bytes_left=%zd flags=%s", + "offset=%lld count=%zd flags=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, - __entry->count, __entry->bytes_left, + __entry->count, nfs_show_direct_req_flags(__entry->flags) ) ); @@ -1632,8 +1640,8 @@ TRACE_EVENT(nfs_mount_assign, ), TP_fast_assign( - __assign_str(option, option); - __assign_str(value, value); + __assign_str(option); + __assign_str(value); ), TP_printk("option %s=%s", @@ -1653,7 +1661,7 @@ TRACE_EVENT(nfs_mount_option, ), TP_fast_assign( - __assign_str(option, param->key); + __assign_str(option); ), TP_printk("option %s", __get_str(option)) @@ -1671,12 +1679,73 @@ TRACE_EVENT(nfs_mount_path, ), TP_fast_assign( - __assign_str(path, path); + __assign_str(path); ), TP_printk("path='%s'", __get_str(path)) ); +TRACE_EVENT(nfs_local_open_fh, + TP_PROTO( + const struct nfs_fh *fh, + fmode_t fmode, + int error + ), + + TP_ARGS(fh, fmode, error), + + TP_STRUCT__entry( + __field(int, error) + __field(u32, fhandle) + __field(unsigned int, fmode) + ), + + 
TP_fast_assign( + __entry->error = error; + __entry->fhandle = nfs_fhandle_hash(fh); + __entry->fmode = (__force unsigned int)fmode; + ), + + TP_printk( + "error=%d fhandle=0x%08x mode=%s", + __entry->error, + __entry->fhandle, + show_fs_fmode_flags(__entry->fmode) + ) +); + +DECLARE_EVENT_CLASS(nfs_local_client_event, + TP_PROTO( + const struct nfs_client *clp + ), + + TP_ARGS(clp), + + TP_STRUCT__entry( + __field(unsigned int, protocol) + __string(server, clp->cl_hostname) + ), + + TP_fast_assign( + __entry->protocol = clp->rpc_ops->version; + __assign_str(server); + ), + + TP_printk( + "server=%s NFSv%u", __get_str(server), __entry->protocol + ) +); + +#define DEFINE_NFS_LOCAL_CLIENT_EVENT(name) \ + DEFINE_EVENT(nfs_local_client_event, name, \ + TP_PROTO( \ + const struct nfs_client *clp \ + ), \ + TP_ARGS(clp)) + +DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_local_enable); +DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_local_disable); + DECLARE_EVENT_CLASS(nfs_xdr_event, TP_PROTO( const struct xdr_stream *xdr, @@ -1706,9 +1775,8 @@ DECLARE_EVENT_CLASS(nfs_xdr_event, __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->version = task->tk_client->cl_vers; __entry->error = error; - __assign_str(program, - task->tk_client->cl_program->name); - __assign_str(procedure, task->tk_msg.rpc_proc->p_name); + __assign_str(program); + __assign_str(procedure); ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 040b6b79c75e..82c3e2ca59a2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -188,102 +188,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); /* - * nfs_page_lock_head_request - page lock the head of the page group - * @req: any member of the page group - */ -struct nfs_page * -nfs_page_group_lock_head(struct nfs_page *req) -{ - struct nfs_page *head = req->wb_head; - - while (!nfs_lock_request(head)) { - int ret = nfs_wait_on_request(head); - if (ret < 0) - 
return ERR_PTR(ret); - } - if (head != req) - kref_get(&head->wb_kref); - return head; -} - -/* - * nfs_unroll_locks - unlock all newly locked reqs and wait on @req - * @head: head request of page group, must be holding head lock - * @req: request that couldn't lock and needs to wait on the req bit lock - * - * This is a helper function for nfs_lock_and_join_requests - * returns 0 on success, < 0 on error. - */ -static void -nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) -{ - struct nfs_page *tmp; - - /* relinquish all the locks successfully grabbed this run */ - for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { - if (!kref_read(&tmp->wb_kref)) - continue; - nfs_unlock_and_release_request(tmp); - } -} - -/* - * nfs_page_group_lock_subreq - try to lock a subrequest - * @head: head request of page group - * @subreq: request to lock - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request and page group both locked. - * On error, it returns with the page group unlocked. - */ -static int -nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) -{ - int ret; - - if (!kref_get_unless_zero(&subreq->wb_kref)) - return 0; - while (!nfs_lock_request(subreq)) { - nfs_page_group_unlock(head); - ret = nfs_wait_on_request(subreq); - if (!ret) - ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unroll_locks(head, subreq); - nfs_release_request(subreq); - return ret; - } - } - return 0; -} - -/* - * nfs_page_group_lock_subrequests - try to lock the subrequests - * @head: head request of page group - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request locked. 
- */ -int nfs_page_group_lock_subrequests(struct nfs_page *head) -{ - struct nfs_page *subreq; - int ret; - - ret = nfs_page_group_lock(head); - if (ret < 0) - return ret; - /* lock each request in the page group */ - for (subreq = head->wb_this_page; subreq != head; - subreq = subreq->wb_this_page) { - ret = nfs_page_group_lock_subreq(head, subreq); - if (ret < 0) - return ret; - } - nfs_page_group_unlock(head); - return 0; -} - -/* * nfs_page_set_headlock - set the request PG_HEADLOCK * @req: request that is to be locked * @@ -349,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req) nfs_page_clear_headlock(req); } -/* - * nfs_page_group_sync_on_bit_locked +/** + * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set + * @req: request in page group + * @bit: PG_* bit that is used to sync page group * * must be called with page group lock held */ -static bool -nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) { struct nfs_page *head = req->wb_head; struct nfs_page *tmp; @@ -569,7 +474,7 @@ struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx, if (IS_ERR(l_ctx)) return ERR_CAST(l_ctx); - ret = nfs_page_create(l_ctx, offset, folio_index(folio), offset, count); + ret = nfs_page_create(l_ctx, offset, folio->index, offset, count); if (!IS_ERR(ret)) { nfs_page_assign_folio(ret, folio); nfs_page_group_init(ret, NULL); @@ -694,25 +599,6 @@ void nfs_release_request(struct nfs_page *req) } EXPORT_SYMBOL_GPL(nfs_release_request); -/** - * nfs_wait_on_request - Wait for a request to complete. - * @req: request to wait upon. - * - * Interruptible by fatal signals only. - * The user is responsible for holding a count on the request. 
- */ -int -nfs_wait_on_request(struct nfs_page *req) -{ - if (!test_bit(PG_BUSY, &req->wb_flags)) - return 0; - set_bit(PG_CONTENDED2, &req->wb_flags); - smp_mb__after_atomic(); - return wait_on_bit_io(&req->wb_flags, PG_BUSY, - TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL_GPL(nfs_wait_on_request); - /* * nfs_generic_pg_test - determine if requests can be coalesced * @desc: pointer to descriptor @@ -846,7 +732,8 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct cred *cred, const struct nfs_rpc_ops *rpc_ops, - const struct rpc_call_ops *call_ops, int how, int flags) + const struct rpc_call_ops *call_ops, int how, int flags, + struct nfsd_file *localio) { struct rpc_task *task; struct rpc_message msg = { @@ -876,6 +763,10 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, hdr->args.count, (unsigned long long)hdr->args.offset); + if (localio) + return nfs_local_doio(NFS_SERVER(hdr->inode)->nfs_client, + localio, hdr, call_ops); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -1068,6 +959,12 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) { + struct nfs_client *clp = NFS_SERVER(hdr->inode)->nfs_client; + + struct nfsd_file *localio = + nfs_local_open_fh(clp, hdr->cred, + hdr->args.fh, hdr->args.context->mode); + if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion) task_flags = RPC_TASK_MOVEABLE; ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), @@ -1076,7 +973,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) NFS_PROTO(hdr->inode), desc->pg_rpc_callops, desc->pg_ioflags, - RPC_TASK_CRED_NOREF | task_flags); + RPC_TASK_CRED_NOREF | task_flags, + localio); } return ret; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fe83c681e3fe..89d49dd3978f 100644 --- 
a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, u32 seq); static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list); +static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -476,6 +477,18 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, return !list_empty(&lo->plh_segs); } +static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo, + struct list_head *lseg_list, + enum pnfs_iomode iomode, u32 seq) +{ + struct pnfs_layout_range range = { + .iomode = iomode, + .length = NFS4_MAX_UINT64, + }; + + return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq); +} + static int pnfs_iomode_to_fail_bit(u32 iomode) { @@ -732,6 +745,14 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return remaining; } +static void pnfs_reset_return_info(struct pnfs_layout_hdr *lo) +{ + struct pnfs_layout_segment *lseg; + + list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); +} + static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, struct list_head *free_me, @@ -846,8 +867,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, break; inode = pnfs_grab_inode_layout_hdr(lo); if (inode != NULL) { - if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) - list_del_rcu(&lo->plh_layouts); if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) continue; @@ -868,7 +887,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, static int pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, - bool is_bulk_recall) + enum pnfs_layout_destroy_mode mode) { struct pnfs_layout_hdr *lo; struct inode *inode; @@ -886,8 +905,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, 
spin_lock(&inode->i_lock); list_del_init(&lo->plh_bulk_destroy); - if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { - if (is_bulk_recall) + if (mode == PNFS_LAYOUT_FILE_BULK_RETURN) { + pnfs_mark_layout_stateid_return(lo, &lseg_list, + IOMODE_ANY, 0); + } else if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { + if (mode == PNFS_LAYOUT_BULK_RETURN) set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); ret = -EAGAIN; } @@ -901,10 +923,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, return ret; } -int -pnfs_destroy_layouts_byfsid(struct nfs_client *clp, - struct nfs_fsid *fsid, - bool is_recall) +int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, + enum pnfs_layout_destroy_mode mode) { struct nfs_server *server; LIST_HEAD(layout_list); @@ -923,33 +943,40 @@ restart: rcu_read_unlock(); spin_unlock(&clp->cl_lock); - if (list_empty(&layout_list)) - return 0; - return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); + return pnfs_layout_free_bulk_destroy_list(&layout_list, mode); } -int -pnfs_destroy_layouts_byclid(struct nfs_client *clp, - bool is_recall) +static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp, + struct list_head *list) { struct nfs_server *server; - LIST_HEAD(layout_list); spin_lock(&clp->cl_lock); rcu_read_lock(); restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - if (pnfs_layout_bulk_destroy_byserver_locked(clp, - server, - &layout_list) != 0) + if (pnfs_layout_bulk_destroy_byserver_locked(clp, server, + list) != 0) goto restart; } rcu_read_unlock(); spin_unlock(&clp->cl_lock); +} - if (list_empty(&layout_list)) - return 0; - return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); +static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp, + struct list_head *list, + enum pnfs_layout_destroy_mode mode) +{ + pnfs_layout_build_destroy_list_byclient(clp, list); + return pnfs_layout_free_bulk_destroy_list(list, mode); +} 
+ +int pnfs_layout_destroy_byclid(struct nfs_client *clp, + enum pnfs_layout_destroy_mode mode) +{ + LIST_HEAD(layout_list); + + return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode); } /* @@ -962,7 +989,68 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) nfs4_deviceid_mark_client_invalid(clp); nfs4_deviceid_purge_client(clp); - pnfs_destroy_layouts_byclid(clp, false); + pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE); +} + +static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp, + struct list_head *list) +{ + struct nfs_server *server; + + spin_lock(&clp->cl_lock); + rcu_read_lock(); +restart: + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!(server->caps & NFS_CAP_REBOOT_LAYOUTRETURN)) + continue; + if (pnfs_layout_bulk_destroy_byserver_locked(clp, server, + list) != 0) + goto restart; + } + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); +} + +static int pnfs_layout_bulk_list_reboot(struct list_head *list) +{ + struct pnfs_layout_hdr *lo; + struct nfs_server *server; + int ret; + + list_for_each_entry(lo, list, plh_bulk_destroy) { + server = NFS_SERVER(lo->plh_inode); + ret = pnfs_layout_return_on_reboot(lo); + switch (ret) { + case 0: + continue; + case -NFS4ERR_BAD_STATEID: + server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN; + break; + case -NFS4ERR_NO_GRACE: + break; + default: + goto err; + } + break; + } + return 0; +err: + return ret; +} + +int pnfs_layout_handle_reboot(struct nfs_client *clp) +{ + LIST_HEAD(list); + int ret = 0, ret2; + + pnfs_layout_build_recover_list_byclient(clp, &list); + if (!list_empty(&list)) + ret = pnfs_layout_bulk_list_reboot(&list); + ret2 = pnfs_layout_do_destroy_byclid(clp, &list, + PNFS_LAYOUT_INVALIDATE); + if (!ret) + ret = ret2; + return (ret == 0) ? 
0 : -EAGAIN; } static void @@ -1163,6 +1251,38 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } +static void +pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range) +{ + const struct pnfs_layout_segment *lseg; + u32 seq = be32_to_cpu(arg_stateid->seqid); + + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) { + list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) { + if (pnfs_seqid_is_newer(lseg->pls_seq, seq) || + !pnfs_should_free_range(&lseg->pls_range, range)) + continue; + pnfs_set_plh_return_info(lo, range->iomode, seq); + break; + } + } +} + +void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range) +{ + struct inode *inode = lo->plh_inode; + + spin_lock(&inode->i_lock); + pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range); + pnfs_clear_layoutreturn_waitbit(lo); + spin_unlock(&inode->i_lock); +} + void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, @@ -1180,6 +1300,7 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); pnfs_free_returned_lsegs(lo, &freeme, range, seq); pnfs_set_layout_stateid(lo, stateid, NULL, true); + pnfs_reset_return_info(lo); } else pnfs_mark_layout_stateid_invalid(lo, &freeme); out_unlock: @@ -1238,7 +1359,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, const struct cred **pcred, enum pnfs_iomode iomode, - bool sync) + unsigned int flags) { struct inode *ino = lo->plh_inode; struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; @@ -1265,33 +1386,21 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, if (ld->prepare_layoutreturn) ld->prepare_layoutreturn(&lrp->args); - status = 
nfs4_proc_layoutreturn(lrp, sync); + status = nfs4_proc_layoutreturn(lrp, flags); out: dprintk("<-- %s status: %d\n", __func__, status); return status; } -static bool -pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo, - enum pnfs_iomode iomode, - u32 seq) -{ - struct pnfs_layout_range recall_range = { - .length = NFS4_MAX_UINT64, - .iomode = iomode, - }; - return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, - &recall_range, seq) != -EBUSY; -} - /* Return true if layoutreturn is needed */ static bool pnfs_layout_need_return(struct pnfs_layout_hdr *lo) { if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) return false; - return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode, - lo->plh_return_seq); + return pnfs_mark_layout_stateid_return(lo, &lo->plh_return_segs, + lo->plh_return_iomode, + lo->plh_return_seq) != -EBUSY; } static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) @@ -1311,7 +1420,8 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) spin_unlock(&inode->i_lock); if (send) { /* Send an async layoutreturn so we dont deadlock */ - pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, + PNFS_FL_LAYOUTRETURN_ASYNC); } } else spin_unlock(&inode->i_lock); @@ -1378,7 +1488,8 @@ _pnfs_return_layout(struct inode *ino) send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL); spin_unlock(&ino->i_lock); if (send) - status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true); + status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, + 0); out_wait_layoutreturn: wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); out_put_layout_hdr: @@ -1416,6 +1527,24 @@ pnfs_commit_and_return_layout(struct inode *inode) return ret; } +static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo) +{ + struct inode *inode = lo->plh_inode; + const struct cred *cred; + +
spin_lock(&inode->i_lock); + if (!pnfs_layout_is_valid(lo)) { + spin_unlock(&inode->i_lock); + return 0; + } + cred = get_cred(lo->plh_lc_cred); + pnfs_get_layout_hdr(lo); + spin_unlock(&inode->i_lock); + + return pnfs_send_layoutreturn(lo, &zero_stateid, &cred, IOMODE_ANY, + PNFS_FL_LAYOUTRETURN_PRIVILEGED); +} + bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, @@ -1519,7 +1648,7 @@ out_noroc: return true; } if (layoutreturn) - pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true); + pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, 0); pnfs_put_layout_hdr(lo); return false; } @@ -1569,8 +1698,7 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, } void pnfs_roc_release(struct nfs4_layoutreturn_args *args, - struct nfs4_layoutreturn_res *res, - int ret) + struct nfs4_layoutreturn_res *res, int ret) { struct pnfs_layout_hdr *lo = args->layout; struct inode *inode = args->inode; @@ -1578,11 +1706,13 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct nfs4_xdr_opaque_data *ld_private = args->ld_private; switch (ret) { + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_NOMATCHING_LAYOUT: spin_lock(&inode->i_lock); - if (pnfs_layout_is_valid(lo) && - nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) - pnfs_set_plh_return_info(lo, args->range.iomode, 0); + pnfs_layoutreturn_retry_later_locked(lo, &args->stateid, + &args->range); pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&inode->i_lock); break; @@ -1921,8 +2051,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) { if (atomic_dec_and_test(&lo->plh_outstanding) && - test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) + test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) { + smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); + } } static 
bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) @@ -1979,7 +2111,9 @@ pnfs_update_layout(struct inode *ino, struct pnfs_layout_segment *lseg = NULL; struct nfs4_layoutget *lgp; nfs4_stateid stateid; - long timeout = 0; + struct nfs4_exception exception = { + .inode = ino, + }; unsigned long giveup = jiffies + (clp->cl_lease_time << 1); bool first; @@ -2151,7 +2285,7 @@ lookup_again: lgp->lo = lo; pnfs_get_layout_hdr(lo); - lseg = nfs4_proc_layoutget(lgp, &timeout); + lseg = nfs4_proc_layoutget(lgp, &exception); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); nfs_layoutget_end(lo); @@ -2178,6 +2312,8 @@ lookup_again: goto out_put_layout_hdr; } if (lseg) { + if (!exception.retry) + goto out_put_layout_hdr; if (first) pnfs_clear_first_layoutget(lo); trace_pnfs_update_layout(ino, pos, count, @@ -2561,7 +2697,8 @@ pnfs_mark_layout_for_return(struct inode *inode, return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode); spin_unlock(&inode->i_lock); if (return_now) - pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, + PNFS_FL_LAYOUTRETURN_ASYNC); } else { spin_unlock(&inode->i_lock); nfs_commit_inode(inode, 0); @@ -2677,7 +2814,8 @@ restart: } spin_unlock(&inode->i_lock); rcu_read_unlock(); - pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, + PNFS_FL_LAYOUTRETURN_ASYNC); pnfs_put_layout_hdr(lo); cond_resched(); goto restart; @@ -2700,38 +2838,28 @@ pnfs_layout_return_unused_byclid(struct nfs_client *clp, &range); } +/* Check if we have a valid layout but if there isn't an intersection + * between the request and the pgio->pg_lseg, put this pgio->pg_lseg away.
+ */ void -pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) +pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) { if (pgio->pg_lseg == NULL || - test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags)) + (test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags) && + pnfs_lseg_request_intersecting(pgio->pg_lseg, req))) return; pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout); -/* - * Check for any intersection between the request and the pgio->pg_lseg, - * and if none, put this pgio->pg_lseg away. - */ -void -pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) -{ - if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) { - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; - } -} -EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range); - void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { u64 rd_size; - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (pgio->pg_lseg == NULL) { if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); @@ -2761,8 +2889,7 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (pgio->pg_lseg == NULL) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), @@ -3205,6 +3332,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) struct nfs_inode *nfsi = NFS_I(inode); loff_t end_pos; int status; + bool mark_as_dirty = false; if (!pnfs_layoutcommit_outstanding(inode)) return 0; @@ -3256,19 +3384,23 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) if (ld->prepare_layoutcommit) { status = ld->prepare_layoutcommit(&data->args); if (status) { - 
put_cred(data->cred); + if (status != -ENOSPC) + put_cred(data->cred); spin_lock(&inode->i_lock); set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; - goto out_unlock; + if (status != -ENOSPC) + goto out_unlock; + spin_unlock(&inode->i_lock); + mark_as_dirty = true; } } status = nfs4_proc_layoutcommit(data, sync); out: - if (status) + if (status || mark_as_dirty) mark_inode_dirty_sync(inode); dprintk("<-- %s status %d\n", __func__, status); return status; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d886c8226d8f..91ff877185c8 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -35,6 +35,7 @@ #include <linux/nfs_page.h> #include <linux/workqueue.h> +struct nfs4_exception; struct nfs4_opendata; enum { @@ -59,6 +60,7 @@ struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ char *ds_remotestr; /* comma sep list of addrs */ struct list_head ds_addrs; + const struct net *ds_net; struct nfs_client *ds_clp; refcount_t ds_count; unsigned long ds_state; @@ -117,6 +119,12 @@ enum layoutdriver_policy_flags { PNFS_LAYOUTGET_ON_OPEN = 1 << 3, }; +enum pnfs_layout_destroy_mode { + PNFS_LAYOUT_INVALIDATE = 0, + PNFS_LAYOUT_BULK_RETURN, + PNFS_LAYOUT_FILE_BULK_RETURN, +}; + struct nfs4_deviceid_node; /* Per-layout driver specific registration structure */ @@ -126,7 +134,6 @@ struct pnfs_layoutdriver_type { const char *name; struct module *owner; unsigned flags; - unsigned max_deviceinfo_size; unsigned max_layoutget_response; int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); @@ -192,8 +199,6 @@ struct pnfs_commit_ops { int max); void (*recover_commit_reqs) (struct list_head *list, struct nfs_commit_info *cinfo); - struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo, - struct folio *folio); }; struct pnfs_layout_hdr { @@ -241,12 +246,18 @@ extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); extern void 
pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); /* nfs4proc.c */ +#define PNFS_FL_LAYOUTRETURN_ASYNC (1U << 0) +#define PNFS_FL_LAYOUTRETURN_PRIVILEGED (1U << 1) + extern size_t max_response_pages(struct nfs_server *server); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, const struct cred *cred); -extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout); -extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); +extern struct pnfs_layout_segment * +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, + struct nfs4_exception *exception); +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, + unsigned int flags); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); @@ -254,8 +265,7 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); void unset_pnfs_layoutdriver(struct nfs_server *); -void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio); -void pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req); +void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio, struct nfs_page *req); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, @@ -271,11 +281,10 @@ void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_layout_final(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); -int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, - struct nfs_fsid *fsid, - bool is_recall); -int pnfs_destroy_layouts_byclid(struct nfs_client *clp, - bool is_recall); +int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, + enum 
pnfs_layout_destroy_mode mode); +int pnfs_layout_destroy_byclid(struct nfs_client *clp, + enum pnfs_layout_destroy_mode mode); bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst, struct pnfs_layout_range *dst_range, struct inode *inode); @@ -321,6 +330,9 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, enum pnfs_iomode iomode, bool strict_iomode, gfp_t gfp_flags); +void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range); void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, @@ -342,6 +354,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); void pnfs_layout_return_unused_byclid(struct nfs_client *clp, enum pnfs_iomode iomode); +int pnfs_layout_handle_reboot(struct nfs_client *clp); /* nfs4_deviceid_flags */ enum { @@ -394,8 +407,6 @@ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data); void pnfs_generic_rw_release(void *data); void pnfs_generic_recover_commit_reqs(struct list_head *dst, struct nfs_commit_info *cinfo); -struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, - struct folio *folio); int pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, int how, @@ -405,7 +416,8 @@ int pnfs_generic_commit_pagelist(struct inode *inode, int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max); void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); -struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, +struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(const struct net *net, + struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_v3_ds_connect_unload(void); int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, @@ -555,17 +567,6 @@ 
pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) fl_cinfo->ops->recover_commit_reqs(head, cinfo); } -static inline struct nfs_page * -pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct folio *folio) -{ - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - - if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs) - return NULL; - return fl_cinfo->ops->search_commit_reqs(cinfo, folio); -} - /* Should the pNFS client commit and return the layout upon a setattr */ static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) @@ -723,6 +724,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi) { } +static inline int pnfs_layout_handle_reboot(struct nfs_client *clp) +{ + return 0; +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { @@ -862,13 +868,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) { } -static inline struct nfs_page * -pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct folio *folio) -{ - return NULL; -} - static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 178001c90156..bf0f2d67e96c 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -110,9 +110,6 @@ nfs4_get_device_info(struct nfs_server *server, * GETDEVICEINFO's maxcount */ max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - if (server->pnfs_curr_ld->max_deviceinfo_size && - server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz) - max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size; max_pages = nfs_page_array_len(0, max_resp_sz); dprintk("%s: server %p max_resp_sz %u max_pages %d\n", __func__, server, max_resp_sz, max_pages); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 88e061bd711b..2ee20a0f0b36 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -351,53 
+351,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); -static struct nfs_page * -pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets, - unsigned int nbuckets, struct folio *folio) -{ - struct nfs_page *req; - struct pnfs_commit_bucket *b; - unsigned int i; - - /* Linearly search the commit lists for each bucket until a matching - * request is found */ - for (i = 0, b = buckets; i < nbuckets; i++, b++) { - list_for_each_entry(req, &b->written, wb_list) { - if (nfs_page_to_folio(req) == folio) - return req->wb_head; - } - list_for_each_entry(req, &b->committing, wb_list) { - if (nfs_page_to_folio(req) == folio) - return req->wb_head; - } - } - return NULL; -} - -/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request - * for @folio - * @cinfo - commit info for current inode - * @folio - page to search for matching head request - * - * Return: the head request if one is found, otherwise %NULL. 
- */ -struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, - struct folio *folio) -{ - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - struct pnfs_commit_array *array; - struct nfs_page *req; - - list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) { - req = pnfs_bucket_search_commit_reqs(array->buckets, - array->nbuckets, folio); - if (req) - return req; - } - return NULL; -} -EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs); - static struct pnfs_layout_segment * pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, @@ -537,7 +490,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(data->inode), data->mds_ops, how, - RPC_TASK_CRED_NOREF); + RPC_TASK_CRED_NOREF, NULL); } else { nfs_init_commit(data, NULL, data->lseg, cinfo); initiate_commit(data, how); @@ -651,12 +604,12 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1, * Lookup DS by addresses. nfs4_ds_cache_lock is held */ static struct nfs4_pnfs_ds * -_data_server_lookup_locked(const struct list_head *dsaddrs) +_data_server_lookup_locked(const struct net *net, const struct list_head *dsaddrs) { struct nfs4_pnfs_ds *ds; list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) - if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) + if (ds->ds_net == net && _same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) return ds; return NULL; } @@ -763,7 +716,7 @@ out_err: * uncached and return cached struct nfs4_pnfs_ds. 
*/ struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) +nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags) { struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; char *remotestr; @@ -781,13 +734,14 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(dsaddrs); + tmp_ds = _data_server_lookup_locked(net, dsaddrs); if (tmp_ds == NULL) { INIT_LIST_HEAD(&ds->ds_addrs); list_splice_init(dsaddrs, &ds->ds_addrs); ds->ds_remotestr = remotestr; refcount_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); + ds->ds_net = net; ds->ds_clp = NULL; list_add(&ds->ds_node, &nfs4_data_server_cache); dprintk("%s add new data server %s\n", __func__, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e3570c656b0f..6c09cd090c34 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -396,9 +396,10 @@ nfs_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) } static int -nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, +nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio, unsigned int len, struct iattr *sattr) { + struct page *page = &folio->page; struct nfs_fh *fh; struct nfs_fattr *fattr; struct nfs_symlinkargs arg = { @@ -686,14 +687,22 @@ out_einval: return -EINVAL; } -static int nfs_have_delegation(struct inode *inode, fmode_t flags) +static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags) { return 0; } +static int nfs_return_delegation(struct inode *inode) +{ + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); + return 0; +} + static const struct inode_operations nfs_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, + .atomic_open = nfs_atomic_open_v23, .link = nfs_link, .unlink = nfs_unlink, .symlink = nfs_symlink, @@ -755,6 +764,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { 
.lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, .have_delegation = nfs_have_delegation, + .return_delegation = nfs_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a142287d86f6..3c1fa320b3f1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -28,6 +28,7 @@ #include "fscache.h" #include "pnfs.h" #include "nfstrace.h" +#include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -47,8 +48,7 @@ static struct nfs_pgio_header *nfs_readhdr_alloc(void) static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) { - if (rhdr->res.scratch != NULL) - kfree(rhdr->res.scratch); + kfree(rhdr->res.scratch); kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -56,7 +56,8 @@ static int nfs_return_empty_folio(struct folio *folio) { folio_zero_segment(folio, 0, folio_size(folio)); folio_mark_uptodate(folio); - folio_unlock(folio); + if (nfs_netfs_folio_unlock(folio)) + folio_unlock(folio); return 0; } @@ -122,8 +123,6 @@ static void nfs_readpage_release(struct nfs_page *req, int error) { struct folio *folio = nfs_page_to_folio(req); - if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT) - folio_set_error(folio); if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) if (nfs_netfs_folio_unlock(folio)) folio_unlock(folio); @@ -288,7 +287,7 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, struct nfs_open_context *ctx, struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_server *server = NFS_SERVER(inode); size_t fsize = folio_size(folio); unsigned int rsize = server->rsize; @@ -324,21 +323,57 @@ out: } /* - * Read a page over NFS. - * We read the page synchronously in the following case: - * - The error flag is set for this page. This happens only when a - * previous async read operation failed. + * Actually read a folio over the wire. 
*/ -int nfs_read_folio(struct file *file, struct folio *folio) +static int nfs_do_read_folio(struct file *file, struct folio *folio) { struct inode *inode = file_inode(file); struct nfs_pageio_descriptor pgio; struct nfs_open_context *ctx; int ret; - trace_nfs_aop_readpage(inode, folio); + ctx = get_nfs_open_context(nfs_file_open_context(file)); + + xchg(&ctx->error, 0); + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); + + ret = nfs_read_add_folio(&pgio, ctx, folio); + if (ret) + goto out_put; + + nfs_pageio_complete_read(&pgio); + nfs_update_delegated_atime(inode); + if (pgio.pg_error < 0) { + ret = pgio.pg_error; + goto out_put; + } + + ret = folio_wait_locked_killable(folio); + if (!folio_test_uptodate(folio) && !ret) + ret = xchg(&ctx->error, 0); + +out_put: + put_nfs_open_context(ctx); + return ret; +} + +/* + * Synchronously read a folio. + * + * This is not heavily used as most users to try an asynchronous + * large read through ->readahead first. + */ +int nfs_read_folio(struct file *file, struct folio *folio) +{ + struct inode *inode = file_inode(file); + loff_t pos = folio_pos(folio); + size_t len = folio_size(folio); + int ret; + + trace_nfs_aop_readpage(inode, pos, len); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); - task_io_account_read(folio_size(folio)); + task_io_account_read(len); /* * Try to flush any pending writes to the file.. @@ -358,30 +393,10 @@ int nfs_read_folio(struct file *file, struct folio *folio) goto out_unlock; ret = nfs_netfs_read_folio(file, folio); - if (!ret) - goto out; - - ctx = get_nfs_open_context(nfs_file_open_context(file)); - - xchg(&ctx->error, 0); - nfs_pageio_init_read(&pgio, inode, false, - &nfs_async_read_completion_ops); - - ret = nfs_read_add_folio(&pgio, ctx, folio); if (ret) - goto out_put; - - nfs_pageio_complete_read(&pgio); - ret = pgio.pg_error < 0 ? 
pgio.pg_error : 0; - if (!ret) { - ret = folio_wait_locked_killable(folio); - if (!folio_test_uptodate(folio) && !ret) - ret = xchg(&ctx->error, 0); - } -out_put: - put_nfs_open_context(ctx); + ret = nfs_do_read_folio(file, folio); out: - trace_nfs_aop_readpage_done(inode, folio, ret); + trace_nfs_aop_readpage_done(inode, pos, len, ret); return ret; out_unlock: folio_unlock(folio); @@ -428,6 +443,7 @@ void nfs_readahead(struct readahead_control *ractl) } nfs_pageio_complete_read(&pgio); + nfs_update_delegated_atime(inode); put_nfs_open_context(ctx); out: diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e1bcad5906ae..da5286514d8c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -130,11 +130,7 @@ static void nfs_ssc_unregister_ops(void) } #endif /* CONFIG_NFS_V4_2 */ -static struct shrinker acl_shrinker = { - .count_objects = nfs_access_cache_count, - .scan_objects = nfs_access_cache_scan, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *acl_shrinker; /* * Register the NFS filesystems @@ -154,9 +150,18 @@ int __init register_nfs_fs(void) ret = nfs_register_sysctl(); if (ret < 0) goto error_2; - ret = register_shrinker(&acl_shrinker, "nfs-acl"); - if (ret < 0) + + acl_shrinker = shrinker_alloc(0, "nfs-acl"); + if (!acl_shrinker) { + ret = -ENOMEM; goto error_3; + } + + acl_shrinker->count_objects = nfs_access_cache_count; + acl_shrinker->scan_objects = nfs_access_cache_scan; + + shrinker_register(acl_shrinker); + #ifdef CONFIG_NFS_V4_2 nfs_ssc_register_ops(); #endif @@ -176,7 +181,7 @@ error_0: */ void __exit unregister_nfs_fs(void) { - unregister_shrinker(&acl_shrinker); + shrinker_free(acl_shrinker); nfs_unregister_sysctl(); unregister_nfs4_fs(); #ifdef CONFIG_NFS_V4_2 @@ -513,8 +518,16 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, else nfs_show_nfsv4_options(m, nfss, showdefaults); - if (nfss->options & NFS_OPTION_FSCACHE) + if (nfss->options & NFS_OPTION_FSCACHE) { +#ifdef CONFIG_NFS_FSCACHE + if (nfss->fscache_uniq) 
+ seq_printf(m, ",fsc=%s", nfss->fscache_uniq); + else + seq_puts(m, ",fsc"); +#else seq_puts(m, ",fsc"); +#endif + } if (nfss->options & NFS_OPTION_MIGRATION) seq_puts(m, ",migration"); @@ -538,6 +551,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, else seq_puts(m, ",local_lock=posix"); + if (nfss->flags & NFS_MOUNT_NO_ALIGNWRITE) + seq_puts(m, ",noalignwrite"); + if (nfss->flags & NFS_MOUNT_WRITE_EAGER) { if (nfss->flags & NFS_MOUNT_WRITE_WAIT) seq_puts(m, ",write=wait"); @@ -898,6 +914,16 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; unsigned int authlist_len = ARRAY_SIZE(authlist); + /* make sure 'nolock'/'lock' override the 'local_lock' mount option */ + if (ctx->lock_status) { + if (ctx->lock_status == NFS_LOCK_NOLOCK) { + ctx->flags |= NFS_MOUNT_NONLM; + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } else { + ctx->flags &= ~NFS_MOUNT_NONLM; + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } + } status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len); if (status) return ERR_PTR(status); @@ -1021,6 +1047,16 @@ int nfs_reconfigure(struct fs_context *fc) sync_filesystem(sb); /* + * The SB_RDONLY flag has been removed from the superblock during + * mounts to prevent interference between different filesystems. + * Similarly, it is also necessary to ignore the SB_RDONLY flag + * during reconfiguration; otherwise, it may also result in the + * creation of redundant superblocks when mounting a directory with + * different rw and ro flags multiple times. + */ + fc->sb_flags_mask &= ~SB_RDONLY; + + /* * Userspace mount programs that send binary options generally send * them populated with default values. We have no way to know which * ones were explicitly specified. 
Fall back to legacy behavior and @@ -1081,7 +1117,7 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) sb->s_export_op = &nfs_export_ops; break; case 4: - sb->s_flags |= SB_POSIXACL; + sb->s_iflags |= SB_I_NOUMASK; sb->s_time_gran = 1; sb->s_time_min = S64_MIN; sb->s_time_max = S64_MAX; @@ -1277,8 +1313,17 @@ int nfs_get_tree_common(struct fs_context *fc) if (IS_ERR(server)) return PTR_ERR(server); + /* + * When NFS_MOUNT_UNSHARED is not set, NFS forces the sharing of a + * superblock among each filesystem that mounts sub-directories + * belonging to a single exported root path. + * To prevent interference between different filesystems, the + * SB_RDONLY flag should be removed from the superblock. + */ if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; + else + fc->sb_flags &= ~SB_RDONLY; /* -o noac implies -o sync */ if (server->flags & NFS_MOUNT_NOAC) @@ -1376,6 +1421,7 @@ unsigned short max_session_cb_slots = NFS4_DEF_CB_SLOT_TABLE_SIZE; unsigned short send_implementation_id = 1; char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; bool recover_lost_locks = false; +short nfs_delay_retrans = -1; EXPORT_SYMBOL_GPL(nfs_callback_nr_threads); EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); @@ -1386,6 +1432,7 @@ EXPORT_SYMBOL_GPL(max_session_cb_slots); EXPORT_SYMBOL_GPL(send_implementation_id); EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier); EXPORT_SYMBOL_GPL(recover_lost_locks); +EXPORT_SYMBOL_GPL(nfs_delay_retrans); #define NFS_CALLBACK_MAXPORTNR (65535U) @@ -1434,5 +1481,9 @@ MODULE_PARM_DESC(recover_lost_locks, "If the server reports that a lock might be lost, " "try to recover it risking data corruption."); - +module_param_named(delay_retrans, nfs_delay_retrans, short, 0644); +MODULE_PARM_DESC(delay_retrans, + "Unless negative, specifies the number of times the NFSv4 " + "client retries a request before returning an EAGAIN error, " + "after a reply of NFS4ERR_DELAY from the server."); #endif /* CONFIG_NFS_V4 */ diff 
--git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 13818129d268..1c62a5a9f51d 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -32,15 +32,7 @@ static int nfs_symlink_filler(struct file *file, struct folio *folio) int error; error = NFS_PROTO(inode)->readlink(inode, &folio->page, 0, PAGE_SIZE); - if (error < 0) - goto error; - folio_mark_uptodate(folio); - folio_unlock(folio); - return 0; - -error: - folio_set_error(folio); - folio_unlock(folio); + folio_end_read(folio, error == 0); return error; } diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index f39e2089bc4c..e645be1a3381 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -29,7 +29,6 @@ static struct ctl_table nfs_cb_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; int nfs_register_sysctl(void) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index bf378ecd5d9f..784f7c1d003b 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -14,6 +14,7 @@ #include <linux/rcupdate.h> #include <linux/lockd/lockd.h> +#include "internal.h" #include "nfs4_fs.h" #include "netns.h" #include "sysfs.h" @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt *clnt) rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); } +/* + * Shut down the nfs_client only once all the superblocks + * have been shut down. 
+ */ +static void shutdown_nfs_client(struct nfs_client *clp) +{ + struct nfs_server *server; + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!(server->flags & NFS_MOUNT_SHUTDOWN)) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + nfs_mark_client_ready(clp, -EIO); + shutdown_client(clp->cl_rpcclient); +} + static ssize_t shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, server->flags |= NFS_MOUNT_SHUTDOWN; shutdown_client(server->client); - shutdown_client(server->nfs_client->cl_rpcclient); if (!IS_ERR(server->client_acl)) shutdown_client(server->client_acl); @@ -267,6 +286,7 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, if (server->nlm_host) shutdown_client(server->nlm_host->h_rpcclnt); out: + shutdown_nfs_client(server->nfs_client); return count; } @@ -280,9 +300,9 @@ void nfs_sysfs_link_rpc_client(struct nfs_server *server, char name[RPC_CLIENT_NAME_SIZE]; int ret; - strcpy(name, clnt->cl_program->name); - strcat(name, uniq ? uniq : ""); - strcat(name, "_client"); + strscpy(name, clnt->cl_program->name, sizeof(name)); + strncat(name, uniq ? 
uniq : "", sizeof(name) - strlen(name) - 1); + strncat(name, "_client", sizeof(name) - strlen(name) - 1); ret = sysfs_create_link_nowarn(&server->kobj, &clnt->cl_sysfs->kobject, name); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 150a953a8be9..bf77399696a7 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -232,6 +232,8 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); + NFS_PROTO(inode)->return_delegation(inode); + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data)) nfs_free_unlinkdata(data); } @@ -267,7 +269,7 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct inode *new_dir = data->new_dir; struct dentry *old_dentry = data->old_dentry; - trace_nfs_sillyrename_rename(old_dir, old_dentry, + trace_nfs_async_rename_done(old_dir, old_dentry, new_dir, data->new_dentry, task->tk_status); if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { rpc_restart_call_prepare(task); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7d03811f44a4..2b6b3542405c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,9 +63,6 @@ static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, struct nfs_page *req); static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct folio *folio); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -156,132 +153,51 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) } } -static int -nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) { - int ret; - - if (!test_bit(PG_REMOVE, &req->wb_flags)) - return 0; - ret = nfs_page_group_lock(req); - if (ret) - return ret; if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) 
nfs_page_set_inode_ref(req, inode); - nfs_page_group_unlock(req); - return 0; -} - -static struct nfs_page *nfs_folio_private_request(struct folio *folio) -{ - return folio_get_private(folio); } /** - * nfs_folio_find_private_request - find head request associated with a folio + * nfs_folio_find_head_request - find head request associated with a folio * @folio: pointer to folio * * must be called while holding the inode lock. * * returns matching head request with reference held, or NULL if not found. */ -static struct nfs_page *nfs_folio_find_private_request(struct folio *folio) +static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct nfs_page *req; if (!folio_test_private(folio)) return NULL; - spin_lock(&mapping->private_lock); - req = nfs_folio_private_request(folio); + spin_lock(&mapping->i_private_lock); + req = folio->private; if (req) { WARN_ON_ONCE(req->wb_head != req); kref_get(&req->wb_kref); } - spin_unlock(&mapping->private_lock); - return req; -} - -static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio) -{ - struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req = NULL; - if (!folio_test_swapcache(folio)) - return NULL; - mutex_lock(&nfsi->commit_mutex); - if (folio_test_swapcache(folio)) { - req = nfs_page_search_commits_for_head_request_locked(nfsi, - folio); - if (req) { - WARN_ON_ONCE(req->wb_head != req); - kref_get(&req->wb_kref); - } - } - mutex_unlock(&nfsi->commit_mutex); + spin_unlock(&mapping->i_private_lock); return req; } -/** - * nfs_folio_find_head_request - find head request associated with a folio - * @folio: pointer to folio - * - * returns matching head request with reference held, or NULL if not found. 
- */ -static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) -{ - struct nfs_page *req; - - req = nfs_folio_find_private_request(folio); - if (!req) - req = nfs_folio_find_swap_request(folio); - return req; -} - -static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) -{ - struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_page *req, *head; - int ret; - - for (;;) { - req = nfs_folio_find_head_request(folio); - if (!req) - return req; - head = nfs_page_group_lock_head(req); - if (head != req) - nfs_release_request(req); - if (IS_ERR(head)) - return head; - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) { - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); - } - /* Ensure that nobody removed the request before we locked it */ - if (head == nfs_folio_private_request(folio)) - break; - if (folio_test_swapcache(folio)) - break; - nfs_unlock_and_release_request(head); - } - return head; -} - /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct folio *folio, unsigned int offset, unsigned int count) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; loff_t end, i_size; pgoff_t end_index; spin_lock(&inode->i_lock); i_size = i_size_read(inode); end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio); - if (i_size > 0 && folio_index(folio) < end_index) + if (i_size > 0 && folio->index < end_index) goto out; - end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count; + end = folio_pos(folio) + (loff_t)offset + (loff_t)count; if (i_size >= end) goto out; trace_nfs_size_grow(inode, end); @@ -289,6 +205,8 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset, NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); out: + /* Atomically update timestamps if they are delegated to us. 
*/ + nfs_update_delegated_mtime_locked(inode); spin_unlock(&inode->i_lock); nfs_fscache_invalidate(inode, 0); } @@ -309,9 +227,8 @@ static void nfs_set_pageerror(struct address_space *mapping) static void nfs_mapping_set_error(struct folio *folio, int error) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; - folio_set_error(folio); filemap_set_wb_err(mapping, error); if (mapping->host) errseq_set(&mapping->host->i_sb->s_wb_err, @@ -410,7 +327,7 @@ int nfs_congestion_kb; static void nfs_folio_set_writeback(struct folio *folio) { - struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); + struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); folio_start_writeback(folio); if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) @@ -419,12 +336,14 @@ static void nfs_folio_set_writeback(struct folio *folio) static void nfs_folio_end_writeback(struct folio *folio) { - struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); + struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); folio_end_writeback(folio); if (atomic_long_dec_return(&nfss->writeback) < - NFS_CONGESTION_OFF_THRESH) + NFS_CONGESTION_OFF_THRESH) { nfss->write_congested = 0; + wake_up_all(&nfss->write_congestion_wait); + } } static void nfs_page_end_writeback(struct nfs_page *req) @@ -548,6 +467,74 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, nfs_destroy_unlinked_subrequests(destroy_list, head, inode); } +/** + * nfs_wait_on_request - Wait for a request to complete. + * @req: request to wait upon. + * + * Interruptible by fatal signals only. + * The user is responsible for holding a count on the request. 
+ */ +static int nfs_wait_on_request(struct nfs_page *req) +{ + if (!test_bit(PG_BUSY, &req->wb_flags)) + return 0; + set_bit(PG_CONTENDED2, &req->wb_flags); + smp_mb__after_atomic(); + return wait_on_bit_io(&req->wb_flags, PG_BUSY, + TASK_UNINTERRUPTIBLE); +} + +/* + * nfs_unroll_locks - unlock all newly locked reqs and wait on @req + * @head: head request of page group, must be holding head lock + * @req: request that couldn't lock and needs to wait on the req bit lock + * + * This is a helper function for nfs_lock_and_join_requests + * returns 0 on success, < 0 on error. + */ +static void +nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) +{ + struct nfs_page *tmp; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { + if (!kref_read(&tmp->wb_kref)) + continue; + nfs_unlock_and_release_request(tmp); + } +} + +/* + * nfs_page_group_lock_subreq - try to lock a subrequest + * @head: head request of page group + * @subreq: request to lock + * + * This is a helper function for nfs_lock_and_join_requests which + * must be called with the head request and page group both locked. + * On error, it returns with the page group unlocked. 
+ */ +static int +nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) +{ + int ret; + + if (!kref_get_unless_zero(&subreq->wb_kref)) + return 0; + while (!nfs_lock_request(subreq)) { + nfs_page_group_unlock(head); + ret = nfs_wait_on_request(subreq); + if (!ret) + ret = nfs_page_group_lock(head); + if (ret < 0) { + nfs_unroll_locks(head, subreq); + nfs_release_request(subreq); + return ret; + } + } + return 0; +} + /* * nfs_lock_and_join_requests - join all subreqs to the head req * @folio: the folio used to lookup the "page group" of nfs_page structures @@ -565,31 +552,60 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, */ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_page *head; + struct inode *inode = folio->mapping->host; + struct nfs_page *head, *subreq; struct nfs_commit_info cinfo; int ret; - nfs_init_cinfo_from_inode(&cinfo, inode); /* * A reference is taken only on the head request which acts as a * reference to the whole page group - the group will not be destroyed * until the head reference is released. 
*/ - head = nfs_folio_find_and_lock_request(folio); - if (IS_ERR_OR_NULL(head)) - return head; +retry: + head = nfs_folio_find_head_request(folio); + if (!head) + return NULL; - /* lock each request in the page group */ - ret = nfs_page_group_lock_subrequests(head); - if (ret < 0) { + while (!nfs_lock_request(head)) { + ret = nfs_wait_on_request(head); + if (ret < 0) { + nfs_release_request(head); + return ERR_PTR(ret); + } + } + + ret = nfs_page_group_lock(head); + if (ret < 0) + goto out_unlock; + + /* Ensure that nobody removed the request before we locked it */ + if (head != folio->private) { + nfs_page_group_unlock(head); nfs_unlock_and_release_request(head); - return ERR_PTR(ret); + goto retry; } - nfs_join_page_group(head, &cinfo, inode); + nfs_cancel_remove_inode(head, inode); + + /* lock each request in the page group */ + for (subreq = head->wb_this_page; + subreq != head; + subreq = subreq->wb_this_page) { + ret = nfs_page_group_lock_subreq(head, subreq); + if (ret < 0) + goto out_unlock; + } + + nfs_page_group_unlock(head); + nfs_init_cinfo_from_inode(&cinfo, inode); + nfs_join_page_group(head, &cinfo, inode); return head; + +out_unlock: + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); } static void nfs_write_error(struct nfs_page *req, int error) @@ -641,7 +657,7 @@ static int nfs_page_async_flush(struct folio *folio, nfs_redirty_request(req); pgio->pg_error = 0; } else - nfs_add_stats(folio_file_mapping(folio)->host, + nfs_add_stats(folio->mapping->host, NFSIOS_WRITEPAGES, 1); out: return ret; @@ -653,7 +669,7 @@ out_launder: static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - nfs_pageio_cond_complete(pgio, folio_index(folio)); + nfs_pageio_cond_complete(pgio, folio->index); return nfs_page_async_flush(folio, wbc, pgio); } @@ -664,15 +680,9 @@ static int nfs_writepage_locked(struct folio *folio, struct writeback_control *wbc) { struct nfs_pageio_descriptor pgio; - struct 
inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; int err; - if (wbc->sync_mode == WB_SYNC_NONE && - NFS_SERVER(inode)->write_congested) { - folio_redirty_for_writepage(wbc, folio); - return AOP_WRITEPAGE_ACTIVATE; - } - nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_pageio_init_write(&pgio, inode, 0, false, &nfs_async_write_completion_ops); @@ -682,17 +692,6 @@ static int nfs_writepage_locked(struct folio *folio, return err; } -int nfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct folio *folio = page_folio(page); - int ret; - - ret = nfs_writepage_locked(folio, wbc); - if (ret != AOP_WRITEPAGE_ACTIVATE) - unlock_page(page); - return ret; -} - static int nfs_writepages_callback(struct folio *folio, struct writeback_control *wbc, void *data) { @@ -715,12 +714,17 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct nfs_pageio_descriptor pgio; struct nfs_io_completion *ioc = NULL; unsigned int mntflags = NFS_SERVER(inode)->flags; + struct nfs_server *nfss = NFS_SERVER(inode); int priority = 0; int err; - if (wbc->sync_mode == WB_SYNC_NONE && - NFS_SERVER(inode)->write_congested) - return 0; + /* Wait with writeback until write congestion eases */ + if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) { + err = wait_event_killable(nfss->write_congestion_wait, + nfss->write_congested == 0); + if (err) + return err; + } nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); @@ -741,6 +745,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) &pgio); pgio.pg_error = 0; nfs_pageio_complete(&pgio); + if (err == -EAGAIN && mntflags & NFS_MOUNT_SOFTERR) + break; } while (err < 0 && !nfs_error_is_fatal(err)); nfs_io_completion_put(ioc); @@ -757,25 +763,17 @@ out_err: static void nfs_inode_add_request(struct nfs_page *req) { struct folio *folio = nfs_page_to_folio(req); - struct address_space *mapping = folio_file_mapping(folio); + struct 
address_space *mapping = folio->mapping; struct nfs_inode *nfsi = NFS_I(mapping->host); WARN_ON_ONCE(req->wb_this_page != req); /* Lock the request! */ nfs_lock_request(req); - - /* - * Swap-space should not get truncated. Hence no need to plug the race - * with invalidate/truncate. - */ - spin_lock(&mapping->private_lock); - if (likely(!folio_test_swapcache(folio))) { - set_bit(PG_MAPPED, &req->wb_flags); - folio_set_private(folio); - folio->private = req; - } - spin_unlock(&mapping->private_lock); + spin_lock(&mapping->i_private_lock); + set_bit(PG_MAPPED, &req->wb_flags); + folio_attach_private(folio, req); + spin_unlock(&mapping->i_private_lock); atomic_long_inc(&nfsi->nrequests); /* this a head request for a page group - mark it as having an * extra reference so sub groups can follow suit. @@ -792,18 +790,19 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); - if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + nfs_page_group_lock(req); + if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; - spin_lock(&mapping->private_lock); - if (likely(folio && !folio_test_swapcache(folio))) { - folio->private = NULL; - folio_clear_private(folio); + spin_lock(&mapping->i_private_lock); + if (likely(folio)) { + folio_detach_private(folio); clear_bit(PG_MAPPED, &req->wb_head->wb_flags); } - spin_unlock(&mapping->private_lock); + spin_unlock(&mapping->i_private_lock); } + nfs_page_group_unlock(req); if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { atomic_long_dec(&nfsi->nrequests); @@ -818,38 +817,6 @@ static void nfs_mark_request_dirty(struct nfs_page *req) filemap_dirty_folio(folio_mapping(folio), folio); } -/* - * nfs_page_search_commits_for_head_request_locked - * - * Search through commit lists on @inode for the head request for 
@folio. - * Must be called while holding the inode (which is cinfo) lock. - * - * Returns the head request if found, or NULL if not found. - */ -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct folio *folio) -{ - struct nfs_page *freq, *t; - struct nfs_commit_info cinfo; - struct inode *inode = &nfsi->vfs_inode; - - nfs_init_cinfo_from_inode(&cinfo, inode); - - /* search through pnfs commit lists */ - freq = pnfs_search_commit_reqs(inode, &cinfo, folio); - if (freq) - return freq->wb_head; - - /* Linearly search the commit list for the correct request */ - list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { - if (nfs_page_to_folio(freq) == folio) - return freq->wb_head; - } - - return NULL; -} - /** * nfs_request_add_commit_list_locked - add request to a commit list * @req: pointer to a struct nfs_page @@ -956,7 +923,7 @@ static void nfs_folio_clear_commit(struct folio *folio) long nr = folio_nr_pages(folio); node_stat_mod_folio(folio, NR_WRITEBACK, -nr); - wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb, + wb_stat_mod(&inode_to_bdi(folio->mapping->host)->wb, WB_WRITEBACK, -nr); } } @@ -1141,7 +1108,7 @@ out_flushme: */ nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); - error = nfs_wb_folio(folio_file_mapping(folio)->host, folio); + error = nfs_wb_folio(folio->mapping->host, folio); return (error < 0) ? 
ERR_PTR(error) : NULL; } @@ -1217,7 +1184,7 @@ int nfs_flush_incompatible(struct file *file, struct folio *folio) nfs_release_request(req); if (!do_flush) return 0; - status = nfs_wb_folio(folio_file_mapping(folio)->host, folio); + status = nfs_wb_folio(folio->mapping->host, folio); } while (status == 0); return status; } @@ -1291,7 +1258,7 @@ out: */ static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_inode *nfsi = NFS_I(inode); if (nfs_have_delegated_attributes(inode)) @@ -1312,7 +1279,7 @@ static bool is_whole_file_wrlock(struct file_lock *fl) { return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX && - fl->fl_type == F_WRLCK; + lock_is_write(fl); } /* If we know the page is up to date, and we're not using byte range locks (or @@ -1330,12 +1297,15 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio, struct file_lock_context *flctx = locks_inode_context(inode); struct file_lock *fl; int ret; + unsigned int mntflags = NFS_SERVER(inode)->flags; + if (mntflags & NFS_MOUNT_NO_ALIGNWRITE) + return 0; if (file->f_flags & O_DSYNC) return 0; if (!nfs_folio_write_uptodate(folio, pagelen)) return 0; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (nfs_have_write_delegation(inode)) return 1; if (!flctx || (list_empty_careful(&flctx->flc_flock) && list_empty_careful(&flctx->flc_posix))) @@ -1346,13 +1316,13 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio, spin_lock(&flctx->flc_lock); if (!list_empty(&flctx->flc_posix)) { fl = list_first_entry(&flctx->flc_posix, struct file_lock, - fl_list); + c.flc_list); if (is_whole_file_wrlock(fl)) ret = 1; } else if (!list_empty(&flctx->flc_flock)) { fl = list_first_entry(&flctx->flc_flock, struct file_lock, - fl_list); - if (fl->fl_type == F_WRLCK) + c.flc_list); + if (lock_is_write(fl)) ret = 1; } spin_unlock(&flctx->flc_lock); @@ 
-1369,7 +1339,7 @@ int nfs_update_folio(struct file *file, struct folio *folio, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; unsigned int pagelen = nfs_folio_length(folio); int status = 0; @@ -1377,14 +1347,18 @@ int nfs_update_folio(struct file *file, struct folio *folio, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count, - (long long)(folio_file_pos(folio) + offset)); + (long long)(folio_pos(folio) + offset)); if (!count) goto out; if (nfs_can_extend_write(file, folio, pagelen)) { - count = max(count + offset, pagelen); - offset = 0; + unsigned int end = count + offset; + + offset = round_down(offset, PAGE_SIZE); + if (end < pagelen) + end = min(round_up(end, PAGE_SIZE), pagelen); + count = end - offset; } status = nfs_writepage_setup(ctx, folio, offset, count); @@ -1529,6 +1503,13 @@ void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) struct nfs_fattr *fattr = &hdr->fattr; struct inode *inode = hdr->inode; + if (nfs_have_delegated_mtime(inode)) { + spin_lock(&inode->i_lock); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); + spin_unlock(&inode->i_lock); + return; + } + spin_lock(&inode->i_lock); nfs_writeback_check_extend(hdr, fattr); nfs_post_op_update_inode_force_wcc_locked(inode, fattr); @@ -1685,7 +1666,8 @@ EXPORT_SYMBOL_GPL(nfs_commitdata_release); int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, - int how, int flags) + int how, int flags, + struct nfsd_file *localio) { struct rpc_task *task; int priority = flush_task_priority(how); @@ -1714,6 +1696,9 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, dprintk("NFS: initiated commit call\n"); + if (localio) + 
return nfs_local_commit(localio, data, call_ops, how); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -1813,6 +1798,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo) { struct nfs_commit_data *data; + struct nfsd_file *localio; unsigned short task_flags = 0; /* another commit raced with us */ @@ -1829,9 +1815,12 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, nfs_init_commit(data, head, NULL, cinfo); if (NFS_SERVER(inode)->nfs_client->cl_minorversion) task_flags = RPC_TASK_MOVEABLE; + + localio = nfs_local_open_fh(NFS_SERVER(inode)->nfs_client, data->cred, + data->args.fh, data->context->mode); return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), data->mds_ops, how, - RPC_TASK_CRED_NOREF | task_flags); + RPC_TASK_CRED_NOREF | task_flags, localio); } /* @@ -1852,7 +1841,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; - struct nfs_server *nfss; struct folio *folio; while (!list_empty(&data->pages)) { @@ -1895,9 +1883,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Latency breaker */ cond_resched(); } - nfss = NFS_SERVER(data->inode); - if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - nfss->write_congested = 0; nfs_init_cinfo(&cinfo, data->inode, data->dreq); nfs_commit_end(cinfo.mds); @@ -2088,17 +2073,17 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) */ int nfs_wb_folio(struct inode *inode, struct folio *folio) { - loff_t range_start = folio_file_pos(folio); - loff_t range_end = range_start + (loff_t)folio_size(folio) - 1; + loff_t range_start = folio_pos(folio); + size_t len = folio_size(folio); struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 0, .range_start = range_start, - .range_end = range_end, + .range_end = range_start + len 
- 1, }; int ret; - trace_nfs_writeback_folio(inode, folio); + trace_nfs_writeback_folio(inode, range_start, len); for (;;) { folio_wait_writeback(folio); @@ -2116,7 +2101,7 @@ int nfs_wb_folio(struct inode *inode, struct folio *folio) goto out_error; } out_error: - trace_nfs_writeback_folio_done(inode, folio, ret); + trace_nfs_writeback_folio_done(inode, range_start, len, ret); return ret; } @@ -2135,10 +2120,10 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst, if (folio_test_private(src)) return -EBUSY; - if (folio_test_fscache(src)) { + if (folio_test_private_2(src)) { /* [DEPRECATED] */ if (mode == MIGRATE_ASYNC) return -EBUSY; - folio_wait_fscache(src); + folio_wait_private_2(src); } return migrate_folio(mapping, dst, src, mode); |