From c8b8e32d700fe943a935e435ae251364d016c497 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Apr 2016 08:51:58 -0700 Subject: direct-io: eliminate the offset argument to ->direct_IO Including blkdev_direct_IO and dax_do_io. It has to be ki_pos to actually work, so eliminate the superflous argument. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/nfs_fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/nfs_fs.h') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67300f8e5f2f..cede8f6a7e2d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -445,10 +445,9 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) /* * linux/fs/nfs/direct.c */ -extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, - struct iov_iter *iter, - loff_t pos); + struct iov_iter *iter); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter); -- cgit v1.2.3 From 884be175351e73c515303118150f195dd611787c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Apr 2016 23:56:31 -0400 Subject: nfs: per-name sillyunlink exclusion use d_alloc_parallel() for sillyunlink/lookup exclusion and explicit rwsem (nfs_rmdir() being a writer and nfs_call_unlink() - a reader) for rmdir/sillyunlink one. That ought to make lookup/readdir/!O_CREAT atomic_open really parallel on NFS. Signed-off-by: Al Viro --- fs/nfs/dir.c | 9 +-- fs/nfs/inode.c | 4 +- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfstrace.h | 2 +- fs/nfs/unlink.c | 193 +++++++++++++----------------------------------- include/linux/nfs_fs.h | 11 +-- include/linux/nfs_xdr.h | 4 +- 7 files changed, 61 insertions(+), 164 deletions(-) (limited to 'include/linux/nfs_fs.h') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cd3c754e3f89..aaf7bd0cbae2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -909,7 +909,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; - nfs_block_sillyrename(dentry); if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) @@ -945,7 +944,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; } while (!desc->eof); out: - nfs_unblock_sillyrename(dentry); if (res > 0) res = 0; dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); @@ -1398,7 +1396,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ trace_nfs_lookup_enter(dir, dentry, flags); - nfs_block_sillyrename(parent); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); if (error == -ENOENT) goto no_entry; @@ -1423,7 +1420,6 @@ no_entry: } nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unblock_sillyrename: - nfs_unblock_sillyrename(parent); trace_nfs_lookup_exit(dir, dentry, flags, error); nfs4_label_free(label); out: @@ -1535,9 +1531,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, goto out; trace_nfs_atomic_open_enter(dir, ctx, open_flags); - nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened); - nfs_unblock_sillyrename(dentry->d_parent); if (IS_ERR(inode)) { err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); @@ -1781,7 +1775,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) trace_nfs_rmdir_enter(dir, dentry); if (d_really_is_positive(dentry)) { - nfs_wait_on_sillyrename(dentry); + down_write(&NFS_I(d_inode(dentry))->rmdir_sem); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ switch (error) { @@ -1791,6 +1785,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) case -ENOENT: nfs_dentry_handle_enoent(dentry); } + up_write(&NFS_I(d_inode(dentry))->rmdir_sem); } else error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); trace_nfs_rmdir_exit(dir, dentry, error); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 738c84a42eb0..52e7d6869e3b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1958,9 +1958,7 @@ static void init_once(void *foo) nfsi->nrequests = 0; nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); - atomic_set(&nfsi->silly_count, 1); - INIT_HLIST_HEAD(&nfsi->silly_list); - init_waitqueue_head(&nfsi->waitqueue); + init_rwsem(&nfsi->rmdir_sem); nfs4_init_once(nfsi); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7a7ac1dafa02..084e8570da18 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3777,7 +3777,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) { - nfs4_setup_sequence(NFS_SERVER(data->dir), + nfs4_setup_sequence(NFS_SB(data->dentry->d_sb), &data->args.seq_args, &data->res.seq_res, task); diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 9f80a086b612..0b9e5cc9a747 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -702,7 +702,7 @@ TRACE_EVENT(nfs_sillyrename_unlink, ), TP_fast_assign( - struct inode *dir = data->dir; + struct inode *dir = d_inode(data->dentry->d_parent); size_t len = data->args.name.len; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 7d6deaccf89b..1868246f56e6 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -30,45 +30,11 @@ static void nfs_free_unlinkdata(struct nfs_unlinkdata *data) { - iput(data->dir); put_rpccred(data->cred); kfree(data->args.name.name); kfree(data); } -#define NAME_ALLOC_LEN(len) ((len+16) & ~15) -/** - * nfs_copy_dname - copy dentry name to data structure - * @dentry: pointer to dentry - * @data: nfs_unlinkdata - */ -static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data) -{ - char *str; - int len = dentry->d_name.len; - - str = kmemdup(dentry->d_name.name, NAME_ALLOC_LEN(len), GFP_KERNEL); - if (!str) - return -ENOMEM; - data->args.name.len = len; - data->args.name.name = str; - return 0; -} - -static void nfs_free_dname(struct nfs_unlinkdata *data) -{ - kfree(data->args.name.name); - data->args.name.name = NULL; - data->args.name.len = 0; -} - -static void nfs_dec_sillycount(struct inode *dir) -{ - struct nfs_inode *nfsi = NFS_I(dir); - if (atomic_dec_return(&nfsi->silly_count) == 1) - wake_up(&nfsi->waitqueue); -} - /** * nfs_async_unlink_done - Sillydelete post-processing * @task: rpc_task of the sillydelete @@ -78,7 +44,7 @@ static void nfs_dec_sillycount(struct inode *dir) static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; - struct inode *dir = data->dir; + struct inode *dir = d_inode(data->dentry->d_parent); trace_nfs_sillyrename_unlink(data, task->tk_status); if (!NFS_PROTO(dir)->unlink_done(task, dir)) @@ -95,17 +61,21 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) static void nfs_async_unlink_release(void *calldata) { struct nfs_unlinkdata *data = calldata; - struct super_block *sb = data->dir->i_sb; + struct dentry *dentry = data->dentry; + struct super_block *sb = dentry->d_sb; - nfs_dec_sillycount(data->dir); + up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); + d_lookup_done(dentry); nfs_free_unlinkdata(data); + dput(dentry); nfs_sb_deactive(sb); } static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; - NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data); + struct inode *dir = d_inode(data->dentry->d_parent); + NFS_PROTO(dir)->unlink_rpc_prepare(task, data); } static const struct rpc_call_ops nfs_unlink_ops = { @@ -114,7 +84,7 @@ static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_prepare = nfs_unlink_prepare, }; -static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) +static void nfs_do_call_unlink(struct nfs_unlinkdata *data) { struct rpc_message msg = { .rpc_argp = &data->args, @@ -129,10 +99,31 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n .flags = RPC_TASK_ASYNC, }; struct rpc_task *task; + struct inode *dir = d_inode(data->dentry->d_parent); + nfs_sb_active(dir->i_sb); + data->args.fh = NFS_FH(dir); + nfs_fattr_init(data->res.dir_attr); + + NFS_PROTO(dir)->unlink_setup(&msg, dir); + + task_setup_data.rpc_client = NFS_CLIENT(dir); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task_async(task); +} + +static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) +{ + struct inode *dir = d_inode(dentry->d_parent); struct dentry *alias; - alias = d_lookup(parent, &data->args.name); - if (alias != NULL) { + down_read_non_owner(&NFS_I(dir)->rmdir_sem); + alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq); + if (IS_ERR(alias)) { + up_read_non_owner(&NFS_I(dir)->rmdir_sem); + return 0; + } + if (!d_in_lookup(alias)) { int ret; void *devname_garbage = NULL; @@ -140,10 +131,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * Hey, we raced with lookup... See if we need to transfer * the sillyrename information to the aliased dentry. */ - nfs_free_dname(data); - ret = nfs_copy_dname(alias, data); spin_lock(&alias->d_lock); - if (ret == 0 && d_really_is_positive(alias) && + if (d_really_is_positive(alias) && !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { devname_garbage = alias->d_fsdata; alias->d_fsdata = data; @@ -152,8 +141,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n } else ret = 0; spin_unlock(&alias->d_lock); - nfs_dec_sillycount(dir); dput(alias); + up_read_non_owner(&NFS_I(dir)->rmdir_sem); /* * If we'd displaced old cached devname, free it. At that * point dentry is definitely not a root, so we won't need @@ -162,95 +151,18 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n kfree(devname_garbage); return ret; } - data->dir = igrab(dir); - if (!data->dir) { - nfs_dec_sillycount(dir); - return 0; - } - nfs_sb_active(dir->i_sb); - data->args.fh = NFS_FH(dir); - nfs_fattr_init(data->res.dir_attr); - - NFS_PROTO(dir)->unlink_setup(&msg, dir); - - task_setup_data.rpc_client = NFS_CLIENT(dir); - task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task_async(task); + data->dentry = alias; + nfs_do_call_unlink(data); return 1; } -static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) -{ - struct dentry *parent; - struct inode *dir; - int ret = 0; - - - parent = dget_parent(dentry); - if (parent == NULL) - goto out_free; - dir = d_inode(parent); - /* Non-exclusive lock protects against concurrent lookup() calls */ - spin_lock(&dir->i_lock); - if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { - /* Deferred delete */ - hlist_add_head(&data->list, &NFS_I(dir)->silly_list); - spin_unlock(&dir->i_lock); - ret = 1; - goto out_dput; - } - spin_unlock(&dir->i_lock); - ret = nfs_do_call_unlink(parent, dir, data); -out_dput: - dput(parent); -out_free: - return ret; -} - -void nfs_wait_on_sillyrename(struct dentry *dentry) -{ - struct nfs_inode *nfsi = NFS_I(d_inode(dentry)); - - wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1); -} - -void nfs_block_sillyrename(struct dentry *dentry) -{ - struct nfs_inode *nfsi = NFS_I(d_inode(dentry)); - - wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1); -} - -void nfs_unblock_sillyrename(struct dentry *dentry) -{ - struct inode *dir = d_inode(dentry); - struct nfs_inode *nfsi = NFS_I(dir); - struct nfs_unlinkdata *data; - - atomic_inc(&nfsi->silly_count); - wake_up(&nfsi->waitqueue); - spin_lock(&dir->i_lock); - while (!hlist_empty(&nfsi->silly_list)) { - if (!atomic_inc_not_zero(&nfsi->silly_count)) - break; - data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list); - hlist_del(&data->list); - spin_unlock(&dir->i_lock); - if (nfs_do_call_unlink(dentry, dir, data) == 0) - nfs_free_unlinkdata(data); - spin_lock(&dir->i_lock); - } - spin_unlock(&dir->i_lock); -} - /** * nfs_async_unlink - asynchronous unlinking of a file * @dir: parent directory of dentry * @dentry: dentry to unlink */ static int -nfs_async_unlink(struct inode *dir, struct dentry *dentry) +nfs_async_unlink(struct dentry *dentry, struct qstr *name) { struct nfs_unlinkdata *data; int status = -ENOMEM; @@ -259,13 +171,18 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) goto out; + data->args.name.name = kstrdup(name->name, GFP_KERNEL); + if (!data->args.name.name) + goto out_free; + data->args.name.len = name->len; data->cred = rpc_lookup_cred(); if (IS_ERR(data->cred)) { status = PTR_ERR(data->cred); - goto out_free; + goto out_free_name; } data->res.dir_attr = &data->dir_attr; + init_waitqueue_head(&data->wq); status = -EBUSY; spin_lock(&dentry->d_lock); @@ -285,6 +202,8 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) out_unlock: spin_unlock(&dentry->d_lock); put_rpccred(data->cred); +out_free_name: + kfree(data->args.name.name); out_free: kfree(data); out: @@ -303,17 +222,15 @@ out: void nfs_complete_unlink(struct dentry *dentry, struct inode *inode) { - struct nfs_unlinkdata *data = NULL; + struct nfs_unlinkdata *data; spin_lock(&dentry->d_lock); - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; - data = dentry->d_fsdata; - dentry->d_fsdata = NULL; - } + dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; + data = dentry->d_fsdata; + dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); - if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)) nfs_free_unlinkdata(data); } @@ -560,18 +477,10 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) /* queue unlink first. Can't do this from rpc_release as it * has to allocate memory */ - error = nfs_async_unlink(dir, dentry); + error = nfs_async_unlink(dentry, &sdentry->d_name); if (error) goto out_dput; - /* populate unlinkdata with the right dname */ - error = nfs_copy_dname(sdentry, - (struct nfs_unlinkdata *)dentry->d_fsdata); - if (error) { - nfs_cancel_async_unlink(dentry); - goto out_dput; - } - /* run the rename task, undo unlink if it fails */ task = nfs_async_rename(dir, dir, dentry, sdentry, nfs_complete_sillyrename); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67300f8e5f2f..fa167f25465d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -163,11 +163,9 @@ struct nfs_inode { /* Open contexts for shared mmap writes */ struct list_head open_files; - /* Number of in-flight sillydelete RPC calls */ - atomic_t silly_count; - /* List of deferred sillydelete requests */ - struct hlist_head silly_list; - wait_queue_head_t waitqueue; + /* Readers: in-flight sillydelete RPC calls */ + /* Writers: rmdir */ + struct rw_semaphore rmdir_sem; #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; @@ -492,9 +490,6 @@ extern void nfs_release_automount_timer(void); * linux/fs/nfs/unlink.c */ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); -extern void nfs_wait_on_sillyrename(struct dentry *dentry); -extern void nfs_block_sillyrename(struct dentry *dentry); -extern void nfs_unblock_sillyrename(struct dentry *dentry); /* * linux/fs/nfs/write.c diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d320906cf13e..ee8491dadbf3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1468,10 +1468,10 @@ struct nfs_pgio_completion_ops { }; struct nfs_unlinkdata { - struct hlist_node list; struct nfs_removeargs args; struct nfs_removeres res; - struct inode *dir; + struct dentry *dentry; + wait_queue_head_t wq; struct rpc_cred *cred; struct nfs_fattr dir_attr; long timeout; -- cgit v1.2.3 From 6b56a89833fa7903595c8d138bb4927187315cba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Jun 2016 18:23:01 -0400 Subject: NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer filemap_datawrite() and friends already deal just fine with livelock. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 8 -------- fs/nfs/nfstrace.h | 1 - fs/nfs/write.c | 11 ----------- include/linux/nfs_fs.h | 1 - 4 files changed, 21 deletions(-) (limited to 'include/linux/nfs_fs.h') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2d39d9f9da7d..29d7477a62e8 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -359,14 +359,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file, mapping->host->i_ino, len, (long long) pos); start: - /* - * Prevent starvation issues if someone is doing a consistency - * sync-to-disk - */ - ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, - nfs_wait_bit_killable, TASK_KILLABLE); - if (ret) - return ret; /* * Wait for O_DIRECT to complete */ diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 0b9e5cc9a747..fe80a1c26340 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -37,7 +37,6 @@ { 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \ { 1 << NFS_INO_STALE, "STALE" }, \ { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ - { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e1c74d3db64d..980d44f3a84c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -657,16 +657,9 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; - unsigned long *bitlock = &NFS_I(inode)->flags; struct nfs_pageio_descriptor pgio; int err; - /* Stop dirtying of new pages while we sync */ - err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING, - nfs_wait_bit_killable, TASK_KILLABLE); - if (err) - goto out_err; - nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, @@ -674,10 +667,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); - clear_bit_unlock(NFS_INO_FLUSHING, bitlock); - smp_mb__after_atomic(); - wake_up_bit(bitlock, NFS_INO_FLUSHING); - if (err < 0) goto out_err; err = pgio.pg_error; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d71278c3c5bd..120dd04b553c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -205,7 +205,6 @@ struct nfs_inode { #define NFS_INO_STALE (1) /* possible stale inode */ #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ -#define NFS_INO_FLUSHING (4) /* inode is flushing out data */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ -- cgit v1.2.3 From a5864c999de6703f7ce908f72337568520c6cad3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Jun 2016 17:07:19 -0400 Subject: NFS: Do not serialise O_DIRECT reads and writes Allow dio requests to be scheduled in parallel, but ensuring that they do not conflict with buffered I/O. Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/direct.c | 41 +++----------- fs/nfs/file.c | 12 ++-- fs/nfs/internal.h | 8 +++ fs/nfs/io.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 1 + 6 files changed, 174 insertions(+), 37 deletions(-) create mode 100644 fs/nfs/io.c (limited to 'include/linux/nfs_fs.h') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 8664417955a2..6abdda209642 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o CFLAGS_nfstrace.o += -I$(src) nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ - direct.o pagelist.o read.o symlink.o unlink.o \ + io.o direct.o pagelist.o read.o symlink.o unlink.o \ write.o namespace.o mount_clnt.o nfstrace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0169eca8eb42..6d0e88096440 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -578,17 +578,12 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (!count) goto out; - inode_lock(inode); - result = nfs_sync_mapping(mapping); - if (result) - goto out_unlock; - task_io_account_read(count); result = -ENOMEM; dreq = nfs_direct_req_alloc(); if (dreq == NULL) - goto out_unlock; + goto out; dreq->inode = inode; dreq->bytes_left = dreq->max_count = count; @@ -603,10 +598,12 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; + nfs_start_io_direct(inode); + NFS_I(inode)->read_io += count; result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); - inode_unlock(inode); + nfs_end_io_direct(inode); if (!result) { result = nfs_direct_wait(dreq); @@ -614,13 +611,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) iocb->ki_pos += result; } - nfs_direct_req_release(dreq); - return result; - out_release: nfs_direct_req_release(dreq); -out_unlock: - inode_unlock(inode); out: return result; } @@ -1008,25 +1000,12 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) pos = iocb->ki_pos; end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT; - inode_lock(inode); - - result = nfs_sync_mapping(mapping); - if (result) - goto out_unlock; - - if (mapping->nrpages) { - result = invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); - if (result) - goto out_unlock; - } - task_io_account_write(count); result = -ENOMEM; dreq = nfs_direct_req_alloc(); if (!dreq) - goto out_unlock; + goto out; dreq->inode = inode; dreq->bytes_left = dreq->max_count = count; @@ -1041,6 +1020,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; + nfs_start_io_direct(inode); + result = nfs_direct_write_schedule_iovec(dreq, iter, pos); if (mapping->nrpages) { @@ -1048,7 +1029,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) pos >> PAGE_SHIFT, end); } - inode_unlock(inode); + nfs_end_io_direct(inode); if (!result) { result = nfs_direct_wait(dreq); @@ -1058,13 +1039,9 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) generic_write_sync(iocb, result); } } - nfs_direct_req_release(dreq); - return result; - out_release: nfs_direct_req_release(dreq); -out_unlock: - inode_unlock(inode); +out: return result; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 46cf0afe3c0f..9f8da9e1b23f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -170,12 +170,14 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) iocb->ki_filp, iov_iter_count(to), (unsigned long) iocb->ki_pos); - result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping); + nfs_start_io_read(inode); + result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { result = generic_file_read_iter(iocb, to); if (result > 0) nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); } + nfs_end_io_read(inode); return result; } EXPORT_SYMBOL_GPL(nfs_file_read); @@ -191,12 +193,14 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", filp, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_mapping_protected(inode, filp->f_mapping); + nfs_start_io_read(inode); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (!res) { res = generic_file_splice_read(filp, ppos, pipe, count, flags); if (res > 0) nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res); } + nfs_end_io_read(inode); return res; } EXPORT_SYMBOL_GPL(nfs_file_splice_read); @@ -645,14 +649,14 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) goto out; } - inode_lock(inode); + nfs_start_io_write(inode); result = generic_write_checks(iocb, from); if (result > 0) { current->backing_dev_info = inode_to_bdi(inode); result = generic_perform_write(file, from, iocb->ki_pos); current->backing_dev_info = NULL; } - inode_unlock(inode); + nfs_end_io_write(inode); if (result <= 0) goto out; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 0eb5c924886d..159b64ede82a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -411,6 +411,14 @@ extern void __exit unregister_nfs_fs(void); extern bool nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); +/* io.c */ +extern void nfs_start_io_read(struct inode *inode); +extern void nfs_end_io_read(struct inode *inode); +extern void nfs_start_io_write(struct inode *inode); +extern void nfs_end_io_write(struct inode *inode); +extern void nfs_start_io_direct(struct inode *inode); +extern void nfs_end_io_direct(struct inode *inode); + /* namespace.c */ #define NFS_PATH_CANONICAL 1 extern char *nfs_path(char **p, struct dentry *dentry, diff --git a/fs/nfs/io.c b/fs/nfs/io.c new file mode 100644 index 000000000000..1fc5d1ce327e --- /dev/null +++ b/fs/nfs/io.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016 Trond Myklebust + * + * I/O and data path helper functionality. + */ + +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* Call with exclusively locked inode->i_rwsem */ +static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) +{ + if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { + clear_bit(NFS_INO_ODIRECT, &nfsi->flags); + inode_dio_wait(inode); + } +} + +/** + * nfs_start_io_read - declare the file is being used for buffered reads + * @inode - file inode + * + * Declare that a buffered read operation is about to start, and ensure + * that we block all direct I/O. + * On exit, the function ensures that the NFS_INO_ODIRECT flag is unset, + * and holds a shared lock on inode->i_rwsem to ensure that the flag + * cannot be changed. + * In practice, this means that buffered read operations are allowed to + * execute in parallel, thanks to the shared lock, whereas direct I/O + * operations need to wait to grab an exclusive lock in order to set + * NFS_INO_ODIRECT. + * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. + */ +void +nfs_start_io_read(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + /* Be an optimist! */ + down_read(&inode->i_rwsem); + if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0) + return; + up_read(&inode->i_rwsem); + /* Slow path.... */ + down_write(&inode->i_rwsem); + nfs_block_o_direct(nfsi, inode); + downgrade_write(&inode->i_rwsem); +} + +/** + * nfs_end_io_read - declare that the buffered read operation is done + * @inode - file inode + * + * Declare that a buffered read operation is done, and release the shared + * lock on inode->i_rwsem. + */ +void +nfs_end_io_read(struct inode *inode) +{ + up_read(&inode->i_rwsem); +} + +/** + * nfs_start_io_write - declare the file is being used for buffered writes + * @inode - file inode + * + * Declare that a buffered read operation is about to start, and ensure + * that we block all direct I/O. + */ +void +nfs_start_io_write(struct inode *inode) +{ + down_write(&inode->i_rwsem); + nfs_block_o_direct(NFS_I(inode), inode); +} + +/** + * nfs_end_io_write - declare that the buffered write operation is done + * @inode - file inode + * + * Declare that a buffered write operation is done, and release the + * lock on inode->i_rwsem. + */ +void +nfs_end_io_write(struct inode *inode) +{ + up_write(&inode->i_rwsem); +} + +/* Call with exclusively locked inode->i_rwsem */ +static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) +{ + if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { + set_bit(NFS_INO_ODIRECT, &nfsi->flags); + nfs_wb_all(inode); + } +} + +/** + * nfs_end_io_direct - declare the file is being used for direct i/o + * @inode - file inode + * + * Declare that a direct I/O operation is about to start, and ensure + * that we block all buffered I/O. + * On exit, the function ensures that the NFS_INO_ODIRECT flag is set, + * and holds a shared lock on inode->i_rwsem to ensure that the flag + * cannot be changed. + * In practice, this means that direct I/O operations are allowed to + * execute in parallel, thanks to the shared lock, whereas buffered I/O + * operations need to wait to grab an exclusive lock in order to clear + * NFS_INO_ODIRECT. + * Note that buffered writes and truncates both take a write lock on + * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. + */ +void +nfs_start_io_direct(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + /* Be an optimist! */ + down_read(&inode->i_rwsem); + if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0) + return; + up_read(&inode->i_rwsem); + /* Slow path.... */ + down_write(&inode->i_rwsem); + nfs_block_buffered(nfsi, inode); + downgrade_write(&inode->i_rwsem); +} + +/** + * nfs_end_io_direct - declare that the direct i/o operation is done + * @inode - file inode + * + * Declare that a direct I/O operation is done, and release the shared + * lock on inode->i_rwsem. + */ +void +nfs_end_io_direct(struct inode *inode) +{ + up_read(&inode->i_rwsem); +} diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 120dd04b553c..225d17d35277 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -210,6 +210,7 @@ struct nfs_inode { #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ +#define NFS_INO_ODIRECT (12) /* I/O setting is O_DIRECT */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3 From be527494e02b89e03485955b30de6c1e976a07eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2016 08:19:36 -0400 Subject: NFS: Remove unused function nfs_revalidate_mapping_protected() Clean up... Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 38 ++++---------------------------------- include/linux/nfs_fs.h | 1 - 2 files changed, 4 insertions(+), 35 deletions(-) (limited to 'include/linux/nfs_fs.h') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6c0618eb5d57..0e0500f2bb6b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1131,14 +1131,12 @@ out: } /** - * __nfs_revalidate_mapping - Revalidate the pagecache + * nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping - * @may_lock - take inode->i_mutex? */ -static int __nfs_revalidate_mapping(struct inode *inode, - struct address_space *mapping, - bool may_lock) +int nfs_revalidate_mapping(struct inode *inode, + struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); unsigned long *bitlock = &nfsi->flags; @@ -1187,12 +1185,7 @@ static int __nfs_revalidate_mapping(struct inode *inode, nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); - if (may_lock) { - inode_lock(inode); - ret = nfs_invalidate_mapping(inode, mapping); - inode_unlock(inode); - } else - ret = nfs_invalidate_mapping(inode, mapping); + ret = nfs_invalidate_mapping(inode, mapping); trace_nfs_invalidate_mapping_exit(inode, ret); clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); @@ -1202,29 +1195,6 @@ out: return ret; } -/** - * nfs_revalidate_mapping - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - */ -int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) -{ - return __nfs_revalidate_mapping(inode, mapping, false); -} - -/** - * nfs_revalidate_mapping_protected - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - * - * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex - * while invalidating the mapping. - */ -int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping) -{ - return __nfs_revalidate_mapping(inode, mapping, true); -} - static bool nfs_file_has_writers(struct nfs_inode *nfsi) { struct inode *inode = &nfsi->vfs_inode; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 225d17d35277..810124b33327 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -351,7 +351,6 @@ extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *ino extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); extern int nfs_revalidate_mapping_rcu(struct inode *inode); -extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *); extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, -- cgit v1.2.3