From dc602dd706cb64036132a7903ead1c67d9a7bcb9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Dec 2015 11:44:06 -0500 Subject: NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs The flexfiles layout in particular, seems to want to poke around in the O_DIRECT flags when retransmitting. This patch sets up an interface to allow it to call back into O_DIRECT to handle retransmission correctly. It also fixes a potential bug whereby we could change the behaviour of O_DIRECT if an error is already pending. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 11bbae44f4cb..e89dbb14138c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1460,6 +1460,7 @@ struct nfs_pgio_completion_ops { void (*error_cleanup)(struct list_head *head); void (*init_hdr)(struct nfs_pgio_header *hdr); void (*completion)(struct nfs_pgio_header *hdr); + void (*reschedule_io)(struct nfs_pgio_header *hdr); }; struct nfs_unlinkdata { -- cgit v1.2.3 From af7cf057933f01dc7f33ddfb5e436ad598ed17ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Sep 2015 20:34:05 -0400 Subject: NFS: Allow multiple commit requests in flight per file Allow synchronous RPC calls to wait for pending RPC calls to finish, but also allow asynchronous ones to just fire off another commit. With this patch, the xfstests generic/074 test completes in 226s instead of 242s Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 6 ----- fs/nfs/file.c | 2 +- fs/nfs/nfstrace.h | 1 - fs/nfs/pnfs_nfs.c | 5 +--- fs/nfs/write.c | 70 +++++++++++++++++++++++-------------------------- include/linux/nfs_fs.h | 1 - include/linux/nfs_xdr.h | 1 - 7 files changed, 35 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e73693f75dee..14f77df79c25 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -721,14 +721,8 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_direct_write_complete(dreq, data->inode); } -static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) -{ - /* There is no lock to clear */ -} - static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { .completion = nfs_direct_commit_complete, - .error_cleanup = nfs_direct_error_cleanup, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 93e236429c5d..e6ef80ec699c 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page, * so it will not block due to pages that will shortly be freeable. */ nfsi = NFS_I(mapping->host); - if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { + if (atomic_read(&nfsi->commit_info.rpcs_out)) { *writeback = true; return; } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 59f838cdc009..9f80a086b612 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -39,7 +39,6 @@ { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ - { 1 << NFS_INO_COMMIT, "COMMIT" }, \ { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 24655b807d44..3c8e3a44e6ea 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, } else { nfs_retry_commit(mds_pages, NULL, cinfo, 0); pnfs_generic_retry_commit(cinfo, 0); - cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } } nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); - if (nreq == 0) { - cinfo->completion_ops->error_cleanup(NFS_I(inode)); + if (nreq == 0) goto out; - } atomic_add(nreq, &cinfo->mds->rpcs_out); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0aa3e6b3db70..ae29f082c9c2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include @@ -1535,27 +1537,29 @@ static void nfs_writeback_result(struct rpc_task *task, } } +static int nfs_wait_atomic_killable(atomic_t *key) +{ + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + freezable_schedule_unsafe(); + return 0; +} -static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) +static int wait_on_commit(struct nfs_mds_commit_info *cinfo) { - int ret; + return wait_on_atomic_t(&cinfo->rpcs_out, + nfs_wait_atomic_killable, TASK_KILLABLE); +} - if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) - return 1; - if (!may_wait) - return 0; - ret = out_of_line_wait_on_bit_lock(&nfsi->flags, - NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - return (ret < 0) ? ret : 1; +static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) +{ + atomic_inc(&cinfo->rpcs_out); } -static void nfs_commit_clear_lock(struct nfs_inode *nfsi) +static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) { - clear_bit(NFS_INO_COMMIT, &nfsi->flags); - smp_mb__after_atomic(); - wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); + if (atomic_dec_and_test(&cinfo->rpcs_out)) + wake_up_atomic_t(&cinfo->rpcs_out); } void nfs_commitdata_release(struct nfs_commit_data *data) @@ -1693,7 +1697,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, data->mds_ops, how, 0); out_bad: nfs_retry_commit(head, NULL, cinfo, 0); - cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } @@ -1755,8 +1758,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); nfs_init_cinfo(&cinfo, data->inode, data->dreq); - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) - nfs_commit_clear_lock(NFS_I(data->inode)); + nfs_commit_end(cinfo.mds); } static void nfs_commit_release(void *calldata) @@ -1775,7 +1777,6 @@ static const struct rpc_call_ops nfs_commit_ops = { static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .completion = nfs_commit_release_pages, - .error_cleanup = nfs_commit_clear_lock, }; int nfs_generic_commit_list(struct inode *inode, struct list_head *head, @@ -1794,30 +1795,25 @@ int nfs_commit_inode(struct inode *inode, int how) LIST_HEAD(head); struct nfs_commit_info cinfo; int may_wait = how & FLUSH_SYNC; + int error = 0; int res; - res = nfs_commit_set_lock(NFS_I(inode), may_wait); - if (res <= 0) - goto out_mark_dirty; nfs_init_cinfo_from_inode(&cinfo, inode); + nfs_commit_begin(cinfo.mds); res = nfs_scan_commit(inode, &head, &cinfo); - if (res) { - int error; - + if (res) error = nfs_generic_commit_list(inode, &head, how, &cinfo); - if (error < 0) - return error; - if (!may_wait) - goto out_mark_dirty; - error = wait_on_bit_action(&NFS_I(inode)->flags, - NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - if (error < 0) - return error; - } else - nfs_commit_clear_lock(NFS_I(inode)); + nfs_commit_end(cinfo.mds); + if (error < 0) + goto out_error; + if (!may_wait) + goto out_mark_dirty; + error = wait_on_commit(cinfo.mds); + if (error < 0) + return error; return res; +out_error: + res = error; /* Note: If we exit without ensuring that the commit is complete, * we must mark the inode as dirty. Otherwise, future calls to * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c0e961474a52..ebf0bd72a42b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -216,7 +216,6 @@ struct nfs_inode { #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ -#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e89dbb14138c..a8905b7d4d7f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1423,7 +1423,6 @@ struct nfs_mds_commit_info { struct nfs_commit_data; struct nfs_inode; struct nfs_commit_completion_ops { - void (*error_cleanup) (struct nfs_inode *nfsi); void (*completion) (struct nfs_commit_data *data); }; -- cgit v1.2.3 From b20135d0b2431900a3a5395970ffb7e4f3767c8b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Dec 2015 09:28:06 -0500 Subject: NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid If the layout segment is invalid, then we should not be adding more write requests to the commit list. Instead, those writes should be replayed after requesting a new layout. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 ++ fs/nfs/direct.c | 12 ++++++++++++ fs/nfs/pnfs.c | 3 +++ fs/nfs/pnfs.h | 6 ++++++ fs/nfs/pnfs_nfs.c | 5 +++++ fs/nfs/write.c | 9 +++++++++ include/linux/nfs_xdr.h | 2 ++ 7 files changed, 39 insertions(+) (limited to 'include') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index e4dbab5dce6e..2be8b252e3b1 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -233,6 +233,8 @@ static u32 initiate_file_draining(struct nfs_client *clp, unlock: spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); + /* Free all lsegs that are attached to commit buckets */ + nfs_commit_inode(ino, 0); pnfs_put_layout_hdr(lo); trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, &args->cbl_stateid, -rv); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 14f77df79c25..a9a93927fe3e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -721,8 +721,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_direct_write_complete(dreq, data->inode); } +static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) +{ + struct nfs_direct_req *dreq = cinfo->dreq; + + spin_lock(&dreq->lock); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&dreq->lock); + nfs_mark_request_commit(req, NULL, cinfo, 0); +} + static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { .completion = nfs_direct_commit_complete, + .resched_write = nfs_direct_resched_write, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 360fe95c97b5..6593be7c0129 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -703,6 +703,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, ret = -EAGAIN; spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&lseg_list); + /* Free all lsegs that are attached to commit buckets */ + nfs_commit_inode(inode, 0); pnfs_put_layout_hdr(lo); iput(inode); } @@ -1811,6 +1813,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, pnfs_mark_matching_lsegs_return(lo, &free_me, &range); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&free_me); + nfs_commit_inode(inode, 0); } EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d93c2ebc0fd3..4bd7faf9ce50 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -412,6 +412,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg) return lseg; } +static inline bool +pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg) +{ + return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0; +} + /* Return true if a layout driver is being used for this mountpoint */ static inline int pnfs_enabled_sb(struct nfs_server *nfss) { diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 3c8e3a44e6ea..81ac6480f9e7 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -868,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, buckets = cinfo->ds->buckets; list = &buckets[ds_commit_idx].written; if (list_empty(list)) { + if (!pnfs_is_valid_lseg(lseg)) { + spin_unlock(cinfo->lock); + cinfo->completion_ops->resched_write(cinfo, req); + return; + } /* Non-empty buckets hold a reference on the lseg. That ref * is normally transferred to the COMMIT call and released * there. It could also be released if the last req is pulled diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ae29f082c9c2..0aa8d6f23b4c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1676,6 +1676,13 @@ void nfs_retry_commit(struct list_head *page_list, } EXPORT_SYMBOL_GPL(nfs_retry_commit); +static void +nfs_commit_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) +{ + __set_page_dirty_nobuffers(req->wb_page); +} + /* * Commit dirty pages */ @@ -1777,6 +1784,7 @@ static const struct rpc_call_ops nfs_commit_ops = { static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .completion = nfs_commit_release_pages, + .resched_write = nfs_commit_resched_write, }; int nfs_generic_commit_list(struct inode *inode, struct list_head *head, @@ -1823,6 +1831,7 @@ out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return res; } +EXPORT_SYMBOL_GPL(nfs_commit_inode); int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a8905b7d4d7f..bee3e60a7006 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1420,10 +1420,12 @@ struct nfs_mds_commit_info { struct list_head list; }; +struct nfs_commit_info; struct nfs_commit_data; struct nfs_inode; struct nfs_commit_completion_ops { void (*completion) (struct nfs_commit_data *data); + void (*resched_write) (struct nfs_commit_info *, struct nfs_page *); }; struct nfs_commit_info { -- cgit v1.2.3