From 378520b837cf4da769600b83690d8e825f16a611 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 7 Aug 2014 10:15:02 +0800 Subject: nfs41: add a helper function to set layoutcommit after commit Track lwb in nfs_commit_data so that we can use it to setup layoutcommit in commit_done callback. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 175d5d073ccf..3c5638f381cd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1538,6 +1538,18 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, } EXPORT_SYMBOL_GPL(nfs_initiate_commit); +static loff_t nfs_get_lwb(struct list_head *head) +{ + loff_t lwb = 0; + struct nfs_page *req; + + list_for_each_entry(req, head, wb_list) + if (lwb < (req_offset(req) + req->wb_bytes)) + lwb = req_offset(req) + req->wb_bytes; + + return lwb; +} + /* * Set up the argument/result storage required for the RPC call. */ @@ -1557,6 +1569,9 @@ void nfs_init_commit(struct nfs_commit_data *data, data->inode = inode; data->cred = first->wb_context->cred; data->lseg = lseg; /* reference transferred */ + /* only set lwb for pnfs commit */ + if (lseg) + data->lwb = nfs_get_lwb(&data->pages); data->mds_ops = &nfs_commit_ops; data->completion_ops = cinfo->completion_ops; data->dreq = cinfo->dreq; -- cgit v1.2.3 From 3a3908c8b09d5ec19d543836d4f38d240ae27fe8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Sep 2014 22:21:00 -0700 Subject: NFS: Fix a compile warning when !(CONFIG_NFS_V3 || CONFIG_NFS_V4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc reports: linux/fs/nfs/write.c: In function ‘nfs_page_find_head_request_locked.isra.17’: linux/fs/nfs/write.c:121:64: warning: ‘cinfo.mds’ may be used uninitialized in this function [-Wmaybe-uninitialized] list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { ^ linux/fs/nfs/write.c:110:25: note: ‘cinfo.mds’ was declared here struct nfs_commit_info cinfo; Reported-by: Anna Schumaker Cc: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 74 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 32 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3c5638f381cd..128d97f01d1c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -49,6 +49,9 @@ static const struct nfs_rw_ops nfs_rw_write_ops; static void nfs_clear_request_commit(struct nfs_page *req); static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); +static struct nfs_page * +nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, + struct page *page); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -94,38 +97,6 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } -/* - * nfs_page_search_commits_for_head_request_locked - * - * Search through commit lists on @inode for the head request for @page. - * Must be called while holding the inode (which is cinfo) lock. - * - * Returns the head request if found, or NULL if not found. - */ -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct page *page) -{ - struct nfs_page *freq, *t; - struct nfs_commit_info cinfo; - struct inode *inode = &nfsi->vfs_inode; - - nfs_init_cinfo_from_inode(&cinfo, inode); - - /* search through pnfs commit lists */ - freq = pnfs_search_commit_reqs(inode, &cinfo, page); - if (freq) - return freq->wb_head; - - /* Linearly search the commit list for the correct request */ - list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { - if (freq->wb_page == page) - return freq->wb_head; - } - - return NULL; -} - /* * nfs_page_find_head_request_locked - find head request associated with @page * @@ -750,6 +721,38 @@ nfs_mark_request_dirty(struct nfs_page *req) } #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) +/* + * nfs_page_search_commits_for_head_request_locked + * + * Search through commit lists on @inode for the head request for @page. + * Must be called while holding the inode (which is cinfo) lock. + * + * Returns the head request if found, or NULL if not found. + */ +static struct nfs_page * +nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, + struct page *page) +{ + struct nfs_page *freq, *t; + struct nfs_commit_info cinfo; + struct inode *inode = &nfsi->vfs_inode; + + nfs_init_cinfo_from_inode(&cinfo, inode); + + /* search through pnfs commit lists */ + freq = pnfs_search_commit_reqs(inode, &cinfo, page); + if (freq) + return freq->wb_head; + + /* Linearly search the commit list for the correct request */ + list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { + if (freq->wb_page == page) + return freq->wb_head; + } + + return NULL; +} + /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page @@ -868,6 +871,13 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr) } #else +static struct nfs_page * +nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, + struct page *page) +{ + return NULL; +} + static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode) { -- cgit v1.2.3 From f418c64b71590bac8fdebd0969a1eeaffaf036d2 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 3 Sep 2014 12:19:07 -0400 Subject: NFS: Unconditionally enable commit code The goal is to create a generic NFS module with code that does not depend on what versions of NFS are enabled. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 14 ------------ fs/nfs/write.c | 61 -------------------------------------------------- include/linux/nfs_fs.h | 8 ------- 3 files changed, 83 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 65ef6e00deee..dda4b8667c02 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -178,7 +178,6 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /* * nfs_direct_cmp_commit_data_verf - compare verifier for commit data * @dreq - direct request possibly spanning multiple servers @@ -197,7 +196,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, WARN_ON_ONCE(verfp->committed < 0); return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); } -#endif /** * nfs_direct_IO - NFS address space operation for direct I/O @@ -576,7 +574,6 @@ out: return result; } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; @@ -700,17 +697,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ } -#else -static void nfs_direct_write_schedule_work(struct work_struct *work) -{ -} - -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) -{ - nfs_direct_complete(dreq, true); -} -#endif - static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 128d97f01d1c..6786873a2901 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -720,7 +720,6 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /* * nfs_page_search_commits_for_head_request_locked * @@ -870,43 +869,6 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr) return hdr->verf.committed != NFS_FILE_SYNC; } -#else -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct page *page) -{ - return NULL; -} - -static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, - struct inode *inode) -{ -} - -void nfs_init_cinfo(struct nfs_commit_info *cinfo, - struct inode *inode, - struct nfs_direct_req *dreq) -{ -} - -void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) -{ -} - -static void -nfs_clear_request_commit(struct nfs_page *req) -{ -} - -int nfs_write_need_commit(struct nfs_pgio_header *hdr) -{ - return 0; -} - -#endif - static void nfs_write_completion(struct nfs_pgio_header *hdr) { struct nfs_commit_info cinfo; @@ -942,7 +904,6 @@ out: hdr->release(hdr); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -999,19 +960,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, return ret; } -#else -unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) -{ - return 0; -} - -int nfs_scan_commit(struct inode *inode, struct list_head *dst, - struct nfs_commit_info *cinfo) -{ - return 0; -} -#endif - /* * Search for an existing write request, and attempt to update * it to reflect a new dirty region on a given page. @@ -1404,7 +1352,6 @@ static int nfs_writeback_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) if (hdr->res.verf->committed < hdr->args.stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1426,7 +1373,6 @@ static int nfs_writeback_done(struct rpc_task *task, complain = jiffies + 300 * HZ; } } -#endif /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) @@ -1479,7 +1425,6 @@ static void nfs_writeback_result(struct rpc_task *task, } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; @@ -1803,12 +1748,6 @@ out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } -#else -static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) -{ - return 0; -} -#endif int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5180a7ededec..fd334d4b0d41 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -529,17 +529,9 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); -#else -static inline int -nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} -#endif static inline int nfs_have_writebacks(struct inode *inode) -- cgit v1.2.3 From e87b4c7a7ac6d895846570dec637744cf7050df3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Sep 2014 16:09:27 +1000 Subject: NFS: don't use STABLE writes during writeback. commit b31268ac793fd300da66b9c28bbf0a200339ab96 FS: Use stable writes when not doing a bulk flush was a bit heavy handed. The particular problem that lead to this patch was that small writes to an O_SYNC file we being written as UNSTABLE writes followed by a commit. This is appropriate for large writes (which require multiple NFS requests) but for small writes (single NFS request), using NFS_FILE_SYNC is more efficient. So that patch causes the code to select between the two methods depending on how many nfs requests get generated. Unfortunately this ends up applying to non O_SYNC writes as well. In particular if you memory-map a file and update random pages, then when they are eventually written out by writeback they will go as NFS_FILE_SYNC. This is inefficient and slows down the application. So: only set FLUSH_COND_STABLE when wbc->sync_mode is WB_SYNC_ALL. With this patch: O_SYNC writes are NFS_FILE_SYNC for single requests, and NFS_UNSTABLE followed by COMMIT for multiple requests Writing immediately before close of fsync follow the same pattern. Non-O_SYNC writes without an fsync of close eventually get flushed out as UNSTABLE and a commit follows eventually as appropriate. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6786873a2901..a623b00530c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -242,11 +242,14 @@ static void nfs_mark_uptodate(struct nfs_page *req) static int wb_priority(struct writeback_control *wbc) { + int ret = 0; if (wbc->for_reclaim) return FLUSH_HIGHPRI | FLUSH_STABLE; + if (wbc->sync_mode == WB_SYNC_ALL) + ret = FLUSH_COND_STABLE; if (wbc->for_kupdate || wbc->for_background) - return FLUSH_LOWPRI | FLUSH_COND_STABLE; - return FLUSH_COND_STABLE; + ret |= FLUSH_LOWPRI; + return ret; } /* -- cgit v1.2.3 From 9590544694becc64f4874963dbfc4b4d391024b7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 24 Sep 2014 11:28:32 +1000 Subject: NFS: avoid deadlocks with loop-back mounted NFS filesystems. Support for loop-back mounted NFS filesystems is useful when NFS is used to access shared storage in a high-availability cluster. If the node running the NFS server fails, some other node can mount the filesystem and start providing NFS service. If that node already had the filesystem NFS mounted, it will now have it loop-back mounted. nfsd can suffer a deadlock when allocating memory and entering direct reclaim. While direct reclaim does not write to the NFS filesystem it can send and wait for a COMMIT through nfs_release_page(). This patch modifies nfs_release_page() to wait a limited time for the commit to complete - one second. If the commit doesn't complete in this time, nfs_release_page() will fail. This means it might now fail in some cases where it wouldn't before. These cases are only when 'gfp' includes '__GFP_WAIT'. nfs_release_page() is only called by try_to_release_page(), and that can only be called on an NFS page with required 'gfp' flags from - page_cache_pipe_buf_steal() in splice.c - shrink_page_list() in vmscan.c - invalidate_inode_pages2_range() in truncate.c The first two handle failure quite safely. The last is only called after ->launder_page() has been called, and that will have waited for the commit to finish already. So aborting if the commit takes longer than 1 second is perfectly safe. Signed-off-by: NeilBrown Acked-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 26 ++++++++++++++++---------- fs/nfs/write.c | 2 ++ 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 23e5f0ea5c83..325df0aeab05 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -475,17 +475,23 @@ static int nfs_release_page(struct page *page, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not - * doing this memory reclaim for a fs-related allocation. + /* Always try to initiate a 'commit' if relevant, but only + * wait for it if __GFP_WAIT is set and the calling process is + * allowed to block. Even then, only wait 1 second. + * Waiting indefinitely can cause deadlocks when the NFS + * server is on this machine, and there is no particular need + * to wait extensively here. A short wait has the benefit + * that someone else can worry about the freezer. */ - if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && - !(current->flags & PF_FSTRANS)) { - int how = FLUSH_SYNC; - - /* Don't let kswapd deadlock waiting for OOM RPC calls */ - if (current_is_kswapd()) - how = 0; - nfs_commit_inode(mapping->host, how); + if (mapping) { + struct nfs_server *nfss = NFS_SERVER(mapping->host); + nfs_commit_inode(mapping->host, 0); + if ((gfp & __GFP_WAIT) && + !current_is_kswapd() && + !(current->flags & PF_FSTRANS)) { + wait_on_page_bit_killable_timeout(page, PG_private, + HZ); + } } /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a623b00530c3..c063a4e70354 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -705,6 +705,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (likely(!PageSwapCache(head->wb_page))) { set_page_private(head->wb_page, 0); ClearPagePrivate(head->wb_page); + smp_mb__after_atomic(); + wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } nfsi->npages--; -- cgit v1.2.3 From 353db7966288a2f18da22438aeec2b4862c0b241 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 24 Sep 2014 11:28:32 +1000 Subject: NFS: avoid waiting at all in nfs_release_page when congested. If nfs_release_page() is called on a sequence of pages which are all in the same file which is blocked on COMMIT, each page could contribute a 1 second delay which could be come excessive. I have seen delays of as much as 208 seconds. To keep the delay to one second, mark the bdi as write-congested if the commit didn't finished. Once it does finish, the write-congested flag will be cleared by nfs_commit_release_pages(). With this, the longest total delay in try_to_free_pages that I have seen is under 3 seconds. With no waiting in nfs_release_page at all I have seen delays of nearly 1.5 seconds. Signed-off-by: NeilBrown Acked-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 9 +++++++-- fs/nfs/write.c | 5 +++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 325df0aeab05..24832d040eb8 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -477,7 +477,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp) /* Always try to initiate a 'commit' if relevant, but only * wait for it if __GFP_WAIT is set and the calling process is - * allowed to block. Even then, only wait 1 second. + * allowed to block. Even then, only wait 1 second and only + * if the 'bdi' is not congested. * Waiting indefinitely can cause deadlocks when the NFS * server is on this machine, and there is no particular need * to wait extensively here. A short wait has the benefit @@ -488,9 +489,13 @@ static int nfs_release_page(struct page *page, gfp_t gfp) nfs_commit_inode(mapping->host, 0); if ((gfp & __GFP_WAIT) && !current_is_kswapd() && - !(current->flags & PF_FSTRANS)) { + !(current->flags & PF_FSTRANS) && + !bdi_write_congested(&nfss->backing_dev_info)) { wait_on_page_bit_killable_timeout(page, PG_private, HZ); + if (PagePrivate(page)) + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); } } /* If PagePrivate() is set, then the page is not freeable */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c063a4e70354..12493846a2d3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1611,6 +1611,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; + struct nfs_server *nfss; while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1644,6 +1645,10 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) next: nfs_unlock_and_release_request(req); } + nfss = NFS_SERVER(data->inode); + if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); + nfs_init_cinfo(&cinfo, data->inode, data->dreq); if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) nfs_commit_clear_lock(NFS_I(data->inode)); -- cgit v1.2.3 From 16c9914069536c77ed358d94b6e247bdc464b7f0 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 3 Nov 2014 15:19:45 -0500 Subject: nfs: remove spurious WARN_ON_ONCE in write path This WARN_ON_ONCE was supposed to catch reference counting bugs, but can trigger in inappropriate situations. This was reproducible using NFSv2 on an architecture with 64K pages -- we verified that it was not a reference counting bug and the warning was safe to ignore. Reported-by: Will Deacon Tested-by: Will Deacon Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 12493846a2d3..f83b02dc9166 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -715,8 +715,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) nfs_release_request(req); - else - WARN_ON_ONCE(1); } static void -- cgit v1.2.3 From cb1410c71e0b6b2eba8c1890645a76ff86169d24 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 12 Nov 2014 12:08:00 -0500 Subject: NFS: fix subtle change in COMMIT behavior Recent work in the pgio layer made it possible for there to be more than one request per page. This caused a subtle change in commit behavior, because write.c:nfs_commit_unstable_pages compares the number of *pages* waiting for writeback against the number of requests on a commit list to choose when to send a COMMIT in a non-blocking flush. This is probably hard to hit in normal operation - you have to be using rsize/wsize < PAGE_SIZE, or pnfs with lots of boundaries that are not page aligned to have a noticeable change in behavior. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- fs/nfs/inode.c | 8 ++++---- fs/nfs/pagelist.c | 11 ++++++++--- fs/nfs/write.c | 17 ++++++++++++----- include/linux/nfs_fs.h | 4 ++-- 5 files changed, 27 insertions(+), 15 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 73466b934090..e36a9d78ea49 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; - if (nfsi->npages != 0) + if (nfsi->nrequests != 0) res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 00689a8a85e4..2b48ce58a584 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1149,7 +1149,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->npages == 0) { + && nfsi->nrequests == 0) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } @@ -1192,7 +1192,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->npages == 0) + if (cur_size != new_isize && nfsi->nrequests == 0) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } @@ -1619,7 +1619,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if ((nfsi->npages == 0) || new_isize > cur_isize) { + if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; invalid &= ~NFS_INO_REVAL_PAGECACHE; @@ -1784,7 +1784,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); - nfsi->npages = 0; + nfsi->nrequests = 0; nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); atomic_set(&nfsi->silly_count, 1); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ed0db61f8543..2b5e769beb16 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) static inline void nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) { + struct inode *inode; WARN_ON_ONCE(prev == req); if (!prev) { @@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) * nfs_page_group_destroy is called */ kref_get(&req->wb_head->wb_kref); - /* grab extra ref if head request has extra ref from - * the write/commit path to handle handoff between write - * and commit lists */ + /* grab extra ref and bump the request count if head request + * has extra ref from the write/commit path to handle handoff + * between write and commit lists. */ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { + inode = page_file_mapping(req->wb_page)->host; set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); + spin_lock(&inode->i_lock); + NFS_I(inode)->nrequests++; + spin_unlock(&inode->i_lock); } } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f83b02dc9166..d489ff3f438f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (!nfsi->nrequests && + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; /* * Swap-space should not get truncated. Hence no need to plug the race @@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); } - nfsi->npages++; + nfsi->nrequests++; /* this a head request for a page group - mark it as having an - * extra reference so sub groups can follow suit */ + * extra reference so sub groups can follow suit. + * This flag also informs pgio layer when to bump nrequests when + * adding subrequests. */ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -709,7 +712,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } - nfsi->npages--; + nfsi->nrequests--; + spin_unlock(&inode->i_lock); + } else { + spin_lock(&inode->i_lock); + nfsi->nrequests--; spin_unlock(&inode->i_lock); } @@ -1735,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ - if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) + if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1)) goto out_mark_dirty; /* don't wait for the COMMIT response */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c72d1ad41ad4..6d627b92df53 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -163,7 +163,7 @@ struct nfs_inode { */ __be32 cookieverf[2]; - unsigned long npages; + unsigned long nrequests; struct nfs_mds_commit_info commit_info; /* Open contexts for shared mmap writes */ @@ -520,7 +520,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data); static inline int nfs_have_writebacks(struct inode *inode) { - return NFS_I(inode)->npages != 0; + return NFS_I(inode)->nrequests != 0; } /* -- cgit v1.2.3 From 5a254d08b086d80cbead2ebcee6d2a4b3a15587a Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sun, 23 Nov 2014 12:47:17 +0800 Subject: nfs: replace nfs_add_stats with nfs_inc_stats when add one Signed-off-by: Li RongQing Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index beff2769c5c5..c91a4799c562 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -269,7 +269,7 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); - nfs_add_stats(inode, NFSIOS_READPAGES, 1); + nfs_inc_stats(inode, NFSIOS_READPAGES); /* * Try to flush any pending writes to the file.. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d489ff3f438f..af3af685a9e3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -575,7 +575,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); - nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + nfs_inc_stats(inode, NFSIOS_WRITEPAGES); nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); -- cgit v1.2.3