From 08fead2ae5a9953d47677416cc5f6bcae448480d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Jul 2017 19:31:10 -0400 Subject: NFS: Ensure we always dereference the page head last This fixes a race with nfs_page_group_sync_on_bit() whereby the call to wake_up_bit() in nfs_page_group_unlock() could occur after the page header had been freed. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index de9066a92c0d..a6f2bbd709ba 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -306,14 +306,11 @@ static void nfs_page_group_destroy(struct kref *kref) { struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + struct nfs_page *head = req->wb_head; struct nfs_page *tmp, *next; - /* subrequests must release the ref on the head request */ - if (req->wb_head != req) - nfs_release_request(req->wb_head); - if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) - return; + goto out; tmp = req; do { @@ -324,6 +321,10 @@ nfs_page_group_destroy(struct kref *kref) nfs_free_request(tmp); tmp = next; } while (tmp != req); +out: + /* subrequests must release the ref on the head request */ + if (head != req) + nfs_release_request(head); } /** -- cgit v1.2.3 From dee83046e73cb7ebbbae955c1ef0f4f55a0f44f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Jul 2017 10:51:02 -0400 Subject: NFS: Remove unused function nfs_page_group_lock_wait() Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 21 --------------------- include/linux/nfs_page.h | 1 - 2 files changed, 22 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index a6f2bbd709ba..ced7974622dd 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -165,27 +165,6 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock) return -EAGAIN; } -/* - * nfs_page_group_lock_wait - wait for the lock to 
clear, but don't grab it - * @req - a request in the group - * - * This is a blocking call to wait for the group lock to be cleared. - */ -void -nfs_page_group_lock_wait(struct nfs_page *req) -{ - struct nfs_page *head = req->wb_head; - - WARN_ON_ONCE(head != head->wb_head); - - if (!test_bit(PG_HEADLOCK, &head->wb_flags)) - return; - set_bit(PG_CONTENDED1, &head->wb_flags); - smp_mb__after_atomic(); - wait_on_bit(&head->wb_flags, PG_HEADLOCK, - TASK_UNINTERRUPTIBLE); -} - /* * nfs_page_group_unlock - unlock the head of the page group * @req - request in group that is to be unlocked diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d67b67ae6c8b..de1d24cedaa2 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -140,7 +140,6 @@ extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern int nfs_page_group_lock(struct nfs_page *, bool); -extern void nfs_page_group_lock_wait(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); -- cgit v1.2.3 From 1344b7ea172b4911a8ee8a6ff26c5bc6b5abb302 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Jul 2017 10:54:14 -0400 Subject: NFS: Remove unused parameter from nfs_page_group_lock() nfs_page_group_lock() is now always called with the 'nonblock' parameter set to 'false'. 
Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 31 +++++++++++-------------------- fs/nfs/write.c | 6 +++--- include/linux/nfs_page.h | 2 +- 3 files changed, 15 insertions(+), 24 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ced7974622dd..af6731dd4324 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -134,19 +134,14 @@ EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); /* * nfs_page_group_lock - lock the head of the page group * @req - request in group that is to be locked - * @nonblock - if true don't block waiting for lock * - * this lock must be held if modifying the page group list + * this lock must be held when traversing or modifying the page + * group list * - * return 0 on success, < 0 on error: -EDELAY if nonblocking or the - * result from wait_on_bit_lock - * - * NOTE: calling with nonblock=false should always have set the - * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock - * with TASK_UNINTERRUPTIBLE), so there is no need to check the result. 
+ * return 0 on success, < 0 on error */ int -nfs_page_group_lock(struct nfs_page *req, bool nonblock) +nfs_page_group_lock(struct nfs_page *req) { struct nfs_page *head = req->wb_head; @@ -155,14 +150,10 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock) if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) return 0; - if (!nonblock) { - set_bit(PG_CONTENDED1, &head->wb_flags); - smp_mb__after_atomic(); - return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + set_bit(PG_CONTENDED1, &head->wb_flags); + smp_mb__after_atomic(); + return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, TASK_UNINTERRUPTIBLE); - } - - return -EAGAIN; } /* @@ -225,7 +216,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) { bool ret; - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); ret = nfs_page_group_sync_on_bit_locked(req, bit); nfs_page_group_unlock(req); @@ -1016,7 +1007,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, unsigned int bytes_left = 0; unsigned int offset, pgbase; - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); subreq = req; bytes_left = subreq->wb_bytes; @@ -1038,7 +1029,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (mirror->pg_recoalesce) return 0; /* retry add_request for this subreq */ - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); continue; } @@ -1135,7 +1126,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, for (midx = 0; midx < desc->pg_mirror_count; midx++) { if (midx) { - nfs_page_group_lock(req, false); + nfs_page_group_lock(req); /* find the last request */ for (lastreq = req->wb_head; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 20d44ea328b6..0f418d825185 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -271,7 +271,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) unsigned int pos = 0; unsigned int len = nfs_page_length(req->wb_page); - nfs_page_group_lock(req, false); + 
nfs_page_group_lock(req); do { tmp = nfs_page_group_search_locked(req->wb_head, pos); @@ -480,7 +480,7 @@ try_again: } spin_unlock(&inode->i_lock); - ret = nfs_page_group_lock(head, false); + ret = nfs_page_group_lock(head); if (ret < 0) { nfs_unlock_and_release_request(head); return ERR_PTR(ret); @@ -501,7 +501,7 @@ try_again: nfs_page_group_unlock(head); ret = nfs_wait_on_request(subreq); if (!ret) - ret = nfs_page_group_lock(head, false); + ret = nfs_page_group_lock(head); if (ret < 0) { nfs_unroll_locks(inode, head, subreq); nfs_release_request(subreq); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index de1d24cedaa2..2f4fdafb6746 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -139,7 +139,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern int nfs_page_group_lock(struct nfs_page *, bool); +extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); -- cgit v1.2.3 From a6b6d5b85abf4914bbceade5dddd54c345c64136 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 15:39:46 -0400 Subject: NFS: Use an atomic_long_t to count the number of requests Rather than forcing us to take the inode->i_lock just in order to bump the number. 
Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- fs/nfs/delegation.c | 2 +- fs/nfs/inode.c | 7 +++---- fs/nfs/pagelist.c | 4 +--- fs/nfs/write.c | 18 +++++------------- include/linux/nfs_fs.h | 4 ++-- 6 files changed, 13 insertions(+), 24 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 5427cdf04c5a..14358de173fb 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -51,7 +51,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; - if (nfsi->nrequests != 0) + if (nfs_have_writebacks(inode)) res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d7df5e67b0c1..606dd3871f66 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1089,7 +1089,7 @@ bool nfs4_delegation_flush_on_close(const struct inode *inode) delegation = rcu_dereference(nfsi->delegation); if (delegation == NULL || !(delegation->type & FMODE_WRITE)) goto out; - if (nfsi->nrequests < delegation->pagemod_limit) + if (atomic_long_read(&nfsi->nrequests) < delegation->pagemod_limit) ret = false; out: rcu_read_unlock(); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 34d9ebbc0dfd..0480eb02299a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1285,7 +1285,6 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_inode *nfsi = NFS_I(inode); unsigned long ret = 0; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) @@ -1315,7 +1314,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->nrequests == 0) { + && !nfs_have_writebacks(inode)) { 
i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } @@ -1823,7 +1822,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if (nfsi->nrequests == 0 || new_isize > cur_isize) { + if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; @@ -2012,7 +2011,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); - nfsi->nrequests = 0; + atomic_long_set(&nfsi->nrequests, 0); nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); init_rwsem(&nfsi->rmdir_sem); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index af6731dd4324..ec97c301899b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -258,9 +258,7 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) inode = page_file_mapping(req->wb_page)->host; set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); - spin_lock(&inode->i_lock); - NFS_I(inode)->nrequests++; - spin_unlock(&inode->i_lock); + atomic_long_inc(&NFS_I(inode)->nrequests); } } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5ab5ca24b48a..08093552f115 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -434,9 +434,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) { nfs_release_request(subreq); - spin_lock(&inode->i_lock); - NFS_I(inode)->nrequests--; - spin_unlock(&inode->i_lock); + atomic_long_dec(&NFS_I(inode)->nrequests); } /* subreq is now totally disconnected from page group or any @@ -567,9 +565,7 @@ try_again: if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) { set_bit(PG_INODE_REF, &head->wb_flags); 
kref_get(&head->wb_kref); - spin_lock(&inode->i_lock); - NFS_I(inode)->nrequests++; - spin_unlock(&inode->i_lock); + atomic_long_inc(&NFS_I(inode)->nrequests); } nfs_page_group_unlock(head); @@ -755,7 +751,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->nrequests && + if (!nfs_have_writebacks(inode) && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; /* @@ -767,7 +763,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); } - nfsi->nrequests++; + atomic_long_inc(&nfsi->nrequests); /* this a head request for a page group - mark it as having an * extra reference so sub groups can follow suit. * This flag also informs pgio layer when to bump nrequests when @@ -786,6 +782,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *head; + atomic_long_dec(&nfsi->nrequests); if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { head = req->wb_head; @@ -795,11 +792,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) ClearPagePrivate(head->wb_page); clear_bit(PG_MAPPED, &head->wb_flags); } - nfsi->nrequests--; - spin_unlock(&inode->i_lock); - } else { - spin_lock(&inode->i_lock); - nfsi->nrequests--; spin_unlock(&inode->i_lock); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 121a702888b4..238fdc4c46df 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -154,7 +154,7 @@ struct nfs_inode { */ __be32 cookieverf[2]; - unsigned long nrequests; + atomic_long_t nrequests; struct nfs_mds_commit_info commit_info; /* Open contexts for shared mmap writes */ @@ -511,7 +511,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data); static inline int nfs_have_writebacks(struct inode *inode) { - return NFS_I(inode)->nrequests != 0; + return 
atomic_long_read(&NFS_I(inode)->nrequests) != 0; } /* -- cgit v1.2.3 From 2ce209c42c01ca976ad680fea52a8e8b9a53643b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 17:29:29 -0400 Subject: NFS: Wait for requests that are locked on the commit list If a request is on the commit list, but is locked, we will currently skip it, which can lead to livelocking when the commit count doesn't reduce to zero. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 ++ fs/nfs/pnfs_nfs.c | 18 ++++++++++++++---- fs/nfs/write.c | 17 +++++++++++++---- 3 files changed, 29 insertions(+), 8 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ec97c301899b..548ebc7256ff 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -434,6 +434,7 @@ void nfs_release_request(struct nfs_page *req) { kref_put(&req->wb_kref, nfs_page_group_destroy); } +EXPORT_SYMBOL_GPL(nfs_release_request); /** * nfs_wait_on_request - Wait for a request to complete. 
@@ -452,6 +453,7 @@ nfs_wait_on_request(struct nfs_page *req) return wait_on_bit_io(&req->wb_flags, PG_BUSY, TASK_UNINTERRUPTIBLE); } +EXPORT_SYMBOL_GPL(nfs_wait_on_request); /* * nfs_generic_pg_test - determine if requests can be coalesced diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 4b0a809653d1..303ff171cb5d 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -91,13 +91,23 @@ static int pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max) { - struct nfs_page *req, *tmp; + struct nfs_page *req; int ret = 0; - list_for_each_entry_safe(req, tmp, src, wb_list) { - if (!nfs_lock_request(req)) - continue; + while(!list_empty(src)) { + req = list_first_entry(src, struct nfs_page, wb_list); + kref_get(&req->wb_kref); + if (!nfs_lock_request(req)) { + int status; + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); + status = nfs_wait_on_request(req); + nfs_release_request(req); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); + if (status < 0) + break; + continue; + } nfs_request_remove_commit_list(req, cinfo); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); nfs_list_add_request(req, dst); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 866702823869..5dd3b212376e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1030,13 +1030,22 @@ int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max) { - struct nfs_page *req, *tmp; + struct nfs_page *req; int ret = 0; - list_for_each_entry_safe(req, tmp, src, wb_list) { - if (!nfs_lock_request(req)) - continue; + while(!list_empty(src)) { + req = list_first_entry(src, struct nfs_page, wb_list); kref_get(&req->wb_kref); + if (!nfs_lock_request(req)) { + int status; + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); + status = nfs_wait_on_request(req); + nfs_release_request(req); + mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); + if (status < 0) + break; + continue; + } 
nfs_request_remove_commit_list(req, cinfo); nfs_list_add_request(req, dst); ret++; -- cgit v1.2.3