From 23ba98de6dcec665e15c0ca19244379bb0d30932 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 29 Jul 2022 17:01:07 -0400 Subject: nfsd: eliminate the NFSD_FILE_BREAK_* flags We had a report from the spring Bake-a-thon of data corruption in some nfstest_interop tests. Looking at the traces showed the NFS server allowing a v3 WRITE to proceed while a read delegation was still outstanding. Currently, we only set NFSD_FILE_BREAK_* flags if NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc. NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for COMMIT ops, where we need a writeable filehandle but don't need to break read leases. It doesn't make any sense to consult that flag when allocating a file since the file may be used on subsequent calls where we do want to break the lease (and the usage of it here seems to be reverse from what it should be anyway). Also, after calling nfsd_open_break_lease, we don't want to clear the BREAK_* bits. A lease could end up being set on it later (more than once) and we need to be able to break those leases as well. This means that the NFSD_FILE_BREAK_* flags now just mirror NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just drop those flags and unconditionally call nfsd_open_break_lease every time. Reported-by: Olga Kornieskaia Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360 Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd) Cc: # 5.4.x : bb283ca18d1e NFSD: Clean up the show_nf_flags() macro Cc: # 5.4.x Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index a60ead3b227a..081179fb17e8 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -696,8 +696,6 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ - { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) DECLARE_EVENT_CLASS(nfsd_file_class, -- cgit v1.2.3 From 051382885552e12541cc0ebf82092be374a9ed2a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 21 Jun 2022 10:06:23 -0400 Subject: NFSD: Instrument fh_verify() Capture file handles and how they map to local inodes. In particular, NFSv4 PUTFH uses fh_verify() so we can now observe which file handles are the target of OPEN, LOOKUP, RENAME, and so on. Signed-off-by: Chuck Lever --- fs/nfsd/nfsfh.c | 5 +++-- fs/nfsd/trace.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c29baa03dfaf..5e2ed4b2a925 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -331,8 +331,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) struct dentry *dentry; __be32 error; - dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); - if (!fhp->fh_dentry) { error = nfsd_set_fh_dentry(rqstp, fhp); if (error) @@ -340,6 +338,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) } dentry = fhp->fh_dentry; exp = fhp->fh_export; + + trace_nfsd_fh_verify(rqstp, fhp, type, access); + /* * We still have to do all these permission checks, even when * fh_dentry is already set: diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 081179fb17e8..d23ecf7ca9f8 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -171,6 +171,52 @@ TRACE_EVENT(nfsd_compound_encode_err, __entry->opnum, __entry->status) ); +#define show_fs_file_type(x) \ + __print_symbolic(x, \ + { S_IFLNK, "LNK" }, \ + { S_IFREG, "REG" }, \ + { S_IFDIR, "DIR" }, \ + { S_IFCHR, "CHR" }, \ + { S_IFBLK, "BLK" }, \ + { S_IFIFO, "FIFO" }, \ + { S_IFSOCK, "SOCK" }) + +TRACE_EVENT(nfsd_fh_verify, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + int access + ), + TP_ARGS(rqstp, fhp, type, access), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) + __field(u32, xid) + __field(u32, fh_hash) + __field(void *, inode) + __field(unsigned long, type) + __field(unsigned long, access) + ), + TP_fast_assign( + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; + __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local, + rqstp->rq_xprt->xpt_locallen); + __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->inode = d_inode(fhp->fh_dentry); + __entry->type = type; + __entry->access = access; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x inode=%p type=%s access=%s", + __entry->xid, __entry->fh_hash, __entry->inode, + show_fs_file_type(__entry->type), + show_nfsd_may_flags(__entry->access) + ) +); DECLARE_EVENT_CLASS(nfsd_fh_err_class, TP_PROTO(struct svc_rqst *rqstp, -- cgit v1.2.3 From 94660cc19c75083af046b0f8362e3d3bc2eba21d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:38 -0400 Subject: NFSD: Report the number of items evicted by the LRU walk Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 13 ++++++++++--- fs/nfsd/trace.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 029fb4cfa224..e21dcc5e90ab 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -46,6 +46,7 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); +static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { struct work_struct work; @@ -446,6 +447,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, goto out_skip; list_lru_isolate_move(lru, &nf->nf_lru, head); + this_cpu_inc(nfsd_file_evictions); return LRU_REMOVED; out_skip: return LRU_SKIP; @@ -476,9 +478,11 @@ static void nfsd_file_gc(void) { LIST_HEAD(dispose); + unsigned long ret; - list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, - &dispose, LONG_MAX); + ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, + &dispose, LONG_MAX); + trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_gc_dispose_list(&dispose); } @@ -503,6 +507,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); + trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_gc_dispose_list(&dispose); return ret; } @@ -1065,7 +1070,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long hits = 0, acquisitions = 0, releases = 0; + unsigned long hits = 0, acquisitions = 0, releases = 0, evictions = 0; unsigned int i, count = 0, longest = 0; unsigned long lru = 0, total_age = 0; @@ -1089,6 +1094,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) acquisitions += per_cpu(nfsd_file_acquisitions, i); releases += per_cpu(nfsd_file_releases, i); total_age += per_cpu(nfsd_file_total_age, i); + evictions += per_cpu(nfsd_file_evictions, i); } seq_printf(m, "total entries: %u\n", count); @@ -1097,6 +1103,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "cache hits: %lu\n", hits); seq_printf(m, "acquisitions: %lu\n", acquisitions); seq_printf(m, "releases: %lu\n", releases); + seq_printf(m, "evictions: %lu\n", evictions); if (releases) seq_printf(m, "mean age (ms): %ld\n", total_age / releases); else diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index d23ecf7ca9f8..22f57fe7a6f7 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -895,6 +895,35 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->nlink, __entry->mode, __entry->mask) ); +DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, + TP_PROTO( + unsigned long removed, + unsigned long remaining + ), + TP_ARGS(removed, remaining), + TP_STRUCT__entry( + __field(unsigned long, removed) + __field(unsigned long, remaining) + ), + TP_fast_assign( + __entry->removed = removed; + __entry->remaining = remaining; + ), + TP_printk("%lu entries removed, %lu remaining", + __entry->removed, __entry->remaining) +); + +#define DEFINE_NFSD_FILE_LRUWALK_EVENT(name) \ +DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ + TP_PROTO( \ + unsigned long removed, \ + unsigned long remaining \ + ), \ + TP_ARGS(removed, remaining)) + +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); + #include "cache.h" TRACE_DEFINE_ENUM(RC_DROPIT); -- cgit v1.2.3 From c46203acddd9b9200dbc53d0603c97355fd3a03b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:11 -0400 Subject: NFSD: Trace filecache LRU activity Observe the operation of garbage collection and the lifetime of filecache items. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 44 +++++++++++++++++++++++++++++++------------- fs/nfsd/trace.h | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 13 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index f58685f1b120..e831a7b4f916 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -261,6 +261,18 @@ nfsd_file_flush(struct nfsd_file *nf) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } +static void nfsd_file_lru_add(struct nfsd_file *nf) +{ + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + trace_nfsd_file_lru_add(nf); +} + +static void nfsd_file_lru_remove(struct nfsd_file *nf) +{ + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + trace_nfsd_file_lru_del(nf); +} + static void nfsd_file_do_unhash(struct nfsd_file *nf) { @@ -280,8 +292,7 @@ nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_do_unhash(nf); - if (!list_empty(&nf->nf_lru)) - list_lru_del(&nfsd_file_lru, &nf->nf_lru); + nfsd_file_lru_remove(nf); return true; } return false; @@ -444,27 +455,34 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, * counter. Here we check the counter and then test and clear the flag. * That order is deliberate to ensure that we can do this locklessly. */ - if (refcount_read(&nf->nf_ref) > 1) - goto out_skip; + if (refcount_read(&nf->nf_ref) > 1) { + trace_nfsd_file_gc_in_use(nf); + return LRU_SKIP; + } /* * Don't throw out files that are still undergoing I/O or * that have uncleared errors pending. */ - if (nfsd_file_check_writeback(nf)) - goto out_skip; + if (nfsd_file_check_writeback(nf)) { + trace_nfsd_file_gc_writeback(nf); + return LRU_SKIP; + } - if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) - goto out_skip; + if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { + trace_nfsd_file_gc_referenced(nf); + return LRU_SKIP; + } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) - goto out_skip; + if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + trace_nfsd_file_gc_hashed(nf); + return LRU_SKIP; + } list_lru_isolate_move(lru, &nf->nf_lru, head); this_cpu_inc(nfsd_file_evictions); + trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; -out_skip: - return LRU_SKIP; } /* @@ -1017,7 +1035,7 @@ open_file: refcount_inc(&nf->nf_ref); __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - list_lru_add(&nfsd_file_lru, &nf->nf_lru); + nfsd_file_lru_add(nf); hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); ++nfsd_file_hashtbl[hashval].nfb_count; nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 22f57fe7a6f7..64a6c577d7b9 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -895,6 +895,45 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->nlink, __entry->mode, __entry->mask) ); +DECLARE_EVENT_CLASS(nfsd_file_gc_class, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(void *, nf_inode) + __field(void *, nf_file) + __field(int, nf_ref) + __field(unsigned long, nf_flags) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_file = nf->nf_file; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + ), + TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + __entry->nf_file + ) +); + +#define DEFINE_NFSD_FILE_GC_EVENT(name) \ +DEFINE_EVENT(nfsd_file_gc_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf \ + ), \ + TP_ARGS(nf)) + +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); + DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, TP_PROTO( unsigned long removed, -- cgit v1.2.3 From 4a0e73e635e3f36b616ad5c943e3d23debe4632f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:17 -0400 Subject: NFSD: Leave open files out of the filecache LRU There have been reports of problems when running fstests generic/531 against Linux NFS servers with NFSv4. The NFS server that hosts the test's SCRATCH_DEV suffers from CPU soft lock-ups during the test. Analysis shows that: fs/nfsd/filecache.c 482 ret = list_lru_walk(&nfsd_file_lru, 483 nfsd_file_lru_cb, 484 &head, LONG_MAX); causes nfsd_file_gc() to walk the entire length of the filecache LRU list every time it is called (which is quite frequently). The walk holds a spinlock the entire time that prevents other nfsd threads from accessing the filecache. What's more, for NFSv4 workloads, none of the items that are visited during this walk may be evicted, since they are all files that are held OPEN by NFS clients. Address this by ensuring that open files are not kept on the LRU list. Reported-by: Frank van der Linden Reported-by: Wang Yugui Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=386 Suggested-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 24 +++++++++++++++++++----- fs/nfsd/trace.h | 2 ++ 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e831a7b4f916..69d1ab6cfc14 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -263,6 +263,7 @@ nfsd_file_flush(struct nfsd_file *nf) static void nfsd_file_lru_add(struct nfsd_file *nf) { + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) trace_nfsd_file_lru_add(nf); } @@ -292,7 +293,6 @@ nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_do_unhash(nf); - nfsd_file_lru_remove(nf); return true; } return false; @@ -313,6 +313,7 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp if (refcount_dec_not_one(&nf->nf_ref)) return true; + nfsd_file_lru_remove(nf); list_add(&nf->nf_lru, dispose); return true; } @@ -324,6 +325,7 @@ nfsd_file_put_noref(struct nfsd_file *nf) if (refcount_dec_and_test(&nf->nf_ref)) { WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); + nfsd_file_lru_remove(nf); nfsd_file_free(nf); } } @@ -333,7 +335,7 @@ nfsd_file_put(struct nfsd_file *nf) { might_sleep(); - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + nfsd_file_lru_add(nf); if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { nfsd_file_flush(nf); nfsd_file_put_noref(nf); @@ -433,8 +435,18 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) } } -/* +/** + * nfsd_file_lru_cb - Examine an entry on the LRU list + * @item: LRU entry to examine + * @lru: controlling LRU + * @lock: LRU list lock (unused) + * @arg: dispose list + * * Note this can deadlock with nfsd_file_cache_purge. + * + * Return values: + * %LRU_REMOVED: @item was removed from the LRU + * %LRU_SKIP: @item cannot be evicted */ static enum lru_status nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, @@ -456,8 +468,9 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, * That order is deliberate to ensure that we can do this locklessly. */ if (refcount_read(&nf->nf_ref) > 1) { + list_lru_isolate(lru, &nf->nf_lru); trace_nfsd_file_gc_in_use(nf); - return LRU_SKIP; + return LRU_REMOVED; } /* @@ -1014,6 +1027,7 @@ wait_for_construction: goto retry; } + nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); @@ -1035,7 +1049,6 @@ open_file: refcount_inc(&nf->nf_ref); __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - nfsd_file_lru_add(nf); hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); ++nfsd_file_hashtbl[hashval].nfb_count; nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, @@ -1060,6 +1073,7 @@ open_file: */ if (status != nfs_ok || inode->i_nlink == 0) { bool do_free; + nfsd_file_lru_remove(nf); spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); do_free = nfsd_file_unhash(nf); spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 64a6c577d7b9..9507a6c8eeee 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -927,7 +927,9 @@ DEFINE_EVENT(nfsd_file_gc_class, name, \ TP_ARGS(nf)) DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); -- cgit v1.2.3 From 54f7df7094b329ca35d9f9808692bb16c48b13e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:37 -0400 Subject: NFSD: No longer record nf_hashval in the trace log I'm about to replace nfsd_file_hashtbl with an rhashtable. The individual hash values will no longer be visible or relevant, so remove them from the tracepoints. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 15 ++++++++------- fs/nfsd/trace.h | 45 +++++++++++++++++++++------------------------ 2 files changed, 29 insertions(+), 31 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 23be87dac1a1..4de27c0f9ec9 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -589,7 +589,7 @@ nfsd_file_close_inode_sync(struct inode *inode) LIST_HEAD(dispose); __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); + trace_nfsd_file_close_inode_sync(inode, !list_empty(&dispose)); nfsd_file_dispose_list_sync(&dispose); } @@ -609,7 +609,7 @@ nfsd_file_close_inode(struct inode *inode) LIST_HEAD(dispose); __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); + trace_nfsd_file_close_inode(inode, !list_empty(&dispose)); nfsd_file_dispose_list_delayed(&dispose); } @@ -963,7 +963,7 @@ nfsd_file_is_cached(struct inode *inode) } } rcu_read_unlock(); - trace_nfsd_file_is_cached(inode, hashval, (int)ret); + trace_nfsd_file_is_cached(inode, (int)ret); return ret; } @@ -995,9 +995,8 @@ retry: new = nfsd_file_alloc(inode, may_flags, hashval, net); if (!new) { - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, - NULL, nfserr_jukebox); - return nfserr_jukebox; + status = nfserr_jukebox; + goto out_status; } spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); @@ -1035,8 +1034,10 @@ out: nf = NULL; } - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); +out_status: + trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); return status; + open_file: nf = new; /* Take reference for the hashtable */ diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 9507a6c8eeee..9a146d71ad9e 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -748,7 +748,6 @@ DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), TP_ARGS(nf), TP_STRUCT__entry( - __field(unsigned int, nf_hashval) __field(void *, nf_inode) __field(int, nf_ref) __field(unsigned long, nf_flags) @@ -756,15 +755,13 @@ DECLARE_EVENT_CLASS(nfsd_file_class, __field(struct file *, nf_file) ), TP_fast_assign( - __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p", - __entry->nf_hashval, + TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p", __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), @@ -784,15 +781,18 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_put); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); TRACE_EVENT(nfsd_file_acquire, - TP_PROTO(struct svc_rqst *rqstp, unsigned int hash, - struct inode *inode, unsigned int may_flags, - struct nfsd_file *nf, __be32 status), + TP_PROTO( + struct svc_rqst *rqstp, + struct inode *inode, + unsigned int may_flags, + struct nfsd_file *nf, + __be32 status + ), - TP_ARGS(rqstp, hash, inode, may_flags, nf, status), + TP_ARGS(rqstp, inode, may_flags, nf, status), TP_STRUCT__entry( __field(u32, xid) - __field(unsigned int, hash) __field(void *, inode) __field(unsigned long, may_flags) __field(int, nf_ref) @@ -804,7 +804,6 @@ TRACE_EVENT(nfsd_file_acquire, TP_fast_assign( __entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0; @@ -814,8 +813,8 @@ TRACE_EVENT(nfsd_file_acquire, __entry->status = be32_to_cpu(status); ), - TP_printk("xid=0x%x hash=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u", - __entry->xid, __entry->hash, __entry->inode, + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u", + __entry->xid, __entry->inode, show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), show_nfsd_may_flags(__entry->nf_may), @@ -826,7 +825,6 @@ TRACE_EVENT(nfsd_file_open, TP_PROTO(struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( - __field(unsigned int, nf_hashval) __field(void *, nf_inode) /* cannot be dereferenced */ __field(int, nf_ref) __field(unsigned long, nf_flags) @@ -834,15 +832,13 @@ TRACE_EVENT(nfsd_file_open, __field(void *, nf_file) /* cannot be dereferenced */ ), TP_fast_assign( - __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p", - __entry->nf_hashval, + TP_printk("inode=%p ref=%d flags=%s may=%s file=%p", __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), @@ -851,26 +847,27 @@ TRACE_EVENT(nfsd_file_open, ) DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO(struct inode *inode, unsigned int hash, int found), - TP_ARGS(inode, hash, found), + TP_PROTO( + struct inode *inode, + int found + ), + TP_ARGS(inode, found), TP_STRUCT__entry( __field(struct inode *, inode) - __field(unsigned int, hash) __field(int, found) ), TP_fast_assign( __entry->inode = inode; - __entry->hash = hash; __entry->found = found; ), - TP_printk("hash=0x%x inode=%p found=%d", __entry->hash, - __entry->inode, __entry->found) + TP_printk("inode=%p found=%d", + __entry->inode, __entry->found) ); #define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO(struct inode *inode, unsigned int hash, int found), \ - TP_ARGS(inode, hash, found)) + TP_PROTO(struct inode *inode, int found), \ + TP_ARGS(inode, found)) DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); -- cgit v1.2.3 From a845511007a63467fee575353c706806c21218b1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:57 -0400 Subject: NFSD: Refactor __nfsd_file_close_inode() The code that computes the hashval is the same in both callers. To prevent them from going stale, reframe the documenting comments to remove descriptions of the underlying hash table structure, which is about to be replaced. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 40 +++++++++++++++++++++------------------- fs/nfsd/trace.h | 44 +++++++++++++++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 30 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a9034d87f006..1ff58978421c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -559,39 +559,44 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -static void -__nfsd_file_close_inode(struct inode *inode, unsigned int hashval, - struct list_head *dispose) +/* + * Find all cache items across all net namespaces that match @inode and + * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). + */ +static unsigned int +__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) { + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, + NFSD_FILE_HASH_BITS); + unsigned int count = 0; struct nfsd_file *nf; struct hlist_node *tmp; spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { - if (inode == nf->nf_inode) + if (inode == nf->nf_inode) { nfsd_file_unhash_and_release_locked(nf, dispose); + count++; + } } spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + return count; } /** * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file * @inode: inode of the file to attempt to remove * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. Also ensure that any of the - * fputs also have their final __fput done as well. + * Unhash and put, then flush and fput all cache items associated with @inode. */ void nfsd_file_close_inode_sync(struct inode *inode) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); + unsigned int count; - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode_sync(inode, !list_empty(&dispose)); + count = __nfsd_file_close_inode(inode, &dispose); + trace_nfsd_file_close_inode_sync(inode, count); nfsd_file_dispose_list_sync(&dispose); } @@ -599,19 +604,16 @@ nfsd_file_close_inode_sync(struct inode *inode) * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. + * Unhash and put all cache item associated with @inode. */ static void nfsd_file_close_inode(struct inode *inode) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); + unsigned int count; - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode(inode, !list_empty(&dispose)); + count = __nfsd_file_close_inode(inode, &dispose); + trace_nfsd_file_close_inode(inode, count); nfsd_file_dispose_list_delayed(&dispose); } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 9a146d71ad9e..79bf985bfc7d 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -848,30 +848,52 @@ TRACE_EVENT(nfsd_file_open, DECLARE_EVENT_CLASS(nfsd_file_search_class, TP_PROTO( - struct inode *inode, - int found + const struct inode *inode, + unsigned int count ), - TP_ARGS(inode, found), + TP_ARGS(inode, count), TP_STRUCT__entry( - __field(struct inode *, inode) - __field(int, found) + __field(const struct inode *, inode) + __field(unsigned int, count) ), TP_fast_assign( __entry->inode = inode; - __entry->found = found; + __entry->count = count; ), - TP_printk("inode=%p found=%d", - __entry->inode, __entry->found) + TP_printk("inode=%p count=%u", + __entry->inode, __entry->count) ); #define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO(struct inode *inode, int found), \ - TP_ARGS(inode, found)) + TP_PROTO( \ + const struct inode *inode, \ + unsigned int count \ + ), \ + TP_ARGS(inode, count)) DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); + +TRACE_EVENT(nfsd_file_is_cached, + TP_PROTO( + const struct inode *inode, + int found + ), + TP_ARGS(inode, found), + TP_STRUCT__entry( + __field(const struct inode *, inode) + __field(int, found) + ), + TP_fast_assign( + __entry->inode = inode; + __entry->found = found; + ), + TP_printk("inode=%p is %scached", + __entry->inode, + __entry->found ? "" : "not " + ) +); TRACE_EVENT(nfsd_file_fsnotify_handle_event, TP_PROTO(struct inode *inode, u32 mask), -- cgit v1.2.3 From ce502f81ba884c1fe45dc0ebddbcaaa4ec0fc5fb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:30 -0400 Subject: NFSD: Convert the filecache to use rhashtable Enable the filecache hash table to start small, then grow with the workload. Smaller server deployments benefit because there should be lower memory utilization. Larger server deployments should see improved scaling with the number of open files. Suggested-by: Jeff Layton Suggested-by: Dave Chinner Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 265 +++++++++++++++++++++++----------------------------- fs/nfsd/trace.h | 63 ++++++++++++- 2 files changed, 179 insertions(+), 149 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a5ae5b4620d0..aed3cb6bf865 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -62,7 +62,6 @@ static struct nfsd_fcache_bucket *nfsd_file_hashtbl; static struct list_lru nfsd_file_lru; static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; -static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; static struct rhashtable nfsd_file_rhash_tbl ____cacheline_aligned_in_smp; @@ -198,7 +197,7 @@ static const struct rhashtable_params nfsd_file_rhash_params = { static void nfsd_file_schedule_laundrette(void) { - if ((atomic_long_read(&nfsd_filecache_count) == 0) || + if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) || test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) return; @@ -298,7 +297,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) } static struct nfsd_file * -nfsd_file_alloc(struct inode *inode, unsigned int may, struct net *net) +nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) { struct nfsd_file *nf; @@ -309,11 +308,14 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, struct net *net) nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; nf->nf_cred = get_current_cred(); - nf->nf_net = net; + nf->nf_net = key->net; nf->nf_flags = 0; - nf->nf_inode = inode; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = may & NFSD_FILE_MAY_MASK; + __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); + __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); + nf->nf_inode = key->inode; + /* nf_ref is pre-incremented for hash table */ + refcount_set(&nf->nf_ref, 2); + nf->nf_may = key->need; nf->nf_mark = NULL; trace_nfsd_file_alloc(nf); } @@ -399,40 +401,21 @@ static void nfsd_file_lru_remove(struct nfsd_file *nf) } static void -nfsd_file_do_unhash(struct nfsd_file *nf) +nfsd_file_hash_remove(struct nfsd_file *nf) { - struct inode *inode = nf->nf_inode; - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); - - lockdep_assert_held(&nfsd_file_hashtbl[hashval].nfb_lock); - trace_nfsd_file_unhash(nf); if (nfsd_file_check_write_error(nf)) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); - --nfsd_file_hashtbl[hashval].nfb_count; - hlist_del_rcu(&nf->nf_node); - atomic_long_dec(&nfsd_filecache_count); -} - -static void -nfsd_file_hash_remove(struct nfsd_file *nf) -{ - struct inode *inode = nf->nf_inode; - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); - - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - nfsd_file_do_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, + nfsd_file_rhash_params); } static bool nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_do_unhash(nf); + nfsd_file_hash_remove(nf); return true; } return false; @@ -442,9 +425,9 @@ nfsd_file_unhash(struct nfsd_file *nf) * Return true if the file was unhashed. */ static bool -nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) +nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose) { - trace_nfsd_file_unhash_and_release_locked(nf); + trace_nfsd_file_unhash_and_dispose(nf); if (!nfsd_file_unhash(nf)) return false; /* keep final reference for nfsd_file_lru_dispose */ @@ -703,20 +686,23 @@ static struct shrinker nfsd_file_shrinker = { static unsigned int __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); - unsigned int count = 0; - struct nfsd_file *nf; - struct hlist_node *tmp; - - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { - if (inode == nf->nf_inode) { - nfsd_file_unhash_and_release_locked(nf, dispose); - count++; - } - } - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, + }; + unsigned int count = 0; + struct nfsd_file *nf; + + rcu_read_lock(); + do { + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); + if (!nf) + break; + nfsd_file_unhash_and_dispose(nf, dispose); + count++; + } while (1); + rcu_read_unlock(); return count; } @@ -924,30 +910,35 @@ out_err: static void __nfsd_file_cache_purge(struct net *net) { - unsigned int i; - struct nfsd_file *nf; - struct hlist_node *next; + struct rhashtable_iter iter; + struct nfsd_file *nf; LIST_HEAD(dispose); bool del; - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; + rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); + do { + rhashtable_walk_start(&iter); - spin_lock(&nfb->nfb_lock); - hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { + nf = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(nf)) { if (net && nf->nf_net != net) continue; - del = nfsd_file_unhash_and_release_locked(nf, &dispose); + del = nfsd_file_unhash_and_dispose(nf, &dispose); /* * Deadlock detected! Something marked this entry as * unhased, but hasn't removed it from the hash list. */ WARN_ON_ONCE(!del); + + nf = rhashtable_walk_next(&iter); } - spin_unlock(&nfb->nfb_lock); - nfsd_file_dispose_list(&dispose); - } + + rhashtable_walk_stop(&iter); + } while (nf == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); + + nfsd_file_dispose_list(&dispose); } static struct nfsd_fcache_disposal * @@ -1052,56 +1043,29 @@ nfsd_file_cache_shutdown(void) } } -static struct nfsd_file * -nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, - unsigned int hashval, struct net *net) -{ - struct nfsd_file *nf; - unsigned char need = may_flags & NFSD_FILE_MAY_MASK; - - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) { - if (nf->nf_may != need) - continue; - if (nf->nf_inode != inode) - continue; - if (nf->nf_net != net) - continue; - if (!nfsd_match_cred(nf->nf_cred, current_cred())) - continue; - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) - continue; - if (nfsd_file_get(nf) != NULL) - return nf; - } - return NULL; -} - /** - * nfsd_file_is_cached - are there any cached open files for this fh? - * @inode: inode of the file to check + * nfsd_file_is_cached - are there any cached open files for this inode? + * @inode: inode to check + * + * The lookup matches inodes in all net namespaces and is atomic wrt + * nfsd_file_acquire(). * - * Scan the hashtable for open files that match this fh. Returns true if there - * are any, and false if not. + * Return values: + * %true: filecache contains at least one file matching this inode + * %false: filecache contains no files matching this inode */ bool nfsd_file_is_cached(struct inode *inode) { - bool ret = false; - struct nfsd_file *nf; - unsigned int hashval; - - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); - - rcu_read_lock(); - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node) { - if (inode == nf->nf_inode) { - ret = true; - break; - } - } - rcu_read_unlock(); + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, + }; + bool ret = false; + + if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params) != NULL) + ret = true; trace_nfsd_file_is_cached(inode, (int)ret); return ret; } @@ -1110,39 +1074,51 @@ static __be32 nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf, bool open) { - __be32 status; - struct net *net = SVC_NET(rqstp); + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_FULL, + .need = may_flags & NFSD_FILE_MAY_MASK, + .net = SVC_NET(rqstp), + }; struct nfsd_file *nf, *new; - struct inode *inode; - unsigned int hashval; bool retry = true; + __be32 status; - /* FIXME: skip this if fh_dentry is already set? */ status = fh_verify(rqstp, fhp, S_IFREG, may_flags|NFSD_MAY_OWNER_OVERRIDE); if (status != nfs_ok) return status; + key.inode = d_inode(fhp->fh_dentry); + key.cred = get_current_cred(); - inode = d_inode(fhp->fh_dentry); - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); retry: - rcu_read_lock(); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); - rcu_read_unlock(); + /* Avoid allocation if the item is already in cache */ + nf = rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); + if (nf) + nf = nfsd_file_get(nf); if (nf) goto wait_for_construction; - new = nfsd_file_alloc(inode, may_flags, net); + new = nfsd_file_alloc(&key, may_flags); if (!new) { status = nfserr_jukebox; goto out_status; } - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); - if (nf == NULL) + nf = rhashtable_lookup_get_insert_key(&nfsd_file_rhash_tbl, + &key, &new->nf_rhash, + nfsd_file_rhash_params); + if (!nf) { + nf = new; + goto open_file; + } + if (IS_ERR(nf)) + goto insert_err; + nf = nfsd_file_get(nf); + if (nf == NULL) { + nf = new; goto open_file; - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + } nfsd_file_slab_free(&new->nf_rcu); wait_for_construction: @@ -1150,6 +1126,7 @@ wait_for_construction: /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); if (!retry) { status = nfserr_jukebox; goto out; @@ -1174,22 +1151,11 @@ out: } out_status: - trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); + put_cred(key.cred); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: - nf = new; - /* Take reference for the hashtable */ - refcount_inc(&nf->nf_ref); - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); - __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); - ++nfsd_file_hashtbl[hashval].nfb_count; - nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, - nfsd_file_hashtbl[hashval].nfb_count); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - atomic_long_inc(&nfsd_filecache_count); - nf->nf_mark = nfsd_file_mark_find_or_create(nf); if (nf->nf_mark) { if (open) { @@ -1204,19 +1170,20 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || inode->i_nlink == 0) { - bool do_free; - nfsd_file_lru_remove(nf); - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - do_free = nfsd_file_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - if (do_free) + if (status != nfs_ok || key.inode->i_nlink == 0) + if (nfsd_file_unhash(nf)) nfsd_file_put_noref(nf); - } clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); goto out; + +insert_err: + nfsd_file_slab_free(&new->nf_rcu); + trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, PTR_ERR(nf)); + nf = NULL; + status = nfserr_jukebox; + goto out_status; } /** @@ -1262,21 +1229,23 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { unsigned long releases = 0, pages_flushed = 0, evictions = 0; unsigned long hits = 0, acquisitions = 0; - unsigned int i, count = 0, longest = 0; + unsigned int i, count = 0, buckets = 0; unsigned long lru = 0, total_age = 0; - /* - * No need for spinlocks here since we're not terribly interested in - * accuracy. We do take the nfsd_mutex simply to ensure that we - * don't end up racing with server shutdown - */ + /* Serialize with server shutdown */ mutex_lock(&nfsd_mutex); if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - count += nfsd_file_hashtbl[i].nfb_count; - longest = max(longest, nfsd_file_hashtbl[i].nfb_count); - } + struct bucket_table *tbl; + struct rhashtable *ht; + lru = list_lru_count(&nfsd_file_lru); + + rcu_read_lock(); + ht = &nfsd_file_rhash_tbl; + count = atomic_read(&ht->nelems); + tbl = rht_dereference_rcu(ht->tbl, ht); + buckets = tbl->size; + rcu_read_unlock(); } mutex_unlock(&nfsd_mutex); @@ -1290,7 +1259,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) } seq_printf(m, "total entries: %u\n", count); - seq_printf(m, "longest chain: %u\n", longest); + seq_printf(m, "hash buckets: %u\n", buckets); seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); seq_printf(m, "acquisitions: %lu\n", acquisitions); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 79bf985bfc7d..5d634361debf 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -778,7 +778,7 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); +DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); TRACE_EVENT(nfsd_file_acquire, TP_PROTO( @@ -821,6 +821,67 @@ TRACE_EVENT(nfsd_file_acquire, __entry->nf_file, __entry->status) ); +TRACE_EVENT(nfsd_file_insert_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + long error + ), + TP_ARGS(rqstp, inode, may_flags, error), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(long, error) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->may_flags = may_flags; + __entry->error = error; + ), + TP_printk("xid=0x%x inode=%p may_flags=%s error=%ld", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), + __entry->error + ) +); + +TRACE_EVENT(nfsd_file_cons_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + const struct nfsd_file *nf + ), + TP_ARGS(rqstp, inode, may_flags, nf), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(unsigned int, nf_ref) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(const void *, nf_file) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->may_flags = may_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_file = nf->nf_file; + ), + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file + ) +); + TRACE_EVENT(nfsd_file_open, TP_PROTO(struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), -- cgit v1.2.3 From be0230069fcbf7d332d010b57c1d0cfd623a84d6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:43 -0400 Subject: NFSD: Separate tracepoints for acquire and create These tracepoints collect different information: the create case does not open a file, so there's no nf_file available. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 9 +++++---- fs/nfsd/nfs4state.c | 1 + fs/nfsd/trace.h | 54 +++++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 52 insertions(+), 12 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index bfa4df7daff3..df54f882752c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1040,7 +1040,7 @@ nfsd_file_is_cached(struct inode *inode) } static __be32 -nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, +nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf, bool open) { struct nfsd_file_lookup_key key = { @@ -1121,7 +1121,8 @@ out: out_status: put_cred(key.cred); - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + if (open) + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: @@ -1169,7 +1170,7 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true); } /** @@ -1186,7 +1187,7 @@ __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false); } /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9409a0dc1b76..3a05c095dfe5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5104,6 +5104,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, goto out_put_access; nf->nf_file = open->op_filp; open->op_filp = NULL; + trace_nfsd_file_create(rqstp, access, nf); } spin_lock(&fp->fi_lock); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 5d634361debf..6bbfb987271d 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -782,10 +782,10 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); TRACE_EVENT(nfsd_file_acquire, TP_PROTO( - struct svc_rqst *rqstp, - struct inode *inode, + const struct svc_rqst *rqstp, + const struct inode *inode, unsigned int may_flags, - struct nfsd_file *nf, + const struct nfsd_file *nf, __be32 status ), @@ -793,12 +793,12 @@ TRACE_EVENT(nfsd_file_acquire, TP_STRUCT__entry( __field(u32, xid) - __field(void *, inode) + __field(const void *, inode) __field(unsigned long, may_flags) - __field(int, nf_ref) + __field(unsigned int, nf_ref) __field(unsigned long, nf_flags) __field(unsigned long, nf_may) - __field(struct file *, nf_file) + __field(const void *, nf_file) __field(u32, status) ), @@ -813,12 +813,50 @@ TRACE_EVENT(nfsd_file_acquire, __entry->status = be32_to_cpu(status); ), - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u", + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u", __entry->xid, __entry->inode, show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), show_nfsd_may_flags(__entry->nf_may), - __entry->nf_file, __entry->status) + __entry->nf_file, __entry->status + ) +); + +TRACE_EVENT(nfsd_file_create, + TP_PROTO( + const struct svc_rqst *rqstp, + unsigned int may_flags, + const struct nfsd_file *nf + ), + + TP_ARGS(rqstp, may_flags, nf), + + TP_STRUCT__entry( + __field(const void *, nf_inode) + __field(const void *, nf_file) + __field(unsigned long, may_flags) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(unsigned int, nf_ref) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_file = nf->nf_file; + __entry->may_flags = may_flags; + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + ), + + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", + __entry->xid, __entry->nf_inode, + show_nfsd_may_flags(__entry->may_flags), + __entry->nf_ref, show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file + ) ); TRACE_EVENT(nfsd_file_insert_err, -- cgit v1.2.3 From b40a2839470cd62ed68c4a32d72a18ee8975b1ac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:49 -0400 Subject: NFSD: Move nfsd_file_trace_alloc() tracepoint Avoid recording the allocation of an nfsd_file item that is immediately released because a matching item was already inserted in the hash. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 2 +- fs/nfsd/trace.h | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'fs/nfsd/trace.h') diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index df54f882752c..ac38b41c89c6 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -303,7 +303,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) refcount_set(&nf->nf_ref, 2); nf->nf_may = key->need; nf->nf_mark = NULL; - trace_nfsd_file_alloc(nf); } return nf; } @@ -1126,6 +1125,7 @@ out_status: return status; open_file: + trace_nfsd_file_alloc(nf); nf->nf_mark = nfsd_file_mark_find_or_create(nf); if (nf->nf_mark) { if (open) { diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 6bbfb987271d..9ebd67d461f9 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -774,12 +774,35 @@ DEFINE_EVENT(nfsd_file_class, name, \ TP_PROTO(struct nfsd_file *nf), \ TP_ARGS(nf)) -DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); +TRACE_EVENT(nfsd_file_alloc, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(const void *, nf_inode) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(unsigned int, nf_ref) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_flags = nf->nf_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_may = nf->nf_may; + ), + TP_printk("inode=%p ref=%u flags=%s may=%s", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may) + ) +); + TRACE_EVENT(nfsd_file_acquire, TP_PROTO( const struct svc_rqst *rqstp, -- cgit v1.2.3