From 182d919b84902eece162c63ed3d476c8016b4197 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Feb 2015 23:47:31 +0000 Subject: FS-Cache: Count culled objects and objects rejected due to lack of space Count the number of objects that get culled by the cache backend and the number of objects that the cache backend declines to instantiate due to lack of space in the cache. These numbers are made available through /proc/fs/fscache/stats Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- Documentation/filesystems/caching/backend-api.txt | 23 +++++++++++++++++++++++ Documentation/filesystems/caching/fscache.txt | 4 ++++ 2 files changed, 27 insertions(+) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index 277d1e810670..c0bd5677271b 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -676,6 +676,29 @@ FS-Cache provides some utilities that a cache backend may make use of: as possible. + (*) Indicate that a stale object was found and discarded: + + void fscache_object_retrying_stale(struct fscache_object *object); + + This is called to indicate that the lookup procedure found an object in + the cache that the netfs decided was stale. The object has been + discarded from the cache and the lookup will be performed again. + + + (*) Indicate that the caching backend killed an object: + + void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why); + + This is called to indicate that the cache backend preemptively killed an + object. The why parameter should be set to indicate the reason: + + FSCACHE_OBJECT_IS_STALE - the object was stale and needs discarding. + FSCACHE_OBJECT_NO_SPACE - there was insufficient cache space + FSCACHE_OBJECT_WAS_RETIRED - the object was retired when relinquished. + FSCACHE_OBJECT_WAS_CULLED - the object was culled to make space. + + (*) Get and release references on a retrieval record: void fscache_get_retrieval(struct fscache_retrieval *op); diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 770267af5b3e..66fa7fbccfa4 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -303,6 +303,10 @@ proc files. wrp=N Number of in-progress write_page() cache ops ucp=N Number of in-progress uncache_page() cache ops dsp=N Number of in-progress dissociate_pages() cache ops + CacheEv nsp=N Number of object lookups/creations rejected due to lack of space + stl=N Number of stale objects deleted + rtr=N Number of objects retired when relinquished + cul=N Number of objects culled (*) /proc/fs/fscache/histogram -- cgit v1.2.3 From 03cdd0e4b9a98ae995b81cd8f58e992ec3f44ae2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Feb 2015 13:21:15 +0000 Subject: FS-Cache: Count the number of initialised operations Count and display through /proc/fs/fscache/stats the number of initialised operations. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- Documentation/filesystems/caching/fscache.txt | 3 ++- fs/fscache/internal.h | 1 + fs/fscache/operation.c | 1 + fs/fscache/stats.c | 4 +++- 4 files changed, 7 insertions(+), 2 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 66fa7fbccfa4..50f0a5757f48 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -284,8 +284,9 @@ proc files. enq=N Number of times async ops queued for processing can=N Number of async ops cancelled rej=N Number of async ops rejected due to object lookup/create failure + ini=N Number of async ops initialised dfr=N Number of async ops queued for deferred release - rel=N Number of async ops released + rel=N Number of async ops released (should equal ini=N when idle) gc=N Number of deferred-release async ops garbage collected CacheOp alo=N Number of in-progress alloc_object() cache ops luo=N Number of in-progress lookup_object() cache ops diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 87c4544ec912..a63225116db6 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -165,6 +165,7 @@ extern atomic_t fscache_n_op_pend; extern atomic_t fscache_n_op_run; extern atomic_t fscache_n_op_enqueue; extern atomic_t fscache_n_op_deferred_release; +extern atomic_t fscache_n_op_initialised; extern atomic_t fscache_n_op_release; extern atomic_t fscache_n_op_gc; extern atomic_t fscache_n_op_cancelled; diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 61a6e78b85fa..9761df4fc2ab 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -39,6 +39,7 @@ void fscache_operation_init(struct fscache_operation *op, op->processor = processor; op->release = release; INIT_LIST_HEAD(&op->pend_link); + fscache_stat(&fscache_n_op_initialised); } EXPORT_SYMBOL(fscache_operation_init); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 3a722e8f2307..7cfa0aacdf6d 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -23,6 +23,7 @@ atomic_t fscache_n_op_run; atomic_t fscache_n_op_enqueue; atomic_t fscache_n_op_requeue; atomic_t fscache_n_op_deferred_release; +atomic_t fscache_n_op_initialised; atomic_t fscache_n_op_release; atomic_t fscache_n_op_gc; atomic_t fscache_n_op_cancelled; @@ -251,7 +252,8 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_op_enqueue), atomic_read(&fscache_n_op_cancelled), atomic_read(&fscache_n_op_rejected)); - seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n", + seq_printf(m, "Ops : ini=%u dfr=%u rel=%u gc=%u\n", + atomic_read(&fscache_n_op_initialised), atomic_read(&fscache_n_op_deferred_release), atomic_read(&fscache_n_op_release), atomic_read(&fscache_n_op_gc)); -- cgit v1.2.3 From 8a81252b774b53e628a8a0fe18e2b8fc236d92cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Jun 2015 15:54:08 +0200 Subject: fs/file.c: don't acquire files->file_lock in fd_install() Mateusz Guzik reported : Currently obtaining a new file descriptor results in locking fdtable twice - once in order to reserve a slot and second time to fill it. Holding the spinlock in __fd_install() is needed in case a resize is done, or to prevent a resize. Mateusz provided an RFC patch and a micro benchmark : http://people.redhat.com/~mguzik/pipebench.c A resize is an unlikely operation in a process lifetime, as table size is at least doubled at every resize. We can use RCU instead of the spinlock. __fd_install() must wait if a resize is in progress. The resize must block new __fd_install() callers from starting, and wait that ongoing install are finished (synchronize_sched()) resize should be attempted by a single thread to not waste resources. rcu_sched variant is used, as __fd_install() and expand_fdtable() run from process context. It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R) CPU E5-2696 v2 @ 2.50GHz Signed-off-by: Eric Dumazet Reported-by: Mateusz Guzik Acked-by: Mateusz Guzik Tested-by: Mateusz Guzik Signed-off-by: Al Viro --- Documentation/filesystems/porting | 4 +++ fs/file.c | 67 ++++++++++++++++++++++++++++----------- include/linux/fdtable.h | 3 ++ 3 files changed, 55 insertions(+), 19 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 3eae250254d5..ec5456113072 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -500,3 +500,7 @@ in your dentry operations instead. dentry, it does not get nameidata at all and it gets called only when cookie is non-NULL. Note that link body isn't available anymore, so if you need it, store it as cookie. +-- +[mandatory] + __fd_install() & fd_install() can now sleep. Callers should not + hold a spinlock or other resources that do not allow a schedule. diff --git a/fs/file.c b/fs/file.c index 93c5f89c248b..3d2eb4c542a4 100644 --- a/fs/file.c +++ b/fs/file.c @@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr) spin_unlock(&files->file_lock); new_fdt = alloc_fdtable(nr); + + /* make sure all __fd_install() have seen resize_in_progress + * or have finished their rcu_read_lock_sched() section. + */ + if (atomic_read(&files->count) > 1) + synchronize_sched(); + spin_lock(&files->file_lock); if (!new_fdt) return -ENOMEM; @@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr) __free_fdtable(new_fdt); return -EMFILE; } - /* - * Check again since another task may have expanded the fd table while - * we dropped the lock - */ cur_fdt = files_fdtable(files); - if (nr >= cur_fdt->max_fds) { - /* Continue as planned */ - copy_fdtable(new_fdt, cur_fdt); - rcu_assign_pointer(files->fdt, new_fdt); - if (cur_fdt != &files->fdtab) - call_rcu(&cur_fdt->rcu, free_fdtable_rcu); - } else { - /* Somebody else expanded, so undo our attempt */ - __free_fdtable(new_fdt); - } + BUG_ON(nr < cur_fdt->max_fds); + copy_fdtable(new_fdt, cur_fdt); + rcu_assign_pointer(files->fdt, new_fdt); + if (cur_fdt != &files->fdtab) + call_rcu(&cur_fdt->rcu, free_fdtable_rcu); + /* coupled with smp_rmb() in __fd_install() */ + smp_wmb(); return 1; } @@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr) * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_files(struct files_struct *files, int nr) + __releases(files->file_lock) + __acquires(files->file_lock) { struct fdtable *fdt; + int expanded = 0; +repeat: fdt = files_fdtable(files); /* Do we need to expand? */ if (nr < fdt->max_fds) - return 0; + return expanded; /* Can we expand? */ if (nr >= sysctl_nr_open) return -EMFILE; + if (unlikely(files->resize_in_progress)) { + spin_unlock(&files->file_lock); + expanded = 1; + wait_event(files->resize_wait, !files->resize_in_progress); + spin_lock(&files->file_lock); + goto repeat; + } + /* All good, so we try */ - return expand_fdtable(files, nr); + files->resize_in_progress = true; + expanded = expand_fdtable(files, nr); + files->resize_in_progress = false; + + wake_up_all(&files->resize_wait); + return expanded; } static inline void __set_close_on_exec(int fd, struct fdtable *fdt) @@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); + newf->resize_in_progress = false; + init_waitqueue_head(&newf->resize_wait); newf->next_fd = 0; new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; @@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd, struct file *file) { struct fdtable *fdt; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); + + might_sleep(); + rcu_read_lock_sched(); + + while (unlikely(files->resize_in_progress)) { + rcu_read_unlock_sched(); + wait_event(files->resize_wait, !files->resize_in_progress); + rcu_read_lock_sched(); + } + /* coupled with smp_wmb() in expand_fdtable() */ + smp_rmb(); + fdt = rcu_dereference_sched(files->fdt); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); + rcu_read_unlock_sched(); } void fd_install(unsigned int fd, struct file *file) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 230f87bdf5ad..fbb88740634a 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -47,6 +47,9 @@ struct files_struct { * read mostly part */ atomic_t count; + bool resize_in_progress; + wait_queue_head_t resize_wait; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* -- cgit v1.2.3 From 44f4c054cae646a9296da05cdfe7d6e786f73d46 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Jul 2015 10:40:38 -0400 Subject: dax: Add block size note to documentation For block devices which are small enough, mkfs will default to creating a filesystem with block sizes smaller than page size. Signed-off-by: Matthew Wilcox Signed-off-by: Al Viro --- Documentation/filesystems/dax.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index baf41118660d..7af2851d667c 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -18,8 +18,10 @@ Usage ----- If you have a block device which supports DAX, you can make a filesystem -on it as usual. When mounting it, use the -o dax option manually -or add 'dax' to the options in /etc/fstab. +on it as usual. The DAX code currently only supports files with a block +size equal to your kernel's PAGE_SIZE, so you may need to specify a block +size when creating the filesystem. When mounting it, use the "-o dax" +option on the command line or add 'dax' to the options in /etc/fstab. Implementation Tips for Block Driver Writers -- cgit v1.2.3