diff options
author | Dipankar Sarma <dipankar@in.ibm.com> | 2005-09-10 00:04:13 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-10 00:57:55 +0400 |
commit | ab2af1f5005069321c5d130f09cce577b03f43ef (patch) | |
tree | 73a70ba486f522cd9eeeef376ede2b5a1c1b473b /fs/file_table.c | |
parent | 6e72ad2c581de121cc7e772469e2a8f6b1fd4379 (diff) | |
download | linux-ab2af1f5005069321c5d130f09cce577b03f43ef.tar.xz |
[PATCH] files: files struct with RCU
Patch to eliminate struct files_struct.file_lock spinlock on the reader side
and use rcu refcounting rcuref_xxx api for the f_count refcounter. The
updates to the fdtable are done by allocating a new fdtable structure and
setting files->fdt to point to the new structure. The fdtable structure is
protected by RCU thereby allowing lock-free lookup. For fd arrays/sets that
are vmalloced, we use keventd to free them since RCU callbacks can't sleep. A
global list of fdtable to be freed is not scalable, so we use a per-cpu list.
If keventd is already handling the current cpu's work, we use a timer to defer
queueing of that work.
Since the last publication, this patch has been re-written to avoid using
explicit memory barriers and use rcu_assign_pointer(), rcu_dereference()
premitives instead. This required that the fd information is kept in a
separate structure (fdtable) and updated atomically.
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/file_table.c')
-rw-r--r-- | fs/file_table.c | 40 |
1 files changed, 29 insertions, 11 deletions
diff --git a/fs/file_table.c b/fs/file_table.c index 43e9e1737de2..86ec8ae985b4 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -14,6 +14,7 @@ #include <linux/fs.h> #include <linux/security.h> #include <linux/eventpoll.h> +#include <linux/rcupdate.h> #include <linux/mount.h> #include <linux/cdev.h> #include <linux/fsnotify.h> @@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags) spin_unlock_irqrestore(&filp_count_lock, flags); } -static inline void file_free(struct file *f) +static inline void file_free_rcu(struct rcu_head *head) { + struct file *f = container_of(head, struct file, f_rcuhead); kmem_cache_free(filp_cachep, f); } +static inline void file_free(struct file *f) +{ + call_rcu(&f->f_rcuhead, file_free_rcu); +} + /* Find an unused file structure and return a pointer to it. * Returns NULL, if there are no more free file structures or * we run out of memory. @@ -110,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp); void fastcall fput(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) + if (rcuref_dec_and_test(&file->f_count)) __fput(file); } @@ -156,11 +163,17 @@ struct file fastcall *fget(unsigned int fd) struct file *file; struct files_struct *files = current->files; - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); - if (file) - get_file(file); - spin_unlock(&files->file_lock); + if (file) { + if (!rcuref_inc_lf(&file->f_count)) { + /* File object ref couldn't be taken */ + rcu_read_unlock(); + return NULL; + } + } + rcu_read_unlock(); + return file; } @@ -182,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) if (likely((atomic_read(&files->count) == 1))) { file = fcheck_files(files, fd); } else { - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - get_file(file); - *fput_needed = 1; + if (rcuref_inc_lf(&file->f_count)) + *fput_needed = 1; + else + /* Didn't get the reference, someone's freed */ + file = NULL; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); } + return file; } void put_filp(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) { + if (rcuref_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); @@ -257,4 +274,5 @@ void __init files_init(unsigned long mempages) files_stat.max_files = n; if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; + files_defer_init(); } |