diff options
Diffstat (limited to 'fs/file_table.c')
| -rw-r--r-- | fs/file_table.c | 85 | 
1 files changed, 73 insertions, 12 deletions
diff --git a/fs/file_table.c b/fs/file_table.c index a305d9e2d1b2..701985e4ccda 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -23,6 +23,8 @@  #include <linux/lglock.h>  #include <linux/percpu_counter.h>  #include <linux/percpu.h> +#include <linux/hardirq.h> +#include <linux/task_work.h>  #include <linux/ima.h>  #include <linux/atomic.h> @@ -41,7 +43,7 @@ static struct kmem_cache *filp_cachep __read_mostly;  static struct percpu_counter nr_files __cacheline_aligned_in_smp; -static inline void file_free_rcu(struct rcu_head *head) +static void file_free_rcu(struct rcu_head *head)  {  	struct file *f = container_of(head, struct file, f_u.fu_rcuhead); @@ -215,7 +217,7 @@ static void drop_file_write_access(struct file *file)  		return;  	if (file_check_writeable(file) != 0)  		return; -	mnt_drop_write(mnt); +	__mnt_drop_write(mnt);  	file_release_write(file);  } @@ -251,7 +253,6 @@ static void __fput(struct file *file)  	}  	fops_put(file->f_op);  	put_pid(file->f_owner.pid); -	file_sb_list_del(file);  	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)  		i_readcount_dec(inode);  	if (file->f_mode & FMODE_WRITE) @@ -263,10 +264,77 @@ static void __fput(struct file *file)  	mntput(mnt);  } +static DEFINE_SPINLOCK(delayed_fput_lock); +static LIST_HEAD(delayed_fput_list); +static void delayed_fput(struct work_struct *unused) +{ +	LIST_HEAD(head); +	spin_lock_irq(&delayed_fput_lock); +	list_splice_init(&delayed_fput_list, &head); +	spin_unlock_irq(&delayed_fput_lock); +	while (!list_empty(&head)) { +		struct file *f = list_first_entry(&head, struct file, f_u.fu_list); +		list_del_init(&f->f_u.fu_list); +		__fput(f); +	} +} + +static void ____fput(struct callback_head *work) +{ +	__fput(container_of(work, struct file, f_u.fu_rcuhead)); +} + +/* + * If a kernel thread really needs the final fput() it has done + * to complete, call this.  
The only user right now is boot - we + * *do* need to make sure our writes to binaries on initramfs have + * not left us with an opened struct file waiting for __fput() - execve() + * won't work without that.  Please don't add more callers without + * very good reasons; in particular, never call this with locks + * held and never call this from a thread that might need to do + * some work on any kind of umount. + */ +void flush_delayed_fput(void) +{ +	delayed_fput(NULL); +} + +static DECLARE_WORK(delayed_fput_work, delayed_fput); +  void fput(struct file *file)  { -	if (atomic_long_dec_and_test(&file->f_count)) +	if (atomic_long_dec_and_test(&file->f_count)) { +		struct task_struct *task = current; +		file_sb_list_del(file); +		if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) { +			unsigned long flags; +			spin_lock_irqsave(&delayed_fput_lock, flags); +			list_add(&file->f_u.fu_list, &delayed_fput_list); +			schedule_work(&delayed_fput_work); +			spin_unlock_irqrestore(&delayed_fput_lock, flags); +			return; +		} +		init_task_work(&file->f_u.fu_rcuhead, ____fput); +		task_work_add(task, &file->f_u.fu_rcuhead, true); +	} +} + +/* + * Synchronous analog of fput(); for kernel threads that might be needed + * in some umount() (and thus can't use flush_delayed_fput() without + * risking deadlocks), need to wait for completion of __fput() and know + * that for this specific struct file it won't involve anything that would + * need them.  Use only if you really need it - at the very least, + * don't blindly convert fput() calls in kernel threads to this. 
+ */ +void __fput_sync(struct file *file) +{ +	if (atomic_long_dec_and_test(&file->f_count)) { +		struct task_struct *task = current; +		file_sb_list_del(file); +		BUG_ON(!(task->flags & PF_KTHREAD));  		__fput(file); +	}  }  EXPORT_SYMBOL(fput); @@ -483,10 +551,8 @@ void mark_files_ro(struct super_block *sb)  {  	struct file *f; -retry:  	lg_global_lock(&files_lglock);  	do_file_list_for_each_entry(sb, f) { -		struct vfsmount *mnt;  		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))  		       continue;  		if (!file_count(f)) @@ -499,12 +565,7 @@ retry:  		if (file_check_writeable(f) != 0)  			continue;  		file_release_write(f); -		mnt = mntget(f->f_path.mnt); -		/* This can sleep, so we can't hold the spinlock. */ -		lg_global_unlock(&files_lglock); -		mnt_drop_write(mnt); -		mntput(mnt); -		goto retry; +		mnt_drop_write_file(f);  	} while_file_list_for_each_entry;  	lg_global_unlock(&files_lglock);  }  | 
