diff options
Diffstat (limited to 'fs/notify')
| -rw-r--r-- | fs/notify/dnotify/dnotify.c | 8 | ||||
| -rw-r--r-- | fs/notify/fanotify/fanotify.c | 8 | ||||
| -rw-r--r-- | fs/notify/fanotify/fanotify.h | 2 | ||||
| -rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 165 | ||||
| -rw-r--r-- | fs/notify/fdinfo.c | 6 | ||||
| -rw-r--r-- | fs/notify/fsnotify.c | 91 | ||||
| -rw-r--r-- | fs/notify/inotify/inotify_fsnotify.c | 2 | ||||
| -rw-r--r-- | fs/notify/mark.c | 4 |
8 files changed, 204 insertions, 82 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index c4cdaf5fa7ed..9fb73bafd41d 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -308,6 +308,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } + error = file_f_owner_allocate(filp); + if (error) + goto out_err; + /* new fsnotify mark, we expect most fcntl calls to add a new mark */ new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL); if (!new_dn_mark) { @@ -315,10 +319,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } - error = file_f_owner_allocate(filp); - if (error) - goto out_err; - /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; fsnotify_init_mark(new_fsn_mark, dnotify_group); diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 3083643b864b..bfe884d624e7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -454,7 +454,13 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, dwords = fh_len >> 2; type = exportfs_encode_fid(inode, buf, &dwords); err = -EINVAL; - if (type <= 0 || type == FILEID_INVALID || fh_len != dwords << 2) + /* + * Unlike file_handle, type and len of struct fanotify_fh are u8. + * Traditionally, filesystem return handle_type < 0xff, but there + * is no enforecement for that in vfs. + */ + BUILD_BUG_ON(MAX_HANDLE_SZ > 0xff || FILEID_INVALID > 0xff); + if (type <= 0 || type >= FILEID_INVALID || fh_len != dwords << 2) goto out_err; fh->type = type; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index b78308975082..39e60218df7c 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -441,7 +441,9 @@ struct fanotify_perm_event { size_t count; u32 response; /* userspace answer to the event */ unsigned short state; /* state of the event */ + unsigned short watchdog_cnt; /* already scanned by watchdog? */ int fd; /* fd we passed to userspace for this event */ + pid_t recv_pid; /* pid of task receiving the event */ union { struct fanotify_response_info_header hdr; struct fanotify_response_info_audit_rule audit_rule; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b192ee068a7a..d0b9b984002f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -50,6 +50,7 @@ /* configurable via /proc/sys/fs/fanotify/ */ static int fanotify_max_queued_events __read_mostly; +static int perm_group_timeout __read_mostly; #ifdef CONFIG_SYSCTL @@ -85,6 +86,14 @@ static const struct ctl_table fanotify_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO }, + { + .procname = "watchdog_timeout", + .data = &perm_group_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, }; static void __init fanotify_sysctls_init(void) @@ -95,6 +104,91 @@ static void __init fanotify_sysctls_init(void) #define fanotify_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ +static LIST_HEAD(perm_group_list); +static DEFINE_SPINLOCK(perm_group_lock); +static void perm_group_watchdog(struct work_struct *work); +static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog); + +static void perm_group_watchdog_schedule(void) +{ + schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout)); +} + +static void perm_group_watchdog(struct work_struct *work) +{ + struct fsnotify_group *group; + struct fanotify_perm_event *event; + struct task_struct *task; + pid_t failed_pid = 0; + + guard(spinlock)(&perm_group_lock); + if (list_empty(&perm_group_list)) + return; + + list_for_each_entry(group, &perm_group_list, + fanotify_data.perm_grp_list) { + /* + * Ok to test without lock, racing with an addition is + * fine, will deal with it next round + */ + if (list_empty(&group->fanotify_data.access_list)) + continue; + + spin_lock(&group->notification_lock); + list_for_each_entry(event, &group->fanotify_data.access_list, + fae.fse.list) { + if (likely(event->watchdog_cnt == 0)) { + event->watchdog_cnt = 1; + } else if (event->watchdog_cnt == 1) { + /* Report on event only once */ + event->watchdog_cnt = 2; + + /* Do not report same pid repeatedly */ + if (event->recv_pid == failed_pid) + continue; + + failed_pid = event->recv_pid; + rcu_read_lock(); + task = find_task_by_pid_ns(event->recv_pid, + &init_pid_ns); + pr_warn_ratelimited( + "PID %u (%s) failed to respond to fanotify queue for more than %d seconds\n", + event->recv_pid, + task ? task->comm : NULL, + perm_group_timeout); + rcu_read_unlock(); + } + } + spin_unlock(&group->notification_lock); + } + perm_group_watchdog_schedule(); +} + +static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group) +{ + if (!list_empty(&group->fanotify_data.perm_grp_list)) { + /* Perm event watchdog can no longer scan this group. */ + spin_lock(&perm_group_lock); + list_del_init(&group->fanotify_data.perm_grp_list); + spin_unlock(&perm_group_lock); + } +} + +static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group) +{ + if (!perm_group_timeout) + return; + + spin_lock(&perm_group_lock); + if (list_empty(&group->fanotify_data.perm_grp_list)) { + /* Add to perm_group_list for monitoring by watchdog. */ + if (list_empty(&perm_group_list)) + perm_group_watchdog_schedule(); + list_add_tail(&group->fanotify_data.perm_grp_list, &perm_group_list); + } + spin_unlock(&perm_group_lock); +} + /* * All flags that may be specified in parameter event_f_flags of fanotify_init. * @@ -953,6 +1047,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, spin_lock(&group->notification_lock); list_add_tail(&event->fse.list, &group->fanotify_data.access_list); + FANOTIFY_PERM(event)->recv_pid = current->pid; spin_unlock(&group->notification_lock); } } @@ -1012,6 +1107,8 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ fsnotify_group_stop_queueing(group); + fanotify_perm_watchdog_group_remove(group); + /* * Process all permission events on access_list and notification queue * and simulate reply from userspace. @@ -1465,6 +1562,10 @@ out: fsnotify_group_unlock(group); fsnotify_put_mark(fsn_mark); + + if (!ret && (mask & FANOTIFY_PERM_EVENTS)) + fanotify_perm_watchdog_group_add(group); + return ret; } @@ -1496,16 +1597,20 @@ static struct hlist_head *fanotify_alloc_merge_hash(void) return hash; } +DEFINE_CLASS(fsnotify_group, + struct fsnotify_group *, + if (!IS_ERR_OR_NULL(_T)) fsnotify_destroy_group(_T), + fsnotify_alloc_group(ops, flags), + const struct fsnotify_ops *ops, int flags) + /* fanotify syscalls */ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { struct user_namespace *user_ns = current_user_ns(); - struct fsnotify_group *group; int f_flags, fd; unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; unsigned int internal_flags = 0; - struct file *file; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); @@ -1589,42 +1694,36 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (flags & FAN_NONBLOCK) f_flags |= O_NONBLOCK; - /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ - group = fsnotify_alloc_group(&fanotify_fsnotify_ops, + CLASS(fsnotify_group, group)(&fanotify_fsnotify_ops, FSNOTIFY_GROUP_USER); - if (IS_ERR(group)) { + /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ + if (IS_ERR(group)) return PTR_ERR(group); - } /* Enforce groups limits per user in all containing user ns */ group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(), UCOUNT_FANOTIFY_GROUPS); - if (!group->fanotify_data.ucounts) { - fd = -EMFILE; - goto out_destroy_group; - } + if (!group->fanotify_data.ucounts) + return -EMFILE; group->fanotify_data.flags = flags | internal_flags; group->memcg = get_mem_cgroup_from_mm(current->mm); group->user_ns = get_user_ns(user_ns); group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); - if (!group->fanotify_data.merge_hash) { - fd = -ENOMEM; - goto out_destroy_group; - } + if (!group->fanotify_data.merge_hash) + return -ENOMEM; group->overflow_event = fanotify_alloc_overflow_event(); - if (unlikely(!group->overflow_event)) { - fd = -ENOMEM; - goto out_destroy_group; - } + if (unlikely(!group->overflow_event)) + return -ENOMEM; if (force_o_largefile()) event_f_flags |= O_LARGEFILE; group->fanotify_data.f_flags = event_f_flags; init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); + INIT_LIST_HEAD(&group->fanotify_data.perm_grp_list); switch (class) { case FAN_CLASS_NOTIF: group->priority = FSNOTIFY_PRIO_NORMAL; @@ -1636,8 +1735,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->priority = FSNOTIFY_PRIO_PRE_CONTENT; break; default: - fd = -EINVAL; - goto out_destroy_group; + return -EINVAL; } BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE)); @@ -1648,27 +1746,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } if (flags & FAN_ENABLE_AUDIT) { - fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) - goto out_destroy_group; - } - - fd = get_unused_fd_flags(f_flags); - if (fd < 0) - goto out_destroy_group; - - file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group, - f_flags, FMODE_NONOTIFY); - if (IS_ERR(file)) { - put_unused_fd(fd); - fd = PTR_ERR(file); - goto out_destroy_group; + return -EPERM; } - fd_install(fd, file); - return fd; -out_destroy_group: - fsnotify_destroy_group(group); + fd = FD_ADD(f_flags, + anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, + group, f_flags, FMODE_NONOTIFY)); + if (fd >= 0) + retain_and_null_ptr(group); return fd; } @@ -1999,7 +2085,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, user_ns = path.mnt->mnt_sb->s_user_ns; obj = path.mnt->mnt_sb; } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { + ret = -EINVAL; mntns = mnt_ns_from_dentry(path.dentry); + if (!mntns) + goto path_put_and_out; user_ns = mntns->user_ns; obj = mntns; } diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 1161eabf11ee..9cc7eb863643 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -17,6 +17,7 @@ #include "fanotify/fanotify.h" #include "fdinfo.h" #include "fsnotify.h" +#include "../internal.h" #if defined(CONFIG_PROC_FS) @@ -46,7 +47,12 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f->handle_bytes >> 2; + if (!super_trylock_shared(inode->i_sb)) + return; + ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size); + up_read(&inode->i_sb->s_umount); + if ((ret == FILEID_INVALID) || (ret < 0)) return; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index e2b4f17a48bb..d27ff5e5f165 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -52,7 +52,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb) * the inode cannot have any associated watches. */ spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { + if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE | I_NEW)) { spin_unlock(&inode->i_lock); continue; } @@ -66,7 +66,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb) * removed all zero refcount inodes, in any case. Test to * be sure. */ - if (!atomic_read(&inode->i_count)) { + if (!icount_read(inode)) { spin_unlock(&inode->i_lock); continue; } @@ -199,8 +199,8 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, } /* Are there any inode/mount/sb objects that watch for these events? */ -static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, - __u32 mask) +static inline __u32 fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, + __u32 mask) { __u32 marks_mask = READ_ONCE(inode->i_fsnotify_mask) | mnt_mask | READ_ONCE(inode->i_sb->s_fsnotify_mask); @@ -656,20 +656,20 @@ EXPORT_SYMBOL_GPL(fsnotify); #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* - * At open time we check fsnotify_sb_has_priority_watchers() and set the - * FMODE_NONOTIFY_ mode bits accordignly. + * At open time we check fsnotify_sb_has_priority_watchers(), call the open perm + * hook and set the FMODE_NONOTIFY_ mode bits accordignly. * Later, fsnotify permission hooks do not check if there are permission event * watches, but that there were permission event watches at open time. */ -void file_set_fsnotify_mode_from_watchers(struct file *file) +int fsnotify_open_perm_and_set_mode(struct file *file) { struct dentry *dentry = file->f_path.dentry, *parent; struct super_block *sb = dentry->d_sb; - __u32 mnt_mask, p_mask; + __u32 mnt_mask, p_mask = 0; /* Is it a file opened by fanotify? */ if (FMODE_FSNOTIFY_NONE(file->f_mode)) - return; + return 0; /* * Permission events is a super set of pre-content events, so if there @@ -679,45 +679,64 @@ void file_set_fsnotify_mode_from_watchers(struct file *file) if (likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT))) { file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); - return; + return 0; } /* - * If there are permission event watchers but no pre-content event - * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. + * OK, there are some permission event watchers. Check if anybody is + * watching for permission events on *this* file. */ - if ((!d_is_dir(dentry) && !d_is_reg(dentry)) || - likely(!fsnotify_sb_has_priority_watchers(sb, - FSNOTIFY_PRIO_PRE_CONTENT))) { - file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); - return; + mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); + p_mask = fsnotify_object_watched(d_inode(dentry), mnt_mask, + ALL_FSNOTIFY_PERM_EVENTS); + if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { + parent = dget_parent(dentry); + p_mask |= fsnotify_inode_watches_children(d_inode(parent)); + dput(parent); } /* - * OK, there are some pre-content watchers. Check if anybody is - * watching for pre-content events on *this* file. + * Legacy FAN_ACCESS_PERM events have very high performance overhead, + * so unlikely to be used in the wild. If they are used there will be + * no optimizations at all. */ - mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); - if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask, - FSNOTIFY_PRE_CONTENT_EVENTS))) { - /* Enable pre-content events */ + if (unlikely(p_mask & FS_ACCESS_PERM)) { + /* Enable all permission and pre-content events */ file_set_fsnotify_mode(file, 0); - return; + goto open_perm; } - /* Is parent watching for pre-content events on this file? */ - if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { - parent = dget_parent(dentry); - p_mask = fsnotify_inode_watches_children(d_inode(parent)); - dput(parent); - if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) { - /* Enable pre-content events */ - file_set_fsnotify_mode(file, 0); - return; - } + /* + * Pre-content events are only supported on regular files. + * If there are pre-content event watchers and no permission access + * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. + * That is the common case with HSM service. + */ + if (d_is_reg(dentry) && (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)) { + file_set_fsnotify_mode(file, FMODE_NONOTIFY | + FMODE_NONOTIFY_PERM); + goto open_perm; } - /* Nobody watching for pre-content events from this file */ - file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); + + /* Nobody watching permission and pre-content events on this file */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); + +open_perm: + /* + * Send open perm events depending on object masks and regardless of + * FMODE_NONOTIFY_PERM. + */ + if (file->f_flags & __FMODE_EXEC && p_mask & FS_OPEN_EXEC_PERM) { + int ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); + + if (ret) + return ret; + } + + if (p_mask & FS_OPEN_PERM) + return fsnotify_path(&file->f_path, FS_OPEN_PERM); + + return 0; } #endif diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index cd7d11b0eb08..7c326ec2e8a8 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -10,7 +10,7 @@ * Copyright 2006 Hewlett-Packard Development Company, L.P. * * Copyright (C) 2009 Eric Paris <Red Hat Inc> - * inotify was largely rewriten to make use of the fsnotify infrastructure + * inotify was largely rewritten to make use of the fsnotify infrastructure */ #include <linux/dcache.h> /* d_unlinked */ diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 798340db69d7..55a03bb05aa1 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -428,7 +428,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) conn->destroy_next = connector_destroy_list; connector_destroy_list = conn; spin_unlock(&destroy_lock); - queue_work(system_unbound_wq, &connector_reaper_work); + queue_work(system_dfl_wq, &connector_reaper_work); } /* * Note that we didn't update flags telling whether inode cares about @@ -439,7 +439,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) spin_lock(&destroy_lock); list_add(&mark->g_list, &destroy_list); spin_unlock(&destroy_lock); - queue_delayed_work(system_unbound_wq, &reaper_work, + queue_delayed_work(system_dfl_wq, &reaper_work, FSNOTIFY_REAPER_DELAY); } EXPORT_SYMBOL_GPL(fsnotify_put_mark); |
