summary | refs | log | tree | commit | diff
path: root/fs/notify
diff options
context:
space:
mode:
Diffstat (limited to 'fs/notify')
-rw-r--r-- fs/notify/dnotify/dnotify.c 8
-rw-r--r-- fs/notify/fanotify/fanotify.c 8
-rw-r--r-- fs/notify/fanotify/fanotify.h 2
-rw-r--r-- fs/notify/fanotify/fanotify_user.c 165
-rw-r--r-- fs/notify/fdinfo.c 6
-rw-r--r-- fs/notify/fsnotify.c 91
-rw-r--r-- fs/notify/inotify/inotify_fsnotify.c 2
-rw-r--r-- fs/notify/mark.c 4
8 files changed, 204 insertions, 82 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index c4cdaf5fa7ed..9fb73bafd41d 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -308,6 +308,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
goto out_err;
}
+ error = file_f_owner_allocate(filp);
+ if (error)
+ goto out_err;
+
/* new fsnotify mark, we expect most fcntl calls to add a new mark */
new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
if (!new_dn_mark) {
@@ -315,10 +319,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
goto out_err;
}
- error = file_f_owner_allocate(filp);
- if (error)
- goto out_err;
-
/* set up the new_fsn_mark and new_dn_mark */
new_fsn_mark = &new_dn_mark->fsn_mark;
fsnotify_init_mark(new_fsn_mark, dnotify_group);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 3083643b864b..bfe884d624e7 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -454,7 +454,13 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
dwords = fh_len >> 2;
type = exportfs_encode_fid(inode, buf, &dwords);
err = -EINVAL;
- if (type <= 0 || type == FILEID_INVALID || fh_len != dwords << 2)
+ /*
+ * Unlike file_handle, type and len of struct fanotify_fh are u8.
+ * Traditionally, filesystems return handle_type < 0xff, but there
+ * is no enforcement for that in vfs.
+ */
+ BUILD_BUG_ON(MAX_HANDLE_SZ > 0xff || FILEID_INVALID > 0xff);
+ if (type <= 0 || type >= FILEID_INVALID || fh_len != dwords << 2)
goto out_err;
fh->type = type;
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index b78308975082..39e60218df7c 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -441,7 +441,9 @@ struct fanotify_perm_event {
size_t count;
u32 response; /* userspace answer to the event */
unsigned short state; /* state of the event */
+ unsigned short watchdog_cnt; /* already scanned by watchdog? */
int fd; /* fd we passed to userspace for this event */
+ pid_t recv_pid; /* pid of task receiving the event */
union {
struct fanotify_response_info_header hdr;
struct fanotify_response_info_audit_rule audit_rule;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index b192ee068a7a..d0b9b984002f 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -50,6 +50,7 @@
/* configurable via /proc/sys/fs/fanotify/ */
static int fanotify_max_queued_events __read_mostly;
+static int perm_group_timeout __read_mostly;
#ifdef CONFIG_SYSCTL
@@ -85,6 +86,14 @@ static const struct ctl_table fanotify_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO
},
+ {
+ .procname = "watchdog_timeout",
+ .data = &perm_group_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
};
static void __init fanotify_sysctls_init(void)
@@ -95,6 +104,91 @@ static void __init fanotify_sysctls_init(void)
#define fanotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
+static LIST_HEAD(perm_group_list);
+static DEFINE_SPINLOCK(perm_group_lock);
+static void perm_group_watchdog(struct work_struct *work);
+static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog);
+
+static void perm_group_watchdog_schedule(void)
+{
+ schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout));
+}
+
+static void perm_group_watchdog(struct work_struct *work)
+{
+ struct fsnotify_group *group;
+ struct fanotify_perm_event *event;
+ struct task_struct *task;
+ pid_t failed_pid = 0;
+
+ guard(spinlock)(&perm_group_lock);
+ if (list_empty(&perm_group_list))
+ return;
+
+ list_for_each_entry(group, &perm_group_list,
+ fanotify_data.perm_grp_list) {
+ /*
+ * Ok to test without lock, racing with an addition is
+ * fine, will deal with it next round
+ */
+ if (list_empty(&group->fanotify_data.access_list))
+ continue;
+
+ spin_lock(&group->notification_lock);
+ list_for_each_entry(event, &group->fanotify_data.access_list,
+ fae.fse.list) {
+ if (likely(event->watchdog_cnt == 0)) {
+ event->watchdog_cnt = 1;
+ } else if (event->watchdog_cnt == 1) {
+ /* Report on event only once */
+ event->watchdog_cnt = 2;
+
+ /* Do not report same pid repeatedly */
+ if (event->recv_pid == failed_pid)
+ continue;
+
+ failed_pid = event->recv_pid;
+ rcu_read_lock();
+ task = find_task_by_pid_ns(event->recv_pid,
+ &init_pid_ns);
+ pr_warn_ratelimited(
+ "PID %u (%s) failed to respond to fanotify queue for more than %d seconds\n",
+ event->recv_pid,
+ task ? task->comm : NULL,
+ perm_group_timeout);
+ rcu_read_unlock();
+ }
+ }
+ spin_unlock(&group->notification_lock);
+ }
+ perm_group_watchdog_schedule();
+}
+
+static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
+{
+ if (!list_empty(&group->fanotify_data.perm_grp_list)) {
+ /* Perm event watchdog can no longer scan this group. */
+ spin_lock(&perm_group_lock);
+ list_del_init(&group->fanotify_data.perm_grp_list);
+ spin_unlock(&perm_group_lock);
+ }
+}
+
+static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
+{
+ if (!perm_group_timeout)
+ return;
+
+ spin_lock(&perm_group_lock);
+ if (list_empty(&group->fanotify_data.perm_grp_list)) {
+ /* Add to perm_group_list for monitoring by watchdog. */
+ if (list_empty(&perm_group_list))
+ perm_group_watchdog_schedule();
+ list_add_tail(&group->fanotify_data.perm_grp_list, &perm_group_list);
+ }
+ spin_unlock(&perm_group_lock);
+}
+
/*
* All flags that may be specified in parameter event_f_flags of fanotify_init.
*
@@ -953,6 +1047,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
spin_lock(&group->notification_lock);
list_add_tail(&event->fse.list,
&group->fanotify_data.access_list);
+ FANOTIFY_PERM(event)->recv_pid = current->pid;
spin_unlock(&group->notification_lock);
}
}
@@ -1012,6 +1107,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
*/
fsnotify_group_stop_queueing(group);
+ fanotify_perm_watchdog_group_remove(group);
+
/*
* Process all permission events on access_list and notification queue
* and simulate reply from userspace.
@@ -1465,6 +1562,10 @@ out:
fsnotify_group_unlock(group);
fsnotify_put_mark(fsn_mark);
+
+ if (!ret && (mask & FANOTIFY_PERM_EVENTS))
+ fanotify_perm_watchdog_group_add(group);
+
return ret;
}
@@ -1496,16 +1597,20 @@ static struct hlist_head *fanotify_alloc_merge_hash(void)
return hash;
}
+DEFINE_CLASS(fsnotify_group,
+ struct fsnotify_group *,
+ if (!IS_ERR_OR_NULL(_T)) fsnotify_destroy_group(_T),
+ fsnotify_alloc_group(ops, flags),
+ const struct fsnotify_ops *ops, int flags)
+
/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
struct user_namespace *user_ns = current_user_ns();
- struct fsnotify_group *group;
int f_flags, fd;
unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
unsigned int class = flags & FANOTIFY_CLASS_BITS;
unsigned int internal_flags = 0;
- struct file *file;
pr_debug("%s: flags=%x event_f_flags=%x\n",
__func__, flags, event_f_flags);
@@ -1589,42 +1694,36 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if (flags & FAN_NONBLOCK)
f_flags |= O_NONBLOCK;
- /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
- group = fsnotify_alloc_group(&fanotify_fsnotify_ops,
+ CLASS(fsnotify_group, group)(&fanotify_fsnotify_ops,
FSNOTIFY_GROUP_USER);
- if (IS_ERR(group)) {
+ /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
+ if (IS_ERR(group))
return PTR_ERR(group);
- }
/* Enforce groups limits per user in all containing user ns */
group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(),
UCOUNT_FANOTIFY_GROUPS);
- if (!group->fanotify_data.ucounts) {
- fd = -EMFILE;
- goto out_destroy_group;
- }
+ if (!group->fanotify_data.ucounts)
+ return -EMFILE;
group->fanotify_data.flags = flags | internal_flags;
group->memcg = get_mem_cgroup_from_mm(current->mm);
group->user_ns = get_user_ns(user_ns);
group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
- if (!group->fanotify_data.merge_hash) {
- fd = -ENOMEM;
- goto out_destroy_group;
- }
+ if (!group->fanotify_data.merge_hash)
+ return -ENOMEM;
group->overflow_event = fanotify_alloc_overflow_event();
- if (unlikely(!group->overflow_event)) {
- fd = -ENOMEM;
- goto out_destroy_group;
- }
+ if (unlikely(!group->overflow_event))
+ return -ENOMEM;
if (force_o_largefile())
event_f_flags |= O_LARGEFILE;
group->fanotify_data.f_flags = event_f_flags;
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
+ INIT_LIST_HEAD(&group->fanotify_data.perm_grp_list);
switch (class) {
case FAN_CLASS_NOTIF:
group->priority = FSNOTIFY_PRIO_NORMAL;
@@ -1636,8 +1735,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
group->priority = FSNOTIFY_PRIO_PRE_CONTENT;
break;
default:
- fd = -EINVAL;
- goto out_destroy_group;
+ return -EINVAL;
}
BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE));
@@ -1648,27 +1746,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
}
if (flags & FAN_ENABLE_AUDIT) {
- fd = -EPERM;
if (!capable(CAP_AUDIT_WRITE))
- goto out_destroy_group;
- }
-
- fd = get_unused_fd_flags(f_flags);
- if (fd < 0)
- goto out_destroy_group;
-
- file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group,
- f_flags, FMODE_NONOTIFY);
- if (IS_ERR(file)) {
- put_unused_fd(fd);
- fd = PTR_ERR(file);
- goto out_destroy_group;
+ return -EPERM;
}
- fd_install(fd, file);
- return fd;
-out_destroy_group:
- fsnotify_destroy_group(group);
+ fd = FD_ADD(f_flags,
+ anon_inode_getfile_fmode("[fanotify]", &fanotify_fops,
+ group, f_flags, FMODE_NONOTIFY));
+ if (fd >= 0)
+ retain_and_null_ptr(group);
return fd;
}
@@ -1999,7 +2085,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
user_ns = path.mnt->mnt_sb->s_user_ns;
obj = path.mnt->mnt_sb;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ ret = -EINVAL;
mntns = mnt_ns_from_dentry(path.dentry);
+ if (!mntns)
+ goto path_put_and_out;
user_ns = mntns->user_ns;
obj = mntns;
}
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 1161eabf11ee..9cc7eb863643 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -17,6 +17,7 @@
#include "fanotify/fanotify.h"
#include "fdinfo.h"
#include "fsnotify.h"
+#include "../internal.h"
#if defined(CONFIG_PROC_FS)
@@ -46,7 +47,12 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
size = f->handle_bytes >> 2;
+ if (!super_trylock_shared(inode->i_sb))
+ return;
+
ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size);
+ up_read(&inode->i_sb->s_umount);
+
if ((ret == FILEID_INVALID) || (ret < 0))
return;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index e2b4f17a48bb..d27ff5e5f165 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -52,7 +52,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
* the inode cannot have any associated watches.
*/
spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+ if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE | I_NEW)) {
spin_unlock(&inode->i_lock);
continue;
}
@@ -66,7 +66,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
* removed all zero refcount inodes, in any case. Test to
* be sure.
*/
- if (!atomic_read(&inode->i_count)) {
+ if (!icount_read(inode)) {
spin_unlock(&inode->i_lock);
continue;
}
@@ -199,8 +199,8 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask,
}
/* Are there any inode/mount/sb objects that watch for these events? */
-static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask,
- __u32 mask)
+static inline __u32 fsnotify_object_watched(struct inode *inode, __u32 mnt_mask,
+ __u32 mask)
{
__u32 marks_mask = READ_ONCE(inode->i_fsnotify_mask) | mnt_mask |
READ_ONCE(inode->i_sb->s_fsnotify_mask);
@@ -656,20 +656,20 @@ EXPORT_SYMBOL_GPL(fsnotify);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/*
- * At open time we check fsnotify_sb_has_priority_watchers() and set the
- * FMODE_NONOTIFY_ mode bits accordignly.
+ * At open time we check fsnotify_sb_has_priority_watchers(), call the open perm
+ * hook and set the FMODE_NONOTIFY_ mode bits accordingly.
* Later, fsnotify permission hooks do not check if there are permission event
* watches, but that there were permission event watches at open time.
*/
-void file_set_fsnotify_mode_from_watchers(struct file *file)
+int fsnotify_open_perm_and_set_mode(struct file *file)
{
struct dentry *dentry = file->f_path.dentry, *parent;
struct super_block *sb = dentry->d_sb;
- __u32 mnt_mask, p_mask;
+ __u32 mnt_mask, p_mask = 0;
/* Is it a file opened by fanotify? */
if (FMODE_FSNOTIFY_NONE(file->f_mode))
- return;
+ return 0;
/*
* Permission events is a super set of pre-content events, so if there
@@ -679,45 +679,64 @@ void file_set_fsnotify_mode_from_watchers(struct file *file)
if (likely(!fsnotify_sb_has_priority_watchers(sb,
FSNOTIFY_PRIO_CONTENT))) {
file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);
- return;
+ return 0;
}
/*
- * If there are permission event watchers but no pre-content event
- * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that.
+ * OK, there are some permission event watchers. Check if anybody is
+ * watching for permission events on *this* file.
*/
- if ((!d_is_dir(dentry) && !d_is_reg(dentry)) ||
- likely(!fsnotify_sb_has_priority_watchers(sb,
- FSNOTIFY_PRIO_PRE_CONTENT))) {
- file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM);
- return;
+ mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask);
+ p_mask = fsnotify_object_watched(d_inode(dentry), mnt_mask,
+ ALL_FSNOTIFY_PERM_EVENTS);
+ if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) {
+ parent = dget_parent(dentry);
+ p_mask |= fsnotify_inode_watches_children(d_inode(parent));
+ dput(parent);
}
/*
- * OK, there are some pre-content watchers. Check if anybody is
- * watching for pre-content events on *this* file.
+ * Legacy FAN_ACCESS_PERM events have very high performance overhead,
+ * so unlikely to be used in the wild. If they are used there will be
+ * no optimizations at all.
*/
- mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask);
- if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask,
- FSNOTIFY_PRE_CONTENT_EVENTS))) {
- /* Enable pre-content events */
+ if (unlikely(p_mask & FS_ACCESS_PERM)) {
+ /* Enable all permission and pre-content events */
file_set_fsnotify_mode(file, 0);
- return;
+ goto open_perm;
}
- /* Is parent watching for pre-content events on this file? */
- if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) {
- parent = dget_parent(dentry);
- p_mask = fsnotify_inode_watches_children(d_inode(parent));
- dput(parent);
- if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) {
- /* Enable pre-content events */
- file_set_fsnotify_mode(file, 0);
- return;
- }
+ /*
+ * Pre-content events are only supported on regular files.
+ * If there are pre-content event watchers and no permission access
+ * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that.
+ * That is the common case with HSM service.
+ */
+ if (d_is_reg(dentry) && (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)) {
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY |
+ FMODE_NONOTIFY_PERM);
+ goto open_perm;
}
- /* Nobody watching for pre-content events from this file */
- file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM);
+
+ /* Nobody watching permission and pre-content events on this file */
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);
+
+open_perm:
+ /*
+ * Send open perm events depending on object masks and regardless of
+ * FMODE_NONOTIFY_PERM.
+ */
+ if (file->f_flags & __FMODE_EXEC && p_mask & FS_OPEN_EXEC_PERM) {
+ int ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM);
+
+ if (ret)
+ return ret;
+ }
+
+ if (p_mask & FS_OPEN_PERM)
+ return fsnotify_path(&file->f_path, FS_OPEN_PERM);
+
+ return 0;
}
#endif
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index cd7d11b0eb08..7c326ec2e8a8 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -10,7 +10,7 @@
* Copyright 2006 Hewlett-Packard Development Company, L.P.
*
* Copyright (C) 2009 Eric Paris <Red Hat Inc>
- * inotify was largely rewriten to make use of the fsnotify infrastructure
+ * inotify was largely rewritten to make use of the fsnotify infrastructure
*/
#include <linux/dcache.h> /* d_unlinked */
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 798340db69d7..55a03bb05aa1 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -428,7 +428,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
conn->destroy_next = connector_destroy_list;
connector_destroy_list = conn;
spin_unlock(&destroy_lock);
- queue_work(system_unbound_wq, &connector_reaper_work);
+ queue_work(system_dfl_wq, &connector_reaper_work);
}
/*
* Note that we didn't update flags telling whether inode cares about
@@ -439,7 +439,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
spin_lock(&destroy_lock);
list_add(&mark->g_list, &destroy_list);
spin_unlock(&destroy_lock);
- queue_delayed_work(system_unbound_wq, &reaper_work,
+ queue_delayed_work(system_dfl_wq, &reaper_work,
FSNOTIFY_REAPER_DELAY);
}
EXPORT_SYMBOL_GPL(fsnotify_put_mark);