summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-17 21:56:27 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-17 21:56:27 +0300
commit14bd41e41899cbd1de4bb5ddfa46c85b08091a69 (patch)
tree0e19281131adc23e9f078a09e7de661dcdf28e4a
parentd652d5f1eeeb06046009f4fcb9b4542249526916 (diff)
parentfecc4559780d52d174ea05e3bf543669165389c3 (diff)
downloadlinux-14bd41e41899cbd1de4bb5ddfa46c85b08091a69.tar.xz
Merge tag 'fsnotify_for_v5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fsnotify updates from Jan Kara: "A few fsnotify fixes from Amir fixing fallout from big fsnotify overhaul a few months back and an improvement of defaults limiting maximum number of inotify watches from Waiman" * tag 'fsnotify_for_v5.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: fsnotify: fix events reported to watching parent and child inotify: convert to handle_inode_event() interface fsnotify: generalize handle_inode_event() inotify: Increase default inotify.max_user_watches limit to 1048576
-rw-r--r--fs/nfsd/filecache.c2
-rw-r--r--fs/notify/dnotify/dnotify.c2
-rw-r--r--fs/notify/fanotify/fanotify.c7
-rw-r--r--fs/notify/fsnotify.c107
-rw-r--r--fs/notify/inotify/inotify.h9
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c51
-rw-r--r--fs/notify/inotify/inotify_user.c31
-rw-r--r--include/linux/fsnotify_backend.h9
-rw-r--r--kernel/audit_fsnotify.c2
-rw-r--r--kernel/audit_tree.c2
-rw-r--r--kernel/audit_watch.c2
11 files changed, 120 insertions, 104 deletions
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index d77c624c61f6..53fcbf79bdca 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -600,7 +600,7 @@ static struct notifier_block nfsd_file_lease_notifier = {
static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
struct inode *inode, struct inode *dir,
- const struct qstr *name)
+ const struct qstr *name, u32 cookie)
{
trace_nfsd_file_fsnotify_handle_event(inode, mask);
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 5486aaca60b0..e85e13c50d6d 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -72,7 +72,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
*/
static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
struct inode *inode, struct inode *dir,
- const struct qstr *name)
+ const struct qstr *name, u32 cookie)
{
struct dnotify_mark *dn_mark;
struct dnotify_struct *dn;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 9167884a61ec..1192c9953620 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -268,12 +268,11 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
continue;
/*
- * If the event is for a child and this mark is on a parent not
+ * If the event is on a child and this mark is on a parent not
* watching children, don't send it!
*/
- if (event_mask & FS_EVENT_ON_CHILD &&
- type == FSNOTIFY_OBJ_TYPE_INODE &&
- !(mark->mask & FS_EVENT_ON_CHILD))
+ if (type == FSNOTIFY_OBJ_TYPE_PARENT &&
+ !(mark->mask & FS_EVENT_ON_CHILD))
continue;
marks_mask |= mark->mask;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 8d3ad5ef2925..30d422b8c0fc 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -152,6 +152,13 @@ static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt,
if (mask & FS_ISDIR)
return false;
+ /*
+ * All events that are possible on child can also may be reported with
+ * parent/name info to inode/sb/mount. Otherwise, a watching parent
+ * could result in events reported with unexpected name info to sb/mount.
+ */
+ BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT);
+
/* Did either inode/sb/mount subscribe for events with parent/name? */
marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask);
marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask);
@@ -232,47 +239,76 @@ notify:
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
+static int fsnotify_handle_inode_event(struct fsnotify_group *group,
+ struct fsnotify_mark *inode_mark,
+ u32 mask, const void *data, int data_type,
+ struct inode *dir, const struct qstr *name,
+ u32 cookie)
+{
+ const struct path *path = fsnotify_data_path(data, data_type);
+ struct inode *inode = fsnotify_data_inode(data, data_type);
+ const struct fsnotify_ops *ops = group->ops;
+
+ if (WARN_ON_ONCE(!ops->handle_inode_event))
+ return 0;
+
+ if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+ path && d_unlinked(path->dentry))
+ return 0;
+
+ /* Check interest of this mark in case event was sent with two marks */
+ if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS))
+ return 0;
+
+ return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie);
+}
+
static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
const void *data, int data_type,
struct inode *dir, const struct qstr *name,
u32 cookie, struct fsnotify_iter_info *iter_info)
{
struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
- struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
- struct inode *inode = fsnotify_data_inode(data, data_type);
- const struct fsnotify_ops *ops = group->ops;
+ struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info);
int ret;
- if (WARN_ON_ONCE(!ops->handle_inode_event))
- return 0;
-
if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) ||
WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
return 0;
- /*
- * An event can be sent on child mark iterator instead of inode mark
- * iterator because of other groups that have interest of this inode
- * and have marks on both parent and child. We can simplify this case.
- */
- if (!inode_mark) {
- inode_mark = child_mark;
- child_mark = NULL;
+ if (parent_mark) {
+ /*
+ * parent_mark indicates that the parent inode is watching
+ * children and interested in this event, which is an event
+ * possible on child. But is *this mark* watching children and
+ * interested in this event?
+ */
+ if (parent_mark->mask & FS_EVENT_ON_CHILD) {
+ ret = fsnotify_handle_inode_event(group, parent_mark, mask,
+ data, data_type, dir, name, 0);
+ if (ret)
+ return ret;
+ }
+ if (!inode_mark)
+ return 0;
+ }
+
+ if (mask & FS_EVENT_ON_CHILD) {
+ /*
+ * Some events can be sent on both parent dir and child marks
+ * (e.g. FS_ATTRIB). If both parent dir and child are
+ * watching, report the event once to parent dir with name (if
+ * interested) and once to child without name (if interested).
+ * The child watcher is expecting an event without a file name
+ * and without the FS_EVENT_ON_CHILD flag.
+ */
+ mask &= ~FS_EVENT_ON_CHILD;
dir = NULL;
name = NULL;
}
- ret = ops->handle_inode_event(inode_mark, mask, inode, dir, name);
- if (ret || !child_mark)
- return ret;
-
- /*
- * Some events can be sent on both parent dir and child marks
- * (e.g. FS_ATTRIB). If both parent dir and child are watching,
- * report the event once to parent dir with name and once to child
- * without name.
- */
- return ops->handle_inode_event(child_mark, mask, inode, NULL, NULL);
+ return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type,
+ dir, name, cookie);
}
static int send_to_group(__u32 mask, const void *data, int data_type,
@@ -430,7 +466,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
struct fsnotify_iter_info iter_info = {};
struct super_block *sb;
struct mount *mnt = NULL;
- struct inode *child = NULL;
+ struct inode *parent = NULL;
int ret = 0;
__u32 test_mask, marks_mask;
@@ -442,11 +478,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
inode = dir;
} else if (mask & FS_EVENT_ON_CHILD) {
/*
- * Event on child - report on TYPE_INODE to dir if it is
- * watching children and on TYPE_CHILD to child.
+ * Event on child - report on TYPE_PARENT to dir if it is
+ * watching children and on TYPE_INODE to child.
*/
- child = inode;
- inode = dir;
+ parent = dir;
}
sb = inode->i_sb;
@@ -460,7 +495,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
if (!sb->s_fsnotify_marks &&
(!mnt || !mnt->mnt_fsnotify_marks) &&
(!inode || !inode->i_fsnotify_marks) &&
- (!child || !child->i_fsnotify_marks))
+ (!parent || !parent->i_fsnotify_marks))
return 0;
marks_mask = sb->s_fsnotify_mask;
@@ -468,8 +503,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
marks_mask |= mnt->mnt_fsnotify_mask;
if (inode)
marks_mask |= inode->i_fsnotify_mask;
- if (child)
- marks_mask |= child->i_fsnotify_mask;
+ if (parent)
+ marks_mask |= parent->i_fsnotify_mask;
/*
@@ -492,9 +527,9 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
fsnotify_first_mark(&inode->i_fsnotify_marks);
}
- if (child) {
- iter_info.marks[FSNOTIFY_OBJ_TYPE_CHILD] =
- fsnotify_first_mark(&child->i_fsnotify_marks);
+ if (parent) {
+ iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] =
+ fsnotify_first_mark(&parent->i_fsnotify_marks);
}
/*
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 4327d0e9c364..2007e3711916 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -24,11 +24,10 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group);
-extern int inotify_handle_event(struct fsnotify_group *group, u32 mask,
- const void *data, int data_type,
- struct inode *dir,
- const struct qstr *file_name, u32 cookie,
- struct fsnotify_iter_info *iter_info);
+extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
+ u32 mask, struct inode *inode,
+ struct inode *dir,
+ const struct qstr *name, u32 cookie);
extern const struct fsnotify_ops inotify_fsnotify_ops;
extern struct kmem_cache *inotify_inode_mark_cachep;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 9ddcbadc98e2..1901d799909b 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -55,25 +55,21 @@ static int inotify_merge(struct list_head *list,
return event_compare(last_event, event);
}
-static int inotify_one_event(struct fsnotify_group *group, u32 mask,
- struct fsnotify_mark *inode_mark,
- const struct path *path,
- const struct qstr *file_name, u32 cookie)
+int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
+ struct inode *inode, struct inode *dir,
+ const struct qstr *name, u32 cookie)
{
struct inotify_inode_mark *i_mark;
struct inotify_event_info *event;
struct fsnotify_event *fsn_event;
+ struct fsnotify_group *group = inode_mark->group;
int ret;
int len = 0;
int alloc_len = sizeof(struct inotify_event_info);
struct mem_cgroup *old_memcg;
- if ((inode_mark->mask & FS_EXCL_UNLINK) &&
- path && d_unlinked(path->dentry))
- return 0;
-
- if (file_name) {
- len = file_name->len;
+ if (name) {
+ len = name->len;
alloc_len += len + 1;
}
@@ -117,7 +113,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
event->sync_cookie = cookie;
event->name_len = len;
if (len)
- strcpy(event->name, file_name->name);
+ strcpy(event->name, name->name);
ret = fsnotify_add_event(group, fsn_event, inotify_merge);
if (ret) {
@@ -131,37 +127,6 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
return 0;
}
-int inotify_handle_event(struct fsnotify_group *group, u32 mask,
- const void *data, int data_type, struct inode *dir,
- const struct qstr *file_name, u32 cookie,
- struct fsnotify_iter_info *iter_info)
-{
- const struct path *path = fsnotify_data_path(data, data_type);
- struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
- struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
- int ret = 0;
-
- if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
- return 0;
-
- /*
- * Some events cannot be sent on both parent and child marks
- * (e.g. IN_CREATE). Those events are always sent on inode_mark.
- * For events that are possible on both parent and child (e.g. IN_OPEN),
- * event is sent on inode_mark with name if the parent is watching and
- * is sent on child_mark without name if child is watching.
- * If both parent and child are watching, report the event with child's
- * name here and report another event without child's name below.
- */
- if (inode_mark)
- ret = inotify_one_event(group, mask, inode_mark, path,
- file_name, cookie);
- if (ret || !child_mark)
- return ret;
-
- return inotify_one_event(group, mask, child_mark, path, NULL, 0);
-}
-
static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
{
inotify_ignored_and_remove_idr(fsn_mark, group);
@@ -227,7 +192,7 @@ static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
}
const struct fsnotify_ops inotify_fsnotify_ops = {
- .handle_event = inotify_handle_event,
+ .handle_inode_event = inotify_handle_inode_event,
.free_group_priv = inotify_free_group_priv,
.free_event = inotify_free_event,
.freeing_mark = inotify_freeing_mark,
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 186722ba3894..59c177011a0f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -37,6 +37,15 @@
#include <asm/ioctls.h>
+/*
+ * An inotify watch requires allocating an inotify_inode_mark structure as
+ * well as pinning the watched inode. Doubling the size of a VFS inode
+ * should be more than enough to cover the additional filesystem inode
+ * size increase.
+ */
+#define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \
+ 2 * sizeof(struct inode))
+
/* configurable via /proc/sys/fs/inotify/ */
static int inotify_max_queued_events __read_mostly;
@@ -486,14 +495,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group)
{
struct inotify_inode_mark *i_mark;
- struct fsnotify_iter_info iter_info = { };
-
- fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE,
- fsn_mark);
/* Queue ignore event for the watch */
- inotify_handle_event(group, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE,
- NULL, NULL, 0, &iter_info);
+ inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL,
+ 0);
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
/* remove this mark from the idr */
@@ -801,6 +806,18 @@ out:
*/
static int __init inotify_user_setup(void)
{
+ unsigned long watches_max;
+ struct sysinfo si;
+
+ si_meminfo(&si);
+ /*
+ * Allow up to 1% of addressable memory to be allocated for inotify
+ * watches (per user) limited to the range [8192, 1048576].
+ */
+ watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
+ INOTIFY_WATCH_COST;
+ watches_max = clamp(watches_max, 8192UL, 1048576UL);
+
BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
@@ -827,7 +844,7 @@ static int __init inotify_user_setup(void)
inotify_max_queued_events = 16384;
init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
- init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
+ init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;
return 0;
}
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index f8529a3a2923..a2e42d3cd87c 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -137,6 +137,7 @@ struct mem_cgroup;
* if @file_name is not NULL, this is the directory that
* @file_name is relative to.
* @file_name: optional file name associated with event
+ * @cookie: inotify rename cookie
*
* free_group_priv - called when a group refcnt hits 0 to clean up the private union
* freeing_mark - called when a mark is being destroyed for some reason. The group
@@ -151,7 +152,7 @@ struct fsnotify_ops {
struct fsnotify_iter_info *iter_info);
int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask,
struct inode *inode, struct inode *dir,
- const struct qstr *file_name);
+ const struct qstr *file_name, u32 cookie);
void (*free_group_priv)(struct fsnotify_group *group);
void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
void (*free_event)(struct fsnotify_event *event);
@@ -277,7 +278,7 @@ static inline const struct path *fsnotify_data_path(const void *data,
enum fsnotify_obj_type {
FSNOTIFY_OBJ_TYPE_INODE,
- FSNOTIFY_OBJ_TYPE_CHILD,
+ FSNOTIFY_OBJ_TYPE_PARENT,
FSNOTIFY_OBJ_TYPE_VFSMOUNT,
FSNOTIFY_OBJ_TYPE_SB,
FSNOTIFY_OBJ_TYPE_COUNT,
@@ -285,7 +286,7 @@ enum fsnotify_obj_type {
};
#define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE)
-#define FSNOTIFY_OBJ_TYPE_CHILD_FL (1U << FSNOTIFY_OBJ_TYPE_CHILD)
+#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT)
#define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT)
#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB)
#define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1)
@@ -330,7 +331,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
}
FSNOTIFY_ITER_FUNCS(inode, INODE)
-FSNOTIFY_ITER_FUNCS(child, CHILD)
+FSNOTIFY_ITER_FUNCS(parent, PARENT)
FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
FSNOTIFY_ITER_FUNCS(sb, SB)
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index bfcfcd61adb6..5b3f01da172b 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -154,7 +154,7 @@ static void audit_autoremove_mark_rule(struct audit_fsnotify_mark *audit_mark)
/* Update mark data in audit rules based on fsnotify events. */
static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
struct inode *inode, struct inode *dir,
- const struct qstr *dname)
+ const struct qstr *dname, u32 cookie)
{
struct audit_fsnotify_mark *audit_mark;
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 83e1c07fc99e..6c91902f4f45 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -1037,7 +1037,7 @@ static void evict_chunk(struct audit_chunk *chunk)
static int audit_tree_handle_event(struct fsnotify_mark *mark, u32 mask,
struct inode *inode, struct inode *dir,
- const struct qstr *file_name)
+ const struct qstr *file_name, u32 cookie)
{
return 0;
}
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 246e5ba704c0..2acf7ca49154 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -466,7 +466,7 @@ void audit_remove_watch_rule(struct audit_krule *krule)
/* Update watch data in audit rules based on fsnotify events. */
static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
struct inode *inode, struct inode *dir,
- const struct qstr *dname)
+ const struct qstr *dname, u32 cookie)
{
struct audit_parent *parent;