summary refs log tree commit diff
path: root/fs/notify
diff options
context:
space:
mode:
Diffstat (limited to 'fs/notify')
-rw-r--r-- fs/notify/dnotify/dnotify.c 26
-rw-r--r-- fs/notify/fanotify/fanotify.c 48
-rw-r--r-- fs/notify/fanotify/fanotify.h 20
-rw-r--r-- fs/notify/fanotify/fanotify_user.c 284
-rw-r--r-- fs/notify/fdinfo.c 24
-rw-r--r-- fs/notify/fsnotify.c 72
-rw-r--r-- fs/notify/fsnotify.h 39
-rw-r--r-- fs/notify/group.c 11
-rw-r--r-- fs/notify/inotify/inotify_user.c 19
-rw-r--r-- fs/notify/mark.c 228
10 files changed, 447 insertions, 324 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index ebdcc25df0f7..0539c2a328c7 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -29,7 +29,6 @@ static struct ctl_table dnotify_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {}
};
static void __init dnotify_sysctl_init(void)
{
@@ -39,9 +38,9 @@ static void __init dnotify_sysctl_init(void)
#define dnotify_sysctl_init() do { } while (0)
#endif
-static struct kmem_cache *dnotify_struct_cache __read_mostly;
-static struct kmem_cache *dnotify_mark_cache __read_mostly;
-static struct fsnotify_group *dnotify_group __read_mostly;
+static struct kmem_cache *dnotify_struct_cache __ro_after_init;
+static struct kmem_cache *dnotify_mark_cache __ro_after_init;
+static struct fsnotify_group *dnotify_group __ro_after_init;
/*
* dnotify will attach one of these to each inode (i_fsnotify_marks) which
@@ -111,7 +110,7 @@ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
prev = &dn->dn_next;
continue;
}
- fown = &dn->dn_filp->f_owner;
+ fown = file_f_owner(dn->dn_filp);
send_sigio(fown, dn->dn_fd, POLL_MSG);
if (dn->dn_mask & FS_DN_MULTISHOT)
prev = &dn->dn_next;
@@ -163,7 +162,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
if (!S_ISDIR(inode->i_mode))
return;
- fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
+ fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group);
if (!fsn_mark)
return;
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
@@ -265,7 +264,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
struct dnotify_struct *dn;
struct inode *inode;
fl_owner_t id = current->files;
- struct file *f;
+ struct file *f = NULL;
int destroy = 0, error = 0;
__u32 mask;
@@ -310,6 +309,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
goto out_err;
}
+ error = file_f_owner_allocate(filp);
+ if (error)
+ goto out_err;
+
/* new fsnotify mark, we expect most fcntl calls to add a new mark */
new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
if (!new_dn_mark) {
@@ -327,7 +330,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
fsnotify_group_lock(dnotify_group);
/* add the new_fsn_mark or find an old one. */
- fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
+ fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group);
if (fsn_mark) {
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
spin_lock(&fsn_mark->lock);
@@ -345,7 +348,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
}
rcu_read_lock();
- f = lookup_fd_rcu(fd);
+ f = lookup_fdget_rcu(fd);
rcu_read_unlock();
/* if (f != filp) means that we lost a race and another task/thread
@@ -392,6 +395,8 @@ out_err:
fsnotify_put_mark(new_fsn_mark);
if (dn)
kmem_cache_free(dnotify_struct_cache, dn);
+ if (f)
+ fput(f);
return error;
}
@@ -401,8 +406,7 @@ static int __init dnotify_init(void)
SLAB_PANIC|SLAB_ACCOUNT);
dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
- dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops,
- FSNOTIFY_GROUP_NOFS);
+ dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, 0);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
dnotify_sysctl_init();
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 9dac7f6e72d2..bb00e1e16838 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -29,12 +29,6 @@ static unsigned int fanotify_hash_path(const struct path *path)
hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS);
}
-static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
- __kernel_fsid_t *fsid2)
-{
- return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
-}
-
static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid)
{
return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^
@@ -234,8 +228,10 @@ static int fanotify_get_response(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- ret = wait_event_killable(group->fanotify_data.access_waitq,
- event->state == FAN_EVENT_ANSWERED);
+ ret = wait_event_state(group->fanotify_data.access_waitq,
+ event->state == FAN_EVENT_ANSWERED,
+ (TASK_KILLABLE|TASK_FREEZABLE));
+
/* Signal pending? */
if (ret < 0) {
spin_lock(&group->notification_lock);
@@ -445,7 +441,13 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
dwords = fh_len >> 2;
type = exportfs_encode_fid(inode, buf, &dwords);
err = -EINVAL;
- if (type <= 0 || type == FILEID_INVALID || fh_len != dwords << 2)
+ /*
+ * Unlike file_handle, type and len of struct fanotify_fh are u8.
+ * Traditionally, filesystems return handle_type < 0xff, but there
+ * is no enforcement for that in vfs.
+ */
+ BUILD_BUG_ON(MAX_HANDLE_SZ > 0xff || FILEID_INVALID > 0xff);
+ if (type <= 0 || type >= FILEID_INVALID || fh_len != dwords << 2)
goto out_err;
fh->type = type;
@@ -838,9 +840,8 @@ out:
}
/*
- * Get cached fsid of the filesystem containing the object from any connector.
- * All connectors are supposed to have the same fsid, but we do not verify that
- * here.
+ * Get cached fsid of the filesystem containing the object from any mark.
+ * All marks are supposed to have the same fsid, but we do not verify that here.
*/
static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
{
@@ -849,18 +850,11 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
__kernel_fsid_t fsid = {};
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
- struct fsnotify_mark_connector *conn;
-
- conn = READ_ONCE(mark->connector);
- /* Mark is just getting destroyed or created? */
- if (!conn)
- continue;
- if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID))
+ if (!(mark->flags & FSNOTIFY_MARK_FLAG_HAS_FSID))
continue;
- /* Pairs with smp_wmb() in fsnotify_add_mark_list() */
- smp_rmb();
- fsid = conn->fsid;
- if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
+ fsid = FANOTIFY_MARK(mark)->fsid;
+ if (!(mark->flags & FSNOTIFY_MARK_FLAG_WEAK_FSID) &&
+ WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
continue;
return fsid;
}
@@ -942,12 +936,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
return 0;
}
- if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) {
+ if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS))
fsid = fanotify_get_fsid(iter_info);
- /* Racing with mark destruction or creation? */
- if (!fsid.val[0] && !fsid.val[1])
- return 0;
- }
event = fanotify_alloc_event(group, mask, data, data_type, dir,
file_name, &fsid, match_mask);
@@ -1068,7 +1058,7 @@ static void fanotify_freeing_mark(struct fsnotify_mark *mark,
static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
{
- kmem_cache_free(fanotify_mark_cache, fsn_mark);
+ kmem_cache_free(fanotify_mark_cache, FANOTIFY_MARK(fsn_mark));
}
const struct fsnotify_ops fanotify_fsnotify_ops = {
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index e8a3c28c5d12..e5ab33cae6a7 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -275,9 +275,9 @@ static inline void fanotify_init_event(struct fanotify_event *event,
#define FANOTIFY_INLINE_FH(name, size) \
struct { \
- struct fanotify_fh (name); \
+ struct fanotify_fh name; \
/* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \
- unsigned char _inline_fh_buf[(size)]; \
+ unsigned char _inline_fh_buf[size]; \
}
struct fanotify_fid_event {
@@ -489,6 +489,22 @@ static inline unsigned int fanotify_event_hash_bucket(
return event->hash & FANOTIFY_HTABLE_MASK;
}
+struct fanotify_mark {
+ struct fsnotify_mark fsn_mark;
+ __kernel_fsid_t fsid;
+};
+
+static inline struct fanotify_mark *FANOTIFY_MARK(struct fsnotify_mark *mark)
+{
+ return container_of(mark, struct fanotify_mark, fsn_mark);
+}
+
+static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
+ __kernel_fsid_t *fsid2)
+{
+ return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
+}
+
static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
{
unsigned int mflags = 0;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index f4798d613dc2..8e2d43fc6f7c 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -23,7 +23,7 @@
#include <asm/ioctls.h>
-#include "../../mount.h"
+#include "../fsnotify.h"
#include "../fdinfo.h"
#include "fanotify.h"
@@ -86,7 +86,6 @@ static struct ctl_table fanotify_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO
},
- { }
};
static void __init fanotify_sysctls_init(void)
@@ -112,10 +111,10 @@ static void __init fanotify_sysctls_init(void)
extern const struct fsnotify_ops fanotify_fsnotify_ops;
-struct kmem_cache *fanotify_mark_cache __read_mostly;
-struct kmem_cache *fanotify_fid_event_cachep __read_mostly;
-struct kmem_cache *fanotify_path_event_cachep __read_mostly;
-struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
+struct kmem_cache *fanotify_mark_cache __ro_after_init;
+struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
+struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
+struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
#define FANOTIFY_EVENT_ALIGN 4
#define FANOTIFY_FID_INFO_HDR_LEN \
@@ -1018,17 +1017,17 @@ static int fanotify_find_path(int dfd, const char __user *filename,
struct fd f = fdget(dfd);
ret = -EBADF;
- if (!f.file)
+ if (!fd_file(f))
goto out;
ret = -ENOTDIR;
if ((flags & FAN_MARK_ONLYDIR) &&
- !(S_ISDIR(file_inode(f.file)->i_mode))) {
+ !(S_ISDIR(file_inode(fd_file(f))->i_mode))) {
fdput(f);
goto out;
}
- *path = f.file->f_path;
+ *path = fd_file(f)->f_path;
path_get(path);
fdput(f);
} else {
@@ -1088,7 +1087,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
}
static int fanotify_remove_mark(struct fsnotify_group *group,
- fsnotify_connp_t *connp, __u32 mask,
+ void *obj, unsigned int obj_type, __u32 mask,
unsigned int flags, __u32 umask)
{
struct fsnotify_mark *fsn_mark = NULL;
@@ -1096,7 +1095,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
int destroy_mark;
fsnotify_group_lock(group);
- fsn_mark = fsnotify_find_mark(connp, group);
+ fsn_mark = fsnotify_find_mark(obj, obj_type, group);
if (!fsn_mark) {
fsnotify_group_unlock(group);
return -ENOENT;
@@ -1117,30 +1116,6 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
return 0;
}
-static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
- struct vfsmount *mnt, __u32 mask,
- unsigned int flags, __u32 umask)
-{
- return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
- mask, flags, umask);
-}
-
-static int fanotify_remove_sb_mark(struct fsnotify_group *group,
- struct super_block *sb, __u32 mask,
- unsigned int flags, __u32 umask)
-{
- return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask,
- flags, umask);
-}
-
-static int fanotify_remove_inode_mark(struct fsnotify_group *group,
- struct inode *inode, __u32 mask,
- unsigned int flags, __u32 umask)
-{
- return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask,
- flags, umask);
-}
-
static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
unsigned int fan_flags)
{
@@ -1203,13 +1178,71 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
return recalc;
}
+struct fan_fsid {
+ struct super_block *sb;
+ __kernel_fsid_t id;
+ bool weak;
+};
+
+static int fanotify_set_mark_fsid(struct fsnotify_group *group,
+ struct fsnotify_mark *mark,
+ struct fan_fsid *fsid)
+{
+ struct fsnotify_mark_connector *conn;
+ struct fsnotify_mark *old;
+ struct super_block *old_sb = NULL;
+
+ FANOTIFY_MARK(mark)->fsid = fsid->id;
+ mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
+ if (fsid->weak)
+ mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID;
+
+ /* First mark added will determine if group is single or multi fsid */
+ if (list_empty(&group->marks_list))
+ return 0;
+
+ /* Find sb of an existing mark */
+ list_for_each_entry(old, &group->marks_list, g_list) {
+ conn = READ_ONCE(old->connector);
+ if (!conn)
+ continue;
+ old_sb = fsnotify_connector_sb(conn);
+ if (old_sb)
+ break;
+ }
+
+ /* Only detached marks left? */
+ if (!old_sb)
+ return 0;
+
+ /* Do not allow mixing of marks with weak and strong fsid */
+ if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID)
+ return -EXDEV;
+
+ /* Allow mixing of marks with strong fsid from different fs */
+ if (!fsid->weak)
+ return 0;
+
+ /* Do not allow mixing marks with weak fsid from different fs */
+ if (old_sb != fsid->sb)
+ return -EXDEV;
+
+ /* Do not allow mixing marks from different btrfs sub-volumes */
+ if (!fanotify_fsid_equal(&FANOTIFY_MARK(old)->fsid,
+ &FANOTIFY_MARK(mark)->fsid))
+ return -EXDEV;
+
+ return 0;
+}
+
static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
- fsnotify_connp_t *connp,
+ void *obj,
unsigned int obj_type,
unsigned int fan_flags,
- __kernel_fsid_t *fsid)
+ struct fan_fsid *fsid)
{
struct ucounts *ucounts = group->fanotify_data.ucounts;
+ struct fanotify_mark *fan_mark;
struct fsnotify_mark *mark;
int ret;
@@ -1222,24 +1255,34 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
!inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS))
return ERR_PTR(-ENOSPC);
- mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
- if (!mark) {
+ fan_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
+ if (!fan_mark) {
ret = -ENOMEM;
goto out_dec_ucounts;
}
+ mark = &fan_mark->fsn_mark;
fsnotify_init_mark(mark, group);
if (fan_flags & FAN_MARK_EVICTABLE)
mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF;
- ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid);
- if (ret) {
- fsnotify_put_mark(mark);
- goto out_dec_ucounts;
+ /* Cache fsid of filesystem containing the marked object */
+ if (fsid) {
+ ret = fanotify_set_mark_fsid(group, mark, fsid);
+ if (ret)
+ goto out_put_mark;
+ } else {
+ fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0;
}
+ ret = fsnotify_add_mark_locked(mark, obj, obj_type, 0);
+ if (ret)
+ goto out_put_mark;
+
return mark;
+out_put_mark:
+ fsnotify_put_mark(mark);
out_dec_ucounts:
if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS))
dec_ucount(ucounts, UCOUNT_FANOTIFY_MARKS);
@@ -1288,18 +1331,18 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
}
static int fanotify_add_mark(struct fsnotify_group *group,
- fsnotify_connp_t *connp, unsigned int obj_type,
+ void *obj, unsigned int obj_type,
__u32 mask, unsigned int fan_flags,
- __kernel_fsid_t *fsid)
+ struct fan_fsid *fsid)
{
struct fsnotify_mark *fsn_mark;
bool recalc;
int ret = 0;
fsnotify_group_lock(group);
- fsn_mark = fsnotify_find_mark(connp, group);
+ fsn_mark = fsnotify_find_mark(obj, obj_type, group);
if (!fsn_mark) {
- fsn_mark = fanotify_add_new_mark(group, connp, obj_type,
+ fsn_mark = fanotify_add_new_mark(group, obj, obj_type,
fan_flags, fsid);
if (IS_ERR(fsn_mark)) {
fsnotify_group_unlock(group);
@@ -1336,42 +1379,6 @@ out:
return ret;
}
-static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
- struct vfsmount *mnt, __u32 mask,
- unsigned int flags, __kernel_fsid_t *fsid)
-{
- return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
- FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
-}
-
-static int fanotify_add_sb_mark(struct fsnotify_group *group,
- struct super_block *sb, __u32 mask,
- unsigned int flags, __kernel_fsid_t *fsid)
-{
- return fanotify_add_mark(group, &sb->s_fsnotify_marks,
- FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
-}
-
-static int fanotify_add_inode_mark(struct fsnotify_group *group,
- struct inode *inode, __u32 mask,
- unsigned int flags, __kernel_fsid_t *fsid)
-{
- pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
-
- /*
- * If some other task has this inode open for write we should not add
- * an ignore mask, unless that ignore mask is supposed to survive
- * modification changes anyway.
- */
- if ((flags & FANOTIFY_MARK_IGNORE_BITS) &&
- !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
- inode_is_open_for_write(inode))
- return 0;
-
- return fanotify_add_mark(group, &inode->i_fsnotify_marks,
- FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
-}
-
static struct fsnotify_event *fanotify_alloc_overflow_event(void)
{
struct fanotify_event *oevent;
@@ -1484,7 +1491,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
group = fsnotify_alloc_group(&fanotify_fsnotify_ops,
- FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS);
+ FSNOTIFY_GROUP_USER);
if (IS_ERR(group)) {
return PTR_ERR(group);
}
@@ -1520,13 +1527,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
INIT_LIST_HEAD(&group->fanotify_data.access_list);
switch (class) {
case FAN_CLASS_NOTIF:
- group->priority = FS_PRIO_0;
+ group->priority = FSNOTIFY_PRIO_NORMAL;
break;
case FAN_CLASS_CONTENT:
- group->priority = FS_PRIO_1;
+ group->priority = FSNOTIFY_PRIO_CONTENT;
break;
case FAN_CLASS_PRE_CONTENT:
- group->priority = FS_PRIO_2;
+ group->priority = FSNOTIFY_PRIO_PRE_CONTENT;
break;
default:
fd = -EINVAL;
@@ -1565,20 +1572,25 @@ out_destroy_group:
return fd;
}
-static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
+static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags,
+ struct fan_fsid *fsid)
{
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
__kernel_fsid_t root_fsid;
int err;
/*
* Make sure dentry is not of a filesystem with zero fsid (e.g. fuse).
*/
- err = vfs_get_fsid(dentry, fsid);
+ err = vfs_get_fsid(dentry, &fsid->id);
if (err)
return err;
- if (!fsid->val[0] && !fsid->val[1])
- return -ENODEV;
+ fsid->sb = dentry->d_sb;
+ if (!fsid->id.val[0] && !fsid->id.val[1]) {
+ err = -ENODEV;
+ goto weak;
+ }
/*
* Make sure dentry is not of a filesystem subvolume (e.g. btrfs)
@@ -1588,11 +1600,18 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
if (err)
return err;
- if (root_fsid.val[0] != fsid->val[0] ||
- root_fsid.val[1] != fsid->val[1])
- return -EXDEV;
+ if (!fanotify_fsid_equal(&root_fsid, &fsid->id)) {
+ err = -EXDEV;
+ goto weak;
+ }
+ fsid->weak = false;
return 0;
+
+weak:
+ /* Allow weak fsid when marking inodes */
+ fsid->weak = true;
+ return (mark_type == FAN_MARK_INODE) ? 0 : err;
}
/* Check if filesystem can encode a unique fid */
@@ -1606,7 +1625,7 @@ static int fanotify_test_fid(struct dentry *dentry, unsigned int flags)
* file handles so user can use name_to_handle_at() to compare fids
* reported with events to the file handle of watched objects.
*/
- if (!nop)
+ if (!exportfs_can_encode_fid(nop))
return -EOPNOTSUPP;
/*
@@ -1614,7 +1633,7 @@ static int fanotify_test_fid(struct dentry *dentry, unsigned int flags)
* supports decoding file handles, so user has a way to map back the
* reported fids to filesystem objects.
*/
- if (mark_type != FAN_MARK_INODE && !nop->fh_to_dentry)
+ if (mark_type != FAN_MARK_INODE && !exportfs_can_decode_fh(nop))
return -EOPNOTSUPP;
return 0;
@@ -1676,12 +1695,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
struct fsnotify_group *group;
struct fd f;
struct path path;
- __kernel_fsid_t __fsid, *fsid = NULL;
+ struct fan_fsid __fsid, *fsid = NULL;
u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
unsigned int obj_type, fid_mode;
+ void *obj;
u32 umask = 0;
int ret;
@@ -1744,14 +1764,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
f = fdget(fanotify_fd);
- if (unlikely(!f.file))
+ if (unlikely(!fd_file(f)))
return -EBADF;
/* verify that this is indeed an fanotify instance */
ret = -EINVAL;
- if (unlikely(f.file->f_op != &fanotify_fops))
+ if (unlikely(fd_file(f)->f_op != &fanotify_fops))
goto fput_and_out;
- group = f.file->private_data;
+ group = fd_file(f)->private_data;
/*
* An unprivileged user is not allowed to setup mount nor filesystem
@@ -1765,12 +1785,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
goto fput_and_out;
/*
- * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
- * allowed to set permissions events.
+ * Permission events require minimum priority FAN_CLASS_CONTENT.
*/
ret = -EINVAL;
if (mask & FANOTIFY_PERM_EVENTS &&
- group->priority == FS_PRIO_0)
+ group->priority < FSNOTIFY_PRIO_CONTENT)
goto fput_and_out;
if (mask & FAN_FS_ERROR &&
@@ -1828,7 +1847,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
if (fid_mode) {
- ret = fanotify_test_fsid(path.dentry, &__fsid);
+ ret = fanotify_test_fsid(path.dentry, flags, &__fsid);
if (ret)
goto path_put_and_out;
@@ -1840,17 +1859,34 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* inode held in place by reference to path; group by fget on fd */
- if (mark_type == FAN_MARK_INODE)
+ if (mark_type == FAN_MARK_INODE) {
inode = path.dentry->d_inode;
- else
+ obj = inode;
+ } else {
mnt = path.mnt;
+ if (mark_type == FAN_MARK_MOUNT)
+ obj = mnt;
+ else
+ obj = mnt->mnt_sb;
+ }
+
+ /*
+ * If some other task has this inode open for write we should not add
+ * an ignore mask, unless that ignore mask is supposed to survive
+ * modification changes anyway.
+ */
+ if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
+ !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
+ ret = mnt ? -EINVAL : -EISDIR;
+ /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
+ if (ignore == FAN_MARK_IGNORE &&
+ (mnt || S_ISDIR(inode->i_mode)))
+ goto path_put_and_out;
- ret = mnt ? -EINVAL : -EISDIR;
- /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
- if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE &&
- (mnt || S_ISDIR(inode->i_mode)) &&
- !(flags & FAN_MARK_IGNORED_SURV_MODIFY))
- goto path_put_and_out;
+ ret = 0;
+ if (inode && inode_is_open_for_write(inode))
+ goto path_put_and_out;
+ }
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
@@ -1868,26 +1904,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
/* create/update an inode mark */
switch (mark_cmd) {
case FAN_MARK_ADD:
- if (mark_type == FAN_MARK_MOUNT)
- ret = fanotify_add_vfsmount_mark(group, mnt, mask,
- flags, fsid);
- else if (mark_type == FAN_MARK_FILESYSTEM)
- ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
- flags, fsid);
- else
- ret = fanotify_add_inode_mark(group, inode, mask,
- flags, fsid);
+ ret = fanotify_add_mark(group, obj, obj_type, mask, flags,
+ fsid);
break;
case FAN_MARK_REMOVE:
- if (mark_type == FAN_MARK_MOUNT)
- ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
- flags, umask);
- else if (mark_type == FAN_MARK_FILESYSTEM)
- ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
- flags, umask);
- else
- ret = fanotify_remove_inode_mark(group, inode, mask,
- flags, umask);
+ ret = fanotify_remove_mark(group, obj, obj_type, mask, flags,
+ umask);
break;
default:
ret = -EINVAL;
@@ -1946,7 +1968,7 @@ static int __init fanotify_user_setup(void)
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
- fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+ fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
SLAB_PANIC|SLAB_ACCOUNT);
fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event,
SLAB_PANIC);
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 5c430736ec12..e933f9c65d90 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -41,29 +41,23 @@ static void show_fdinfo(struct seq_file *m, struct file *f,
#if defined(CONFIG_EXPORTFS)
static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
{
- struct {
- struct file_handle handle;
- u8 pad[MAX_HANDLE_SZ];
- } f;
+ DEFINE_FLEX(struct file_handle, f, f_handle, handle_bytes, MAX_HANDLE_SZ);
int size, ret, i;
- f.handle.handle_bytes = sizeof(f.pad);
- size = f.handle.handle_bytes >> 2;
+ size = f->handle_bytes >> 2;
- ret = exportfs_encode_fid(inode, (struct fid *)f.handle.f_handle, &size);
- if ((ret == FILEID_INVALID) || (ret < 0)) {
- WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
+ ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size);
+ if ((ret == FILEID_INVALID) || (ret < 0))
return;
- }
- f.handle.handle_type = ret;
- f.handle.handle_bytes = size * sizeof(u32);
+ f->handle_type = ret;
+ f->handle_bytes = size * sizeof(u32);
seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:",
- f.handle.handle_bytes, f.handle.handle_type);
+ f->handle_bytes, f->handle_type);
- for (i = 0; i < f.handle.handle_bytes; i++)
- seq_printf(m, "%02x", (int)f.handle.f_handle[i]);
+ for (i = 0; i < f->handle_bytes; i++)
+ seq_printf(m, "%02x", (int)f->f_handle[i]);
}
#else
static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9cc4ebb53504..f976949d2634 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -89,11 +89,25 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
void fsnotify_sb_delete(struct super_block *sb)
{
+ struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
+
+ /* Were any marks ever added to any object on this sb? */
+ if (!sbinfo)
+ return;
+
fsnotify_unmount_inodes(sb);
fsnotify_clear_marks_by_sb(sb);
/* Wait for outstanding object references from connectors */
- wait_var_event(&sb->s_fsnotify_connectors,
- !atomic_long_read(&sb->s_fsnotify_connectors));
+ wait_var_event(fsnotify_sb_watched_objects(sb),
+ !atomic_long_read(fsnotify_sb_watched_objects(sb)));
+ WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT));
+ WARN_ON(fsnotify_sb_has_priority_watchers(sb,
+ FSNOTIFY_PRIO_PRE_CONTENT));
+}
+
+void fsnotify_sb_free(struct super_block *sb)
+{
+ kfree(sb->s_fsnotify_info);
}
/*
@@ -120,7 +134,7 @@ void fsnotify_set_children_dentry_flags(struct inode *inode)
* d_flags to indicate parental interest (their parent is the
* original inode) */
spin_lock(&alias->d_lock);
- list_for_each_entry(child, &alias->d_subdirs, d_child) {
+ hlist_for_each_entry(child, &alias->d_children, d_sib) {
if (!child->d_inode)
continue;
@@ -152,7 +166,7 @@ static void fsnotify_clear_child_dentry_flag(struct inode *pinode,
}
/* Are inode/sb/mount interested in parent and name info with this event? */
-static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt,
+static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask,
__u32 mask)
{
__u32 marks_mask = 0;
@@ -169,15 +183,26 @@ static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt,
BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT);
/* Did either inode/sb/mount subscribe for events with parent/name? */
- marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask);
- marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask);
- if (mnt)
- marks_mask |= fsnotify_parent_needed_mask(mnt->mnt_fsnotify_mask);
+ marks_mask |= fsnotify_parent_needed_mask(
+ READ_ONCE(inode->i_fsnotify_mask));
+ marks_mask |= fsnotify_parent_needed_mask(
+ READ_ONCE(inode->i_sb->s_fsnotify_mask));
+ marks_mask |= fsnotify_parent_needed_mask(mnt_mask);
/* Did they subscribe for this event with parent/name info? */
return mask & marks_mask;
}
+/* Are there any inode/mount/sb objects that are interested in this event? */
+static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask,
+ __u32 mask)
+{
+ __u32 marks_mask = READ_ONCE(inode->i_fsnotify_mask) | mnt_mask |
+ READ_ONCE(inode->i_sb->s_fsnotify_mask);
+
+ return mask & marks_mask & ALL_FSNOTIFY_EVENTS;
+}
+
/*
* Notify this dentry's parent about a child's events with child name info
* if parent is watching or if inode/sb/mount are interested in events with
@@ -190,7 +215,8 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
int data_type)
{
const struct path *path = fsnotify_data_path(data, data_type);
- struct mount *mnt = path ? real_mount(path->mnt) : NULL;
+ __u32 mnt_mask = path ?
+ READ_ONCE(real_mount(path->mnt)->mnt_fsnotify_mask) : 0;
struct inode *inode = d_inode(dentry);
struct dentry *parent;
bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED;
@@ -201,16 +227,13 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
struct qstr *file_name = NULL;
int ret = 0;
- /*
- * Do inode/sb/mount care about parent and name info on non-dir?
- * Do they care about any event at all?
- */
- if (!inode->i_fsnotify_marks && !inode->i_sb->s_fsnotify_marks &&
- (!mnt || !mnt->mnt_fsnotify_marks) && !parent_watched)
+ /* Optimize the likely case of nobody watching this path */
+ if (likely(!parent_watched &&
+ !fsnotify_object_watched(inode, mnt_mask, mask)))
return 0;
parent = NULL;
- parent_needed = fsnotify_event_needs_parent(inode, mnt, mask);
+ parent_needed = fsnotify_event_needs_parent(inode, mnt_mask, mask);
if (!parent_watched && !parent_needed)
goto notify;
@@ -497,6 +520,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
{
const struct path *path = fsnotify_data_path(data, data_type);
struct super_block *sb = fsnotify_data_sb(data, data_type);
+ struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
struct fsnotify_iter_info iter_info = {};
struct mount *mnt = NULL;
struct inode *inode2 = NULL;
@@ -533,19 +557,19 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
* SRCU because we have no references to any objects and do not
* need SRCU to keep them "alive".
*/
- if (!sb->s_fsnotify_marks &&
+ if ((!sbinfo || !sbinfo->sb_marks) &&
(!mnt || !mnt->mnt_fsnotify_marks) &&
(!inode || !inode->i_fsnotify_marks) &&
(!inode2 || !inode2->i_fsnotify_marks))
return 0;
- marks_mask = sb->s_fsnotify_mask;
+ marks_mask = READ_ONCE(sb->s_fsnotify_mask);
if (mnt)
- marks_mask |= mnt->mnt_fsnotify_mask;
+ marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask);
if (inode)
- marks_mask |= inode->i_fsnotify_mask;
+ marks_mask |= READ_ONCE(inode->i_fsnotify_mask);
if (inode2)
- marks_mask |= inode2->i_fsnotify_mask;
+ marks_mask |= READ_ONCE(inode2->i_fsnotify_mask);
/*
@@ -560,8 +584,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
- iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
- fsnotify_first_mark(&sb->s_fsnotify_marks);
+ if (sbinfo) {
+ iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
+ fsnotify_first_mark(&sbinfo->sb_marks);
+ }
if (mnt) {
iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 2b4267de86e6..663759ed6fbc 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -9,39 +9,58 @@
#include "../mount.h"
+/*
+ * fsnotify_connp_t is what we embed in objects to which a connector can be
+ * attached.
+ */
+typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
+
static inline struct inode *fsnotify_conn_inode(
struct fsnotify_mark_connector *conn)
{
- return container_of(conn->obj, struct inode, i_fsnotify_marks);
+ return conn->obj;
}
static inline struct mount *fsnotify_conn_mount(
struct fsnotify_mark_connector *conn)
{
- return container_of(conn->obj, struct mount, mnt_fsnotify_marks);
+ return real_mount(conn->obj);
}
static inline struct super_block *fsnotify_conn_sb(
struct fsnotify_mark_connector *conn)
{
- return container_of(conn->obj, struct super_block, s_fsnotify_marks);
+ return conn->obj;
}
-static inline struct super_block *fsnotify_connector_sb(
- struct fsnotify_mark_connector *conn)
+static inline struct super_block *fsnotify_object_sb(void *obj,
+ enum fsnotify_obj_type obj_type)
{
- switch (conn->type) {
+ switch (obj_type) {
case FSNOTIFY_OBJ_TYPE_INODE:
- return fsnotify_conn_inode(conn)->i_sb;
+ return ((struct inode *)obj)->i_sb;
case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
- return fsnotify_conn_mount(conn)->mnt.mnt_sb;
+ return ((struct vfsmount *)obj)->mnt_sb;
case FSNOTIFY_OBJ_TYPE_SB:
- return fsnotify_conn_sb(conn);
+ return (struct super_block *)obj;
default:
return NULL;
}
}
+static inline struct super_block *fsnotify_connector_sb(
+ struct fsnotify_mark_connector *conn)
+{
+ return fsnotify_object_sb(conn->obj, conn->type);
+}
+
+static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb)
+{
+ struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
+
+ return sbinfo ? &sbinfo->sb_marks : NULL;
+}
+
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
@@ -67,7 +86,7 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
/* run the list of all marks associated with sb and destroy them */
static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
{
- fsnotify_destroy_marks(&sb->s_fsnotify_marks);
+ fsnotify_destroy_marks(fsnotify_sb_marks(sb));
}
/*
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 1de6631a3925..18446b7b0d49 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -115,7 +115,6 @@ static struct fsnotify_group *__fsnotify_alloc_group(
const struct fsnotify_ops *ops,
int flags, gfp_t gfp)
{
- static struct lock_class_key nofs_marks_lock;
struct fsnotify_group *group;
group = kzalloc(sizeof(struct fsnotify_group), gfp);
@@ -136,16 +135,6 @@ static struct fsnotify_group *__fsnotify_alloc_group(
group->ops = ops;
group->flags = flags;
- /*
- * For most backends, eviction of inode with a mark is not expected,
- * because marks hold a refcount on the inode against eviction.
- *
- * Use a different lockdep class for groups that support evictable
- * inode marks, because with evictable marks, mark_mutex is NOT
- * fs-reclaim safe - the mutex is taken when evicting inodes.
- */
- if (flags & FSNOTIFY_GROUP_NOFS)
- lockdep_set_class(&group->mark_mutex, &nofs_marks_lock);
return group;
}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1c4bfdab008d..0794dcaf1e47 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -49,7 +49,7 @@
/* configurable via /proc/sys/fs/inotify/ */
static int inotify_max_queued_events __read_mostly;
-struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
+struct kmem_cache *inotify_inode_mark_cachep __ro_after_init;
#ifdef CONFIG_SYSCTL
@@ -85,7 +85,6 @@ static struct ctl_table inotify_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO
},
- { }
};
static void __init inotify_sysctls_init(void)
@@ -545,7 +544,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
int create = (arg & IN_MASK_CREATE);
int ret;
- fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
+ fsn_mark = fsnotify_find_inode_mark(inode, group);
if (!fsn_mark)
return -ENOENT;
else if (create) {
@@ -570,7 +569,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
/* more bits in old than in new? */
int dropped = (old_mask & ~new_mask);
/* more bits in this fsn_mark than the inode's mask? */
- int do_inode = (new_mask & ~inode->i_fsnotify_mask);
+ int do_inode = (new_mask & ~READ_ONCE(inode->i_fsnotify_mask));
/* update the inode with this new fsn_mark */
if (dropped || do_inode)
@@ -754,7 +753,7 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
return -EINVAL;
f = fdget(fd);
- if (unlikely(!f.file))
+ if (unlikely(!fd_file(f)))
return -EBADF;
/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
@@ -764,7 +763,7 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
}
/* verify that this is indeed an inotify instance */
- if (unlikely(f.file->f_op != &inotify_fops)) {
+ if (unlikely(fd_file(f)->f_op != &inotify_fops)) {
ret = -EINVAL;
goto fput_and_out;
}
@@ -781,7 +780,7 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
/* inode held in place by reference to path; group by fget on fd */
inode = path.dentry->d_inode;
- group = f.file->private_data;
+ group = fd_file(f)->private_data;
/* create/update an inode mark */
ret = inotify_update_watch(group, inode, mask);
@@ -799,14 +798,14 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
int ret = -EINVAL;
f = fdget(fd);
- if (unlikely(!f.file))
+ if (unlikely(!fd_file(f)))
return -EBADF;
/* verify that this is indeed an inotify instance */
- if (unlikely(f.file->f_op != &inotify_fops))
+ if (unlikely(fd_file(f)->f_op != &inotify_fops))
goto out;
- group = f.file->private_data;
+ group = fd_file(f)->private_data;
i_mark = inotify_idr_find(group, wd);
if (unlikely(!i_mark))
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 4be6e883d492..4981439e6209 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -97,6 +97,21 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
refcount_inc(&mark->refcnt);
}
+static fsnotify_connp_t *fsnotify_object_connp(void *obj,
+ enum fsnotify_obj_type obj_type)
+{
+ switch (obj_type) {
+ case FSNOTIFY_OBJ_TYPE_INODE:
+ return &((struct inode *)obj)->i_fsnotify_marks;
+ case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
+ return &real_mount(obj)->mnt_fsnotify_marks;
+ case FSNOTIFY_OBJ_TYPE_SB:
+ return fsnotify_sb_marks(obj);
+ default:
+ return NULL;
+ }
+}
+
static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
{
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
@@ -113,13 +128,78 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
return 0;
- return *fsnotify_conn_mask_p(conn);
+ return READ_ONCE(*fsnotify_conn_mask_p(conn));
+}
+
+static void fsnotify_get_sb_watched_objects(struct super_block *sb)
+{
+ atomic_long_inc(fsnotify_sb_watched_objects(sb));
+}
+
+static void fsnotify_put_sb_watched_objects(struct super_block *sb)
+{
+ atomic_long_t *watched_objects = fsnotify_sb_watched_objects(sb);
+
+ /* the superblock can go away after this decrement */
+ if (atomic_long_dec_and_test(watched_objects))
+ wake_up_var(watched_objects);
}
static void fsnotify_get_inode_ref(struct inode *inode)
{
ihold(inode);
- atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
+ fsnotify_get_sb_watched_objects(inode->i_sb);
+}
+
+static void fsnotify_put_inode_ref(struct inode *inode)
+{
+ /* read ->i_sb before the inode can go away */
+ struct super_block *sb = inode->i_sb;
+
+ iput(inode);
+ fsnotify_put_sb_watched_objects(sb);
+}
+
+/*
+ * Grab or drop watched objects reference depending on whether the connector
+ * is attached and has any marks attached.
+ */
+static void fsnotify_update_sb_watchers(struct super_block *sb,
+ struct fsnotify_mark_connector *conn)
+{
+ struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
+ bool is_watched = conn->flags & FSNOTIFY_CONN_FLAG_IS_WATCHED;
+ struct fsnotify_mark *first_mark = NULL;
+ unsigned int highest_prio = 0;
+
+ if (conn->obj)
+ first_mark = hlist_entry_safe(conn->list.first,
+ struct fsnotify_mark, obj_list);
+ if (first_mark)
+ highest_prio = first_mark->group->priority;
+ if (WARN_ON(highest_prio >= __FSNOTIFY_PRIO_NUM))
+ highest_prio = 0;
+
+ /*
+ * If the highest priority of group watching this object is prio,
+ * then watched object has a reference on counters [0..prio].
+ * Update priority >= 1 watched objects counters.
+ */
+ for (unsigned int p = conn->prio + 1; p <= highest_prio; p++)
+ atomic_long_inc(&sbinfo->watched_objects[p]);
+ for (unsigned int p = conn->prio; p > highest_prio; p--)
+ atomic_long_dec(&sbinfo->watched_objects[p]);
+ conn->prio = highest_prio;
+
+ /* Update priority >= 0 (a.k.a total) watched objects counter */
+ BUILD_BUG_ON(FSNOTIFY_PRIO_NORMAL != 0);
+ if (first_mark && !is_watched) {
+ conn->flags |= FSNOTIFY_CONN_FLAG_IS_WATCHED;
+ fsnotify_get_sb_watched_objects(sb);
+ } else if (!first_mark && is_watched) {
+ conn->flags &= ~FSNOTIFY_CONN_FLAG_IS_WATCHED;
+ fsnotify_put_sb_watched_objects(sb);
+ }
}
/*
@@ -171,7 +251,11 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
!(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
want_iref = true;
}
- *fsnotify_conn_mask_p(conn) = new_mask;
+ /*
+ * We use WRITE_ONCE() to prevent silly compiler optimizations from
+ * confusing readers not holding conn->lock with partial updates.
+ */
+ WRITE_ONCE(*fsnotify_conn_mask_p(conn), new_mask);
return fsnotify_update_iref(conn, want_iref);
}
@@ -239,35 +323,12 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
}
}
-static void fsnotify_put_inode_ref(struct inode *inode)
-{
- struct super_block *sb = inode->i_sb;
-
- iput(inode);
- if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
- wake_up_var(&sb->s_fsnotify_connectors);
-}
-
-static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn)
-{
- struct super_block *sb = fsnotify_connector_sb(conn);
-
- if (sb)
- atomic_long_inc(&sb->s_fsnotify_connectors);
-}
-
-static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn)
-{
- struct super_block *sb = fsnotify_connector_sb(conn);
-
- if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
- wake_up_var(&sb->s_fsnotify_connectors);
-}
-
static void *fsnotify_detach_connector_from_object(
struct fsnotify_mark_connector *conn,
unsigned int *type)
{
+ fsnotify_connp_t *connp = fsnotify_object_connp(conn->obj, conn->type);
+ struct super_block *sb = fsnotify_connector_sb(conn);
struct inode *inode = NULL;
*type = conn->type;
@@ -287,10 +348,10 @@ static void *fsnotify_detach_connector_from_object(
fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
}
- fsnotify_put_sb_connectors(conn);
- rcu_assign_pointer(*(conn->obj), NULL);
+ rcu_assign_pointer(*connp, NULL);
conn->obj = NULL;
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
+ fsnotify_update_sb_watchers(sb, conn);
return inode;
}
@@ -342,6 +403,11 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
objp = fsnotify_detach_connector_from_object(conn, &type);
free_conn = true;
} else {
+ struct super_block *sb = fsnotify_connector_sb(conn);
+
+ /* Update watched objects after detaching mark */
+ if (sb)
+ fsnotify_update_sb_watchers(sb, conn);
objp = __fsnotify_recalc_mask(conn);
type = conn->type;
}
@@ -562,9 +628,28 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
return -1;
}
+static int fsnotify_attach_info_to_sb(struct super_block *sb)
+{
+ struct fsnotify_sb_info *sbinfo;
+
+ /* sb info is freed on fsnotify_sb_delete() */
+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
+ if (!sbinfo)
+ return -ENOMEM;
+
+ /*
+ * cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
+ * will observe an initialized structure
+ */
+ if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) {
+ /* Someone else created sbinfo for us */
+ kfree(sbinfo);
+ }
+ return 0;
+}
+
static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
- unsigned int obj_type,
- __kernel_fsid_t *fsid)
+ void *obj, unsigned int obj_type)
{
struct fsnotify_mark_connector *conn;
@@ -574,17 +659,9 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
spin_lock_init(&conn->lock);
INIT_HLIST_HEAD(&conn->list);
conn->flags = 0;
+ conn->prio = 0;
conn->type = obj_type;
- conn->obj = connp;
- /* Cache fsid of filesystem containing the object */
- if (fsid) {
- conn->fsid = *fsid;
- conn->flags = FSNOTIFY_CONN_FLAG_HAS_FSID;
- } else {
- conn->fsid.val[0] = conn->fsid.val[1] = 0;
- conn->flags = 0;
- }
- fsnotify_get_sb_connectors(conn);
+ conn->obj = obj;
/*
* cmpxchg() provides the barrier so that readers of *connp can see
@@ -592,10 +669,8 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
*/
if (cmpxchg(connp, NULL, conn)) {
/* Someone else created list structure for us */
- fsnotify_put_sb_connectors(conn);
kmem_cache_free(fsnotify_mark_connector_cachep, conn);
}
-
return 0;
}
@@ -632,54 +707,39 @@ out:
* to which group and for which inodes. These marks are ordered according to
* priority, highest number first, and then by the group's location in memory.
*/
-static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
- fsnotify_connp_t *connp,
- unsigned int obj_type,
- int add_flags, __kernel_fsid_t *fsid)
+static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
+ unsigned int obj_type, int add_flags)
{
+ struct super_block *sb = fsnotify_object_sb(obj, obj_type);
struct fsnotify_mark *lmark, *last = NULL;
struct fsnotify_mark_connector *conn;
+ fsnotify_connp_t *connp;
int cmp;
int err = 0;
if (WARN_ON(!fsnotify_valid_obj_type(obj_type)))
return -EINVAL;
- /* Backend is expected to check for zero fsid (e.g. tmpfs) */
- if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1]))
- return -ENODEV;
+ /*
+ * Attach the sb info before attaching a connector to any object on sb.
+ * The sb info will remain attached as long as sb lives.
+ */
+ if (!fsnotify_sb_info(sb)) {
+ err = fsnotify_attach_info_to_sb(sb);
+ if (err)
+ return err;
+ }
+ connp = fsnotify_object_connp(obj, obj_type);
restart:
spin_lock(&mark->lock);
conn = fsnotify_grab_connector(connp);
if (!conn) {
spin_unlock(&mark->lock);
- err = fsnotify_attach_connector_to_object(connp, obj_type,
- fsid);
+ err = fsnotify_attach_connector_to_object(connp, obj, obj_type);
if (err)
return err;
goto restart;
- } else if (fsid && !(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) {
- conn->fsid = *fsid;
- /* Pairs with smp_rmb() in fanotify_get_fsid() */
- smp_wmb();
- conn->flags |= FSNOTIFY_CONN_FLAG_HAS_FSID;
- } else if (fsid && (conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID) &&
- (fsid->val[0] != conn->fsid.val[0] ||
- fsid->val[1] != conn->fsid.val[1])) {
- /*
- * Backend is expected to check for non uniform fsid
- * (e.g. btrfs), but maybe we missed something?
- * Only allow setting conn->fsid once to non zero fsid.
- * inotify and non-fid fanotify groups do not set nor test
- * conn->fsid.
- */
- pr_warn_ratelimited("%s: fsid mismatch on object of type %u: "
- "%x.%x != %x.%x\n", __func__, conn->type,
- fsid->val[0], fsid->val[1],
- conn->fsid.val[0], conn->fsid.val[1]);
- err = -EXDEV;
- goto out_err;
}
/* is mark the first mark? */
@@ -710,6 +770,7 @@ restart:
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
+ fsnotify_update_sb_watchers(sb, conn);
/*
* Since connector is attached to object using cmpxchg() we are
* guaranteed that connector initialization is fully visible by anyone
@@ -728,8 +789,8 @@ out_err:
* event types should be delivered to which group.
*/
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
- fsnotify_connp_t *connp, unsigned int obj_type,
- int add_flags, __kernel_fsid_t *fsid)
+ void *obj, unsigned int obj_type,
+ int add_flags)
{
struct fsnotify_group *group = mark->group;
int ret = 0;
@@ -749,7 +810,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_get_mark(mark); /* for g_list */
spin_unlock(&mark->lock);
- ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags, fsid);
+ ret = fsnotify_add_mark_list(mark, obj, obj_type, add_flags);
if (ret)
goto err;
@@ -767,15 +828,14 @@ err:
return ret;
}
-int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
- unsigned int obj_type, int add_flags,
- __kernel_fsid_t *fsid)
+int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj,
+ unsigned int obj_type, int add_flags)
{
int ret;
struct fsnotify_group *group = mark->group;
fsnotify_group_lock(group);
- ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid);
+ ret = fsnotify_add_mark_locked(mark, obj, obj_type, add_flags);
fsnotify_group_unlock(group);
return ret;
}
@@ -785,12 +845,16 @@ EXPORT_SYMBOL_GPL(fsnotify_add_mark);
* Given a list of marks, find the mark associated with given group. If found
* take a reference to that mark and return it, else return NULL.
*/
-struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
+struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type,
struct fsnotify_group *group)
{
+ fsnotify_connp_t *connp = fsnotify_object_connp(obj, obj_type);
struct fsnotify_mark_connector *conn;
struct fsnotify_mark *mark;
+ if (!connp)
+ return NULL;
+
conn = fsnotify_grab_connector(connp);
if (!conn)
return NULL;