summaryrefslogtreecommitdiff
path: root/fs/notify
diff options
context:
space:
mode:
Diffstat (limited to 'fs/notify')
-rw-r--r--fs/notify/fanotify/Kconfig1
-rw-r--r--fs/notify/fanotify/fanotify.c298
-rw-r--r--fs/notify/fanotify/fanotify.h120
-rw-r--r--fs/notify/fanotify/fanotify_user.c468
-rw-r--r--fs/notify/fdinfo.c30
-rw-r--r--fs/notify/fsnotify.c48
-rw-r--r--fs/notify/fsnotify.h11
-rw-r--r--fs/notify/inotify/inotify.h1
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c18
-rw-r--r--fs/notify/inotify/inotify_user.c13
-rw-r--r--fs/notify/mark.c85
-rw-r--r--fs/notify/notification.c42
12 files changed, 819 insertions, 316 deletions
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 41355ce74ac0..735bfb2e9190 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -2,6 +2,7 @@ config FANOTIFY
bool "Filesystem wide access notification"
select FSNOTIFY
select ANON_INODES
+ select EXPORTFS
default n
---help---
Say Y here to enable fanotify support. fanotify is a file access
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 94b52157bf8d..6b9c27548997 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -13,22 +13,40 @@
#include <linux/wait.h>
#include <linux/audit.h>
#include <linux/sched/mm.h>
+#include <linux/statfs.h>
#include "fanotify.h"
static bool should_merge(struct fsnotify_event *old_fsn,
struct fsnotify_event *new_fsn)
{
- struct fanotify_event_info *old, *new;
+ struct fanotify_event *old, *new;
pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
old = FANOTIFY_E(old_fsn);
new = FANOTIFY_E(new_fsn);
- if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
- old->path.mnt == new->path.mnt &&
- old->path.dentry == new->path.dentry)
- return true;
+ if (old_fsn->inode != new_fsn->inode || old->pid != new->pid ||
+ old->fh_type != new->fh_type || old->fh_len != new->fh_len)
+ return false;
+
+ if (fanotify_event_has_path(old)) {
+ return old->path.mnt == new->path.mnt &&
+ old->path.dentry == new->path.dentry;
+ } else if (fanotify_event_has_fid(old)) {
+ /*
+ * We want to merge many dirent events in the same dir (i.e.
+ * creates/unlinks/renames), but we do not want to merge dirent
+ * events referring to subdirs with dirent events referring to
+ * non subdirs, otherwise, user won't be able to tell from a
+ * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+
+ * unlink pair or rmdir+create pair of events.
+ */
+ return (old->mask & FS_ISDIR) == (new->mask & FS_ISDIR) &&
+ fanotify_fid_equal(&old->fid, &new->fid, old->fh_len);
+ }
+
+ /* Do not merge events if we failed to encode fid */
return false;
}
@@ -36,20 +54,22 @@ static bool should_merge(struct fsnotify_event *old_fsn,
static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
{
struct fsnotify_event *test_event;
+ struct fanotify_event *new;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
+ new = FANOTIFY_E(event);
/*
* Don't merge a permission event with any other event so that we know
* the event structure we have created in fanotify_handle_event() is the
* one we should check for permission response.
*/
- if (fanotify_is_perm_event(event->mask))
+ if (fanotify_is_perm_event(new->mask))
return 0;
list_for_each_entry_reverse(test_event, list, list) {
if (should_merge(test_event, event)) {
- test_event->mask |= event->mask;
+ FANOTIFY_E(test_event)->mask |= new->mask;
return 1;
}
}
@@ -57,15 +77,44 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
return 0;
}
+/*
+ * Wait for response to permission event. The function also takes care of
+ * freeing the permission event (or offloads that in case the wait is canceled
+ * by a signal). The function returns 0 in case access got allowed by userspace,
+ * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case
+ * the wait got interrupted by a signal.
+ */
static int fanotify_get_response(struct fsnotify_group *group,
- struct fanotify_perm_event_info *event,
+ struct fanotify_perm_event *event,
struct fsnotify_iter_info *iter_info)
{
int ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- wait_event(group->fanotify_data.access_waitq, event->response);
+ ret = wait_event_killable(group->fanotify_data.access_waitq,
+ event->state == FAN_EVENT_ANSWERED);
+ /* Signal pending? */
+ if (ret < 0) {
+ spin_lock(&group->notification_lock);
+ /* Event reported to userspace and no answer yet? */
+ if (event->state == FAN_EVENT_REPORTED) {
+ /* Event will get freed once userspace answers to it */
+ event->state = FAN_EVENT_CANCELED;
+ spin_unlock(&group->notification_lock);
+ return ret;
+ }
+ /* Event not yet reported? Just remove it. */
+ if (event->state == FAN_EVENT_INIT)
+ fsnotify_remove_queued_event(group, &event->fae.fse);
+ /*
+ * Event may be also answered in case signal delivery raced
+ * with wakeup. In that case we have nothing to do besides
+ * freeing the event and reporting error.
+ */
+ spin_unlock(&group->notification_lock);
+ goto out;
+ }
/* userspace responded, convert to something usable */
switch (event->response & ~FAN_AUDIT) {
@@ -81,19 +130,27 @@ static int fanotify_get_response(struct fsnotify_group *group,
if (event->response & FAN_AUDIT)
audit_fanotify(event->response & ~FAN_AUDIT);
- event->response = 0;
-
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
-
+out:
+ fsnotify_destroy_event(group, &event->fae.fse);
+
return ret;
}
-static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
- u32 event_mask, const void *data,
- int data_type)
+/*
+ * This function returns a mask for an event that only contains the flags
+ * that have been specifically requested by the user. Flags that may have
+ * been included within the event mask, but have not been explicitly
+ * requested by the user, will not be present in the returned mask.
+ */
+static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ struct fsnotify_iter_info *iter_info,
+ u32 event_mask, const void *data,
+ int data_type)
{
__u32 marks_mask = 0, marks_ignored_mask = 0;
+ __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS;
const struct path *path = data;
struct fsnotify_mark *mark;
int type;
@@ -101,49 +158,132 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
- /* if we don't have enough info to send an event to userspace say no */
- if (data_type != FSNOTIFY_EVENT_PATH)
- return false;
-
- /* sorry, fanotify only gives a damn about files and dirs */
- if (!d_is_reg(path->dentry) &&
- !d_can_lookup(path->dentry))
- return false;
+ if (!FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+ /* Do we have path to open a file descriptor? */
+ if (data_type != FSNOTIFY_EVENT_PATH)
+ return 0;
+ /* Path type events are only relevant for files and dirs */
+ if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry))
+ return 0;
+ }
fsnotify_foreach_obj_type(type) {
if (!fsnotify_iter_should_report_type(iter_info, type))
continue;
mark = iter_info->marks[type];
/*
- * if the event is for a child and this inode doesn't care about
- * events on the child, don't send it!
+ * If the event is for a child and this mark doesn't care about
+ * events on a child, don't send it!
*/
- if (type == FSNOTIFY_OBJ_TYPE_INODE &&
- (event_mask & FS_EVENT_ON_CHILD) &&
- !(mark->mask & FS_EVENT_ON_CHILD))
+ if (event_mask & FS_EVENT_ON_CHILD &&
+ (type != FSNOTIFY_OBJ_TYPE_INODE ||
+ !(mark->mask & FS_EVENT_ON_CHILD)))
continue;
marks_mask |= mark->mask;
marks_ignored_mask |= mark->ignored_mask;
}
- if (d_is_dir(path->dentry) &&
+ test_mask = event_mask & marks_mask & ~marks_ignored_mask;
+
+ /*
+ * dirent modification events (create/delete/move) do not carry the
+ * child entry name/inode information. Instead, we report FAN_ONDIR
+ * for mkdir/rmdir so user can differentiate them from creat/unlink.
+ *
+ * For backward compatibility and consistency, do not report FAN_ONDIR
+ * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR
+ * to user in FAN_REPORT_FID mode for all event types.
+ */
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+ /* Do not report FAN_ONDIR without any event */
+ if (!(test_mask & ~FAN_ONDIR))
+ return 0;
+ } else {
+ user_mask &= ~FAN_ONDIR;
+ }
+
+ if (event_mask & FS_ISDIR &&
!(marks_mask & FS_ISDIR & ~marks_ignored_mask))
- return false;
+ return 0;
- if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
- ~marks_ignored_mask)
- return true;
+ return test_mask & user_mask;
+}
- return false;
+static int fanotify_encode_fid(struct fanotify_event *event,
+ struct inode *inode, gfp_t gfp,
+ __kernel_fsid_t *fsid)
+{
+ struct fanotify_fid *fid = &event->fid;
+ int dwords, bytes = 0;
+ int err, type;
+
+ fid->ext_fh = NULL;
+ dwords = 0;
+ err = -ENOENT;
+ type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
+ if (!dwords)
+ goto out_err;
+
+ bytes = dwords << 2;
+ if (bytes > FANOTIFY_INLINE_FH_LEN) {
+ /* Treat failure to allocate fh as failure to allocate event */
+ err = -ENOMEM;
+ fid->ext_fh = kmalloc(bytes, gfp);
+ if (!fid->ext_fh)
+ goto out_err;
+ }
+
+ type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes),
+ &dwords, NULL);
+ err = -EINVAL;
+ if (!type || type == FILEID_INVALID || bytes != dwords << 2)
+ goto out_err;
+
+ fid->fsid = *fsid;
+ event->fh_len = bytes;
+
+ return type;
+
+out_err:
+ pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, "
+ "type=%d, bytes=%d, err=%i)\n",
+ fsid->val[0], fsid->val[1], type, bytes, err);
+ kfree(fid->ext_fh);
+ fid->ext_fh = NULL;
+ event->fh_len = 0;
+
+ return FILEID_INVALID;
}
-struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
- struct inode *inode, u32 mask,
- const struct path *path)
+/*
+ * The inode to use as identifier when reporting fid depends on the event.
+ * Report the modified directory inode on dirent modification events.
+ * Report the "victim" inode otherwise.
+ * For example:
+ * FS_ATTRIB reports the child inode even if reported on a watched parent.
+ * FS_CREATE reports the modified dir inode and not the created inode.
+ */
+static struct inode *fanotify_fid_inode(struct inode *to_tell, u32 event_mask,
+ const void *data, int data_type)
{
- struct fanotify_event_info *event = NULL;
+ if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
+ return to_tell;
+ else if (data_type == FSNOTIFY_EVENT_INODE)
+ return (struct inode *)data;
+ else if (data_type == FSNOTIFY_EVENT_PATH)
+ return d_inode(((struct path *)data)->dentry);
+ return NULL;
+}
+
+struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ struct inode *inode, u32 mask,
+ const void *data, int data_type,
+ __kernel_fsid_t *fsid)
+{
+ struct fanotify_event *event = NULL;
gfp_t gfp = GFP_KERNEL_ACCOUNT;
+ struct inode *id = fanotify_fid_inode(inode, mask, data, data_type);
/*
* For queues with unlimited length lost events are not expected and
@@ -157,25 +297,36 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
memalloc_use_memcg(group->memcg);
if (fanotify_is_perm_event(mask)) {
- struct fanotify_perm_event_info *pevent;
+ struct fanotify_perm_event *pevent;
pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
if (!pevent)
goto out;
event = &pevent->fae;
pevent->response = 0;
+ pevent->state = FAN_EVENT_INIT;
goto init;
}
event = kmem_cache_alloc(fanotify_event_cachep, gfp);
if (!event)
goto out;
init: __maybe_unused
- fsnotify_init_event(&event->fse, inode, mask);
- event->tgid = get_pid(task_tgid(current));
- if (path) {
- event->path = *path;
+ fsnotify_init_event(&event->fse, inode);
+ event->mask = mask;
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
+ event->pid = get_pid(task_pid(current));
+ else
+ event->pid = get_pid(task_tgid(current));
+ event->fh_len = 0;
+ if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+ /* Report the event without a file identifier on encode error */
+ event->fh_type = fanotify_encode_fid(event, id, gfp, fsid);
+ } else if (data_type == FSNOTIFY_EVENT_PATH) {
+ event->fh_type = FILEID_ROOT;
+ event->path = *((struct path *)data);
path_get(&event->path);
} else {
+ event->fh_type = FILEID_INVALID;
event->path.mnt = NULL;
event->path.dentry = NULL;
}
@@ -184,6 +335,29 @@ out:
return event;
}
+/*
+ * Get cached fsid of the filesystem containing the object from any connector.
+ * All connectors are supposed to have the same fsid, but we do not verify that
+ * here.
+ */
+static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
+{
+ int type;
+ __kernel_fsid_t fsid = {};
+
+ fsnotify_foreach_obj_type(type) {
+ if (!fsnotify_iter_should_report_type(iter_info, type))
+ continue;
+
+ fsid = iter_info->marks[type]->connector->fsid;
+ if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
+ continue;
+ return fsid;
+ }
+
+ return fsid;
+}
+
static int fanotify_handle_event(struct fsnotify_group *group,
struct inode *inode,
u32 mask, const void *data, int data_type,
@@ -191,21 +365,35 @@ static int fanotify_handle_event(struct fsnotify_group *group,
struct fsnotify_iter_info *iter_info)
{
int ret = 0;
- struct fanotify_event_info *event;
+ struct fanotify_event *event;
struct fsnotify_event *fsn_event;
+ __kernel_fsid_t fsid = {};
BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+ BUILD_BUG_ON(FAN_ATTRIB != FS_ATTRIB);
BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
+ BUILD_BUG_ON(FAN_MOVED_TO != FS_MOVED_TO);
+ BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM);
+ BUILD_BUG_ON(FAN_CREATE != FS_CREATE);
+ BUILD_BUG_ON(FAN_DELETE != FS_DELETE);
+ BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF);
+ BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF);
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
+ BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
+ BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
- if (!fanotify_should_send_event(iter_info, mask, data, data_type))
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
+
+ mask = fanotify_group_event_mask(group, iter_info, mask, data,
+ data_type);
+ if (!mask)
return 0;
pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
@@ -220,7 +408,11 @@ static int fanotify_handle_event(struct fsnotify_group *group,
return 0;
}
- event = fanotify_alloc_event(group, inode, mask, data);
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_FID))
+ fsid = fanotify_get_fsid(iter_info);
+
+ event = fanotify_alloc_event(group, inode, mask, data, data_type,
+ &fsid);
ret = -ENOMEM;
if (unlikely(!event)) {
/*
@@ -236,7 +428,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
if (ret) {
/* Permission events shouldn't be merged */
- BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
+ BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
/* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event);
@@ -244,7 +436,6 @@ static int fanotify_handle_event(struct fsnotify_group *group,
} else if (fanotify_is_perm_event(mask)) {
ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event),
iter_info);
- fsnotify_destroy_event(group, fsn_event);
}
finish:
if (fanotify_is_perm_event(mask))
@@ -264,12 +455,15 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
static void fanotify_free_event(struct fsnotify_event *fsn_event)
{
- struct fanotify_event_info *event;
+ struct fanotify_event *event;
event = FANOTIFY_E(fsn_event);
- path_put(&event->path);
- put_pid(event->tgid);
- if (fanotify_is_perm_event(fsn_event->mask)) {
+ if (fanotify_event_has_path(event))
+ path_put(&event->path);
+ else if (fanotify_event_has_ext_fh(event))
+ kfree(event->fid.ext_fh);
+ put_pid(event->pid);
+ if (fanotify_is_perm_event(event->mask)) {
kmem_cache_free(fanotify_perm_event_cachep,
FANOTIFY_PE(fsn_event));
return;
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 8609ba06f474..68b30504284c 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -2,26 +2,112 @@
#include <linux/fsnotify_backend.h>
#include <linux/path.h>
#include <linux/slab.h>
+#include <linux/exportfs.h>
extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
+/* Possible states of the permission event */
+enum {
+ FAN_EVENT_INIT,
+ FAN_EVENT_REPORTED,
+ FAN_EVENT_ANSWERED,
+ FAN_EVENT_CANCELED,
+};
+
+/*
+ * 3 dwords are sufficient for most local fs (64bit ino, 32bit generation).
+ * For 32bit arch, fid increases the size of fanotify_event by 12 bytes and
+ * fh_* fields increase the size of fanotify_event by another 4 bytes.
+ * For 64bit arch, fid increases the size of fanotify_fid by 8 bytes and
+ * fh_* fields are packed in a hole after mask.
+ */
+#if BITS_PER_LONG == 32
+#define FANOTIFY_INLINE_FH_LEN (3 << 2)
+#else
+#define FANOTIFY_INLINE_FH_LEN (4 << 2)
+#endif
+
+struct fanotify_fid {
+ __kernel_fsid_t fsid;
+ union {
+ unsigned char fh[FANOTIFY_INLINE_FH_LEN];
+ unsigned char *ext_fh;
+ };
+};
+
+static inline void *fanotify_fid_fh(struct fanotify_fid *fid,
+ unsigned int fh_len)
+{
+ return fh_len <= FANOTIFY_INLINE_FH_LEN ? fid->fh : fid->ext_fh;
+}
+
+static inline bool fanotify_fid_equal(struct fanotify_fid *fid1,
+ struct fanotify_fid *fid2,
+ unsigned int fh_len)
+{
+ return fid1->fsid.val[0] == fid2->fsid.val[0] &&
+ fid1->fsid.val[1] == fid2->fsid.val[1] &&
+ !memcmp(fanotify_fid_fh(fid1, fh_len),
+ fanotify_fid_fh(fid2, fh_len), fh_len);
+}
+
/*
* Structure for normal fanotify events. It gets allocated in
* fanotify_handle_event() and freed when the information is retrieved by
* userspace
*/
-struct fanotify_event_info {
+struct fanotify_event {
struct fsnotify_event fse;
+ u32 mask;
/*
- * We hold ref to this path so it may be dereferenced at any point
- * during this object's lifetime
+ * Those fields are outside fanotify_fid to pack fanotify_event nicely
+ * on 64bit arch and to use fh_type as an indication of whether path
+ * or fid are used in the union:
+ * FILEID_ROOT (0) for path, > 0 for fid, FILEID_INVALID for neither.
*/
- struct path path;
- struct pid *tgid;
+ u8 fh_type;
+ u8 fh_len;
+ u16 pad;
+ union {
+ /*
+ * We hold ref to this path so it may be dereferenced at any
+ * point during this object's lifetime
+ */
+ struct path path;
+ /*
+ * With FAN_REPORT_FID, we do not hold any reference on the
+ * victim object. Instead we store its NFS file handle and its
+ * filesystem's fsid as a unique identifier.
+ */
+ struct fanotify_fid fid;
+ };
+ struct pid *pid;
};
+static inline bool fanotify_event_has_path(struct fanotify_event *event)
+{
+ return event->fh_type == FILEID_ROOT;
+}
+
+static inline bool fanotify_event_has_fid(struct fanotify_event *event)
+{
+ return event->fh_type != FILEID_ROOT &&
+ event->fh_type != FILEID_INVALID;
+}
+
+static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event)
+{
+ return fanotify_event_has_fid(event) &&
+ event->fh_len > FANOTIFY_INLINE_FH_LEN;
+}
+
+static inline void *fanotify_event_fh(struct fanotify_event *event)
+{
+ return fanotify_fid_fh(&event->fid, event->fh_len);
+}
+
/*
* Structure for permission fanotify events. It gets allocated and freed in
* fanotify_handle_event() since we wait there for user response. When the
@@ -29,29 +115,31 @@ struct fanotify_event_info {
* group->notification_list to group->fanotify_data.access_list to wait for
* user response.
*/
-struct fanotify_perm_event_info {
- struct fanotify_event_info fae;
- int response; /* userspace answer to question */
+struct fanotify_perm_event {
+ struct fanotify_event fae;
+ unsigned short response; /* userspace answer to the event */
+ unsigned short state; /* state of the event */
int fd; /* fd we passed to userspace for this event */
};
-static inline struct fanotify_perm_event_info *
+static inline struct fanotify_perm_event *
FANOTIFY_PE(struct fsnotify_event *fse)
{
- return container_of(fse, struct fanotify_perm_event_info, fae.fse);
+ return container_of(fse, struct fanotify_perm_event, fae.fse);
}
static inline bool fanotify_is_perm_event(u32 mask)
{
return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) &&
- mask & FAN_ALL_PERM_EVENTS;
+ mask & FANOTIFY_PERM_EVENTS;
}
-static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
+static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
{
- return container_of(fse, struct fanotify_event_info, fse);
+ return container_of(fse, struct fanotify_event, fse);
}
-struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
- struct inode *inode, u32 mask,
- const struct path *path);
+struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ struct inode *inode, u32 mask,
+ const void *data, int data_type,
+ __kernel_fsid_t *fsid);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 69054886915b..56992b32c6bb 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -17,6 +17,8 @@
#include <linux/compat.h>
#include <linux/sched/signal.h>
#include <linux/memcontrol.h>
+#include <linux/statfs.h>
+#include <linux/exportfs.h>
#include <asm/ioctls.h>
@@ -47,33 +49,55 @@ struct kmem_cache *fanotify_mark_cache __read_mostly;
struct kmem_cache *fanotify_event_cachep __read_mostly;
struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
+#define FANOTIFY_EVENT_ALIGN 4
+
+static int fanotify_event_info_len(struct fanotify_event *event)
+{
+ if (!fanotify_event_has_fid(event))
+ return 0;
+
+ return roundup(sizeof(struct fanotify_event_info_fid) +
+ sizeof(struct file_handle) + event->fh_len,
+ FANOTIFY_EVENT_ALIGN);
+}
+
/*
* Get an fsnotify notification event if one exists and is small
* enough to fit in "count". Return an error pointer if the count
- * is not large enough.
- *
- * Called with the group->notification_lock held.
+ * is not large enough. When permission event is dequeued, its state is
+ * updated accordingly.
*/
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t count)
{
- assert_spin_locked(&group->notification_lock);
+ size_t event_size = FAN_EVENT_METADATA_LEN;
+ struct fsnotify_event *fsn_event = NULL;
pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
+ spin_lock(&group->notification_lock);
if (fsnotify_notify_queue_is_empty(group))
- return NULL;
+ goto out;
- if (FAN_EVENT_METADATA_LEN > count)
- return ERR_PTR(-EINVAL);
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+ event_size += fanotify_event_info_len(
+ FANOTIFY_E(fsnotify_peek_first_event(group)));
+ }
- /* held the notification_lock the whole time, so this is the
- * same event we peeked above */
- return fsnotify_remove_first_event(group);
+ if (event_size > count) {
+ fsn_event = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ fsn_event = fsnotify_remove_first_event(group);
+ if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask))
+ FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED;
+out:
+ spin_unlock(&group->notification_lock);
+ return fsn_event;
}
static int create_fd(struct fsnotify_group *group,
- struct fanotify_event_info *event,
+ struct fanotify_event *event,
struct file **file)
{
int client_fd;
@@ -114,62 +138,32 @@ static int create_fd(struct fsnotify_group *group,
return client_fd;
}
-static int fill_event_metadata(struct fsnotify_group *group,
- struct fanotify_event_metadata *metadata,
- struct fsnotify_event *fsn_event,
- struct file **file)
-{
- int ret = 0;
- struct fanotify_event_info *event;
-
- pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
- group, metadata, fsn_event);
-
- *file = NULL;
- event = container_of(fsn_event, struct fanotify_event_info, fse);
- metadata->event_len = FAN_EVENT_METADATA_LEN;
- metadata->metadata_len = FAN_EVENT_METADATA_LEN;
- metadata->vers = FANOTIFY_METADATA_VERSION;
- metadata->reserved = 0;
- metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
- metadata->pid = pid_vnr(event->tgid);
- if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
- metadata->fd = FAN_NOFD;
- else {
- metadata->fd = create_fd(group, event, file);
- if (metadata->fd < 0)
- ret = metadata->fd;
- }
-
- return ret;
-}
-
-static struct fanotify_perm_event_info *dequeue_event(
- struct fsnotify_group *group, int fd)
+/*
+ * Finish processing of permission event by setting it to ANSWERED state and
+ * drop group->notification_lock.
+ */
+static void finish_permission_event(struct fsnotify_group *group,
+ struct fanotify_perm_event *event,
+ unsigned int response)
+ __releases(&group->notification_lock)
{
- struct fanotify_perm_event_info *event, *return_e = NULL;
-
- spin_lock(&group->notification_lock);
- list_for_each_entry(event, &group->fanotify_data.access_list,
- fae.fse.list) {
- if (event->fd != fd)
- continue;
+ bool destroy = false;
- list_del_init(&event->fae.fse.list);
- return_e = event;
- break;
- }
+ assert_spin_locked(&group->notification_lock);
+ event->response = response;
+ if (event->state == FAN_EVENT_CANCELED)
+ destroy = true;
+ else
+ event->state = FAN_EVENT_ANSWERED;
spin_unlock(&group->notification_lock);
-
- pr_debug("%s: found return_re=%p\n", __func__, return_e);
-
- return return_e;
+ if (destroy)
+ fsnotify_destroy_event(group, &event->fae.fse);
}
static int process_access_response(struct fsnotify_group *group,
struct fanotify_response *response_struct)
{
- struct fanotify_perm_event_info *event;
+ struct fanotify_perm_event *event;
int fd = response_struct->fd;
int response = response_struct->response;
@@ -191,45 +185,118 @@ static int process_access_response(struct fsnotify_group *group,
if (fd < 0)
return -EINVAL;
- if ((response & FAN_AUDIT) && !group->fanotify_data.audit)
+ if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
return -EINVAL;
- event = dequeue_event(group, fd);
- if (!event)
- return -ENOENT;
+ spin_lock(&group->notification_lock);
+ list_for_each_entry(event, &group->fanotify_data.access_list,
+ fae.fse.list) {
+ if (event->fd != fd)
+ continue;
- event->response = response;
- wake_up(&group->fanotify_data.access_waitq);
+ list_del_init(&event->fae.fse.list);
+ finish_permission_event(group, event, response);
+ wake_up(&group->fanotify_data.access_waitq);
+ return 0;
+ }
+ spin_unlock(&group->notification_lock);
- return 0;
+ return -ENOENT;
}
-static ssize_t copy_event_to_user(struct fsnotify_group *group,
- struct fsnotify_event *event,
- char __user *buf)
+static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
{
- struct fanotify_event_metadata fanotify_event_metadata;
- struct file *f;
- int fd, ret;
+ struct fanotify_event_info_fid info = { };
+ struct file_handle handle = { };
+ size_t fh_len = event->fh_len;
+ size_t len = fanotify_event_info_len(event);
- pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+ if (!len)
+ return 0;
- ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f);
- if (ret < 0)
- return ret;
+ if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len))
+ return -EFAULT;
+
+ /* Copy event info fid header followed by vaiable sized file handle */
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID;
+ info.hdr.len = len;
+ info.fsid = event->fid.fsid;
+ if (copy_to_user(buf, &info, sizeof(info)))
+ return -EFAULT;
+
+ buf += sizeof(info);
+ len -= sizeof(info);
+ handle.handle_type = event->fh_type;
+ handle.handle_bytes = fh_len;
+ if (copy_to_user(buf, &handle, sizeof(handle)))
+ return -EFAULT;
+
+ buf += sizeof(handle);
+ len -= sizeof(handle);
+ if (copy_to_user(buf, fanotify_event_fh(event), fh_len))
+ return -EFAULT;
+
+ /* Pad with 0's */
+ buf += fh_len;
+ len -= fh_len;
+ WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
+ if (len > 0 && clear_user(buf, len))
+ return -EFAULT;
+
+ return 0;
+}
+
+static ssize_t copy_event_to_user(struct fsnotify_group *group,
+ struct fsnotify_event *fsn_event,
+ char __user *buf, size_t count)
+{
+ struct fanotify_event_metadata metadata;
+ struct fanotify_event *event;
+ struct file *f = NULL;
+ int ret, fd = FAN_NOFD;
+
+ pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
+
+ event = container_of(fsn_event, struct fanotify_event, fse);
+ metadata.event_len = FAN_EVENT_METADATA_LEN;
+ metadata.metadata_len = FAN_EVENT_METADATA_LEN;
+ metadata.vers = FANOTIFY_METADATA_VERSION;
+ metadata.reserved = 0;
+ metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
+ metadata.pid = pid_vnr(event->pid);
+
+ if (fanotify_event_has_path(event)) {
+ fd = create_fd(group, event, &f);
+ if (fd < 0)
+ return fd;
+ } else if (fanotify_event_has_fid(event)) {
+ metadata.event_len += fanotify_event_info_len(event);
+ }
+ metadata.fd = fd;
- fd = fanotify_event_metadata.fd;
ret = -EFAULT;
- if (copy_to_user(buf, &fanotify_event_metadata,
- fanotify_event_metadata.event_len))
+ /*
+ * Sanity check copy size in case get_one_event() and
+ * fill_event_metadata() event_len sizes ever get out of sync.
+ */
+ if (WARN_ON_ONCE(metadata.event_len > count))
+ goto out_close_fd;
+
+ if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
goto out_close_fd;
if (fanotify_is_perm_event(event->mask))
- FANOTIFY_PE(event)->fd = fd;
+ FANOTIFY_PE(fsn_event)->fd = fd;
- if (fd != FAN_NOFD)
+ if (fanotify_event_has_path(event)) {
fd_install(fd, f);
- return fanotify_event_metadata.event_len;
+ } else if (fanotify_event_has_fid(event)) {
+ ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN);
+ if (ret < 0)
+ return ret;
+ }
+
+ return metadata.event_len;
out_close_fd:
if (fd != FAN_NOFD) {
@@ -270,10 +337,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
add_wait_queue(&group->notification_waitq, &wait);
while (1) {
- spin_lock(&group->notification_lock);
kevent = get_one_event(group, count);
- spin_unlock(&group->notification_lock);
-
if (IS_ERR(kevent)) {
ret = PTR_ERR(kevent);
break;
@@ -295,7 +359,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
continue;
}
- ret = copy_event_to_user(group, kevent, buf);
+ ret = copy_event_to_user(group, kevent, buf, count);
if (unlikely(ret == -EOPENSTALE)) {
/*
* We cannot report events with stale fd so drop it.
@@ -310,11 +374,13 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
* Permission events get queued to wait for response. Other
* events can be destroyed now.
*/
- if (!fanotify_is_perm_event(kevent->mask)) {
+ if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) {
fsnotify_destroy_event(group, kevent);
} else {
if (ret <= 0) {
- FANOTIFY_PE(kevent)->response = FAN_DENY;
+ spin_lock(&group->notification_lock);
+ finish_permission_event(group,
+ FANOTIFY_PE(kevent), FAN_DENY);
wake_up(&group->fanotify_data.access_waitq);
} else {
spin_lock(&group->notification_lock);
@@ -364,7 +430,7 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
static int fanotify_release(struct inode *ignored, struct file *file)
{
struct fsnotify_group *group = file->private_data;
- struct fanotify_perm_event_info *event, *next;
+ struct fanotify_perm_event *event;
struct fsnotify_event *fsn_event;
/*
@@ -379,13 +445,12 @@ static int fanotify_release(struct inode *ignored, struct file *file)
* and simulate reply from userspace.
*/
spin_lock(&group->notification_lock);
- list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
- fae.fse.list) {
- pr_debug("%s: found group=%p event=%p\n", __func__, group,
- event);
-
+ while (!list_empty(&group->fanotify_data.access_list)) {
+ event = list_first_entry(&group->fanotify_data.access_list,
+ struct fanotify_perm_event, fae.fse.list);
list_del_init(&event->fae.fse.list);
- event->response = FAN_ALLOW;
+ finish_permission_event(group, event, FAN_ALLOW);
+ spin_lock(&group->notification_lock);
}
/*
@@ -395,13 +460,14 @@ static int fanotify_release(struct inode *ignored, struct file *file)
*/
while (!fsnotify_notify_queue_is_empty(group)) {
fsn_event = fsnotify_remove_first_event(group);
- if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) {
+ if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) {
spin_unlock(&group->notification_lock);
fsnotify_destroy_event(group, fsn_event);
- spin_lock(&group->notification_lock);
} else {
- FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+ finish_permission_event(group, FANOTIFY_PE(fsn_event),
+ FAN_ALLOW);
}
+ spin_lock(&group->notification_lock);
}
spin_unlock(&group->notification_lock);
@@ -506,18 +572,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
- __u32 tmask = fsn_mark->mask & ~mask;
-
- if (flags & FAN_MARK_ONDIR)
- tmask &= ~FAN_ONDIR;
-
oldmask = fsn_mark->mask;
- fsn_mark->mask = tmask;
+ fsn_mark->mask &= ~mask;
} else {
- __u32 tmask = fsn_mark->ignored_mask & ~mask;
- if (flags & FAN_MARK_ONDIR)
- tmask &= ~FAN_ONDIR;
- fsn_mark->ignored_mask = tmask;
+ fsn_mark->ignored_mask &= ~mask;
}
*destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
spin_unlock(&fsn_mark->lock);
@@ -563,6 +621,13 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
mask, flags);
}
+static int fanotify_remove_sb_mark(struct fsnotify_group *group,
+ struct super_block *sb, __u32 mask,
+ unsigned int flags)
+{
+ return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags);
+}
+
static int fanotify_remove_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
unsigned int flags)
@@ -579,19 +644,10 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
- __u32 tmask = fsn_mark->mask | mask;
-
- if (flags & FAN_MARK_ONDIR)
- tmask |= FAN_ONDIR;
-
oldmask = fsn_mark->mask;
- fsn_mark->mask = tmask;
+ fsn_mark->mask |= mask;
} else {
- __u32 tmask = fsn_mark->ignored_mask | mask;
- if (flags & FAN_MARK_ONDIR)
- tmask |= FAN_ONDIR;
-
- fsn_mark->ignored_mask = tmask;
+ fsn_mark->ignored_mask |= mask;
if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
}
@@ -602,7 +658,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp,
- unsigned int type)
+ unsigned int type,
+ __kernel_fsid_t *fsid)
{
struct fsnotify_mark *mark;
int ret;
@@ -615,7 +672,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
return ERR_PTR(-ENOMEM);
fsnotify_init_mark(mark, group);
- ret = fsnotify_add_mark_locked(mark, connp, type, 0);
+ ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
if (ret) {
fsnotify_put_mark(mark);
return ERR_PTR(ret);
@@ -627,7 +684,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
static int fanotify_add_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, unsigned int type,
- __u32 mask, unsigned int flags)
+ __u32 mask, unsigned int flags,
+ __kernel_fsid_t *fsid)
{
struct fsnotify_mark *fsn_mark;
__u32 added;
@@ -635,7 +693,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
mutex_lock(&group->mark_mutex);
fsn_mark = fsnotify_find_mark(connp, group);
if (!fsn_mark) {
- fsn_mark = fanotify_add_new_mark(group, connp, type);
+ fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
if (IS_ERR(fsn_mark)) {
mutex_unlock(&group->mark_mutex);
return PTR_ERR(fsn_mark);
@@ -652,15 +710,23 @@ static int fanotify_add_mark(struct fsnotify_group *group,
static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt, __u32 mask,
- unsigned int flags)
+ unsigned int flags, __kernel_fsid_t *fsid)
{
return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
- FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags);
+ FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
+}
+
+static int fanotify_add_sb_mark(struct fsnotify_group *group,
+ struct super_block *sb, __u32 mask,
+ unsigned int flags, __kernel_fsid_t *fsid)
+{
+ return fanotify_add_mark(group, &sb->s_fsnotify_marks,
+ FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
}
static int fanotify_add_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
- unsigned int flags)
+ unsigned int flags, __kernel_fsid_t *fsid)
{
pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
@@ -671,11 +737,11 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
*/
if ((flags & FAN_MARK_IGNORED_MASK) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
- (atomic_read(&inode->i_writecount) > 0))
+ inode_is_open_for_write(inode))
return 0;
return fanotify_add_mark(group, &inode->i_fsnotify_marks,
- FSNOTIFY_OBJ_TYPE_INODE, mask, flags);
+ FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
}
/* fanotify syscalls */
@@ -684,18 +750,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
struct fsnotify_group *group;
int f_flags, fd;
struct user_struct *user;
- struct fanotify_event_info *oevent;
+ struct fanotify_event *oevent;
- pr_debug("%s: flags=%d event_f_flags=%d\n",
- __func__, flags, event_f_flags);
+ pr_debug("%s: flags=%x event_f_flags=%x\n",
+ __func__, flags, event_f_flags);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
#ifdef CONFIG_AUDITSYSCALL
- if (flags & ~(FAN_ALL_INIT_FLAGS | FAN_ENABLE_AUDIT))
+ if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
#else
- if (flags & ~FAN_ALL_INIT_FLAGS)
+ if (flags & ~FANOTIFY_INIT_FLAGS)
#endif
return -EINVAL;
@@ -711,6 +777,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
return -EINVAL;
}
+ if ((flags & FAN_REPORT_FID) &&
+ (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF)
+ return -EINVAL;
+
user = get_current_user();
if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
free_uid(user);
@@ -731,10 +801,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
}
group->fanotify_data.user = user;
+ group->fanotify_data.flags = flags;
atomic_inc(&user->fanotify_listeners);
group->memcg = get_mem_cgroup_from_mm(current->mm);
- oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
+ oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL,
+ FSNOTIFY_EVENT_NONE, NULL);
if (unlikely(!oevent)) {
fd = -ENOMEM;
goto out_destroy_group;
@@ -746,7 +818,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
group->fanotify_data.f_flags = event_f_flags;
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
- switch (flags & FAN_ALL_CLASS_BITS) {
+ switch (flags & FANOTIFY_CLASS_BITS) {
case FAN_CLASS_NOTIF:
group->priority = FS_PRIO_0;
break;
@@ -783,7 +855,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
fd = -EPERM;
if (!capable(CAP_AUDIT_WRITE))
goto out_destroy_group;
- group->fanotify_data.audit = true;
}
fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
@@ -797,6 +868,48 @@ out_destroy_group:
return fd;
}
+/* Check if filesystem can encode a unique fid */
+static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
+{
+ __kernel_fsid_t root_fsid;
+ int err;
+
+ /*
+ * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
+ */
+ err = vfs_get_fsid(path->dentry, fsid);
+ if (err)
+ return err;
+
+ if (!fsid->val[0] && !fsid->val[1])
+ return -ENODEV;
+
+ /*
+ * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
+ * which uses a different fsid than sb root.
+ */
+ err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid);
+ if (err)
+ return err;
+
+ if (root_fsid.val[0] != fsid->val[0] ||
+ root_fsid.val[1] != fsid->val[1])
+ return -EXDEV;
+
+ /*
+ * We need to make sure that the file system supports at least
+ * encoding a file handle so user can use name_to_handle_at() to
+ * compare fid returned with event to the file handle of watched
+ * objects. However, name_to_handle_at() requires that the
+ * filesystem also supports decoding file handles.
+ */
+ if (!path->dentry->d_sb->s_export_op ||
+ !path->dentry->d_sb->s_export_op->fh_to_dentry)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
int dfd, const char __user *pathname)
{
@@ -805,7 +918,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
struct fsnotify_group *group;
struct fd f;
struct path path;
- u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD;
+ __kernel_fsid_t __fsid, *fsid = NULL;
+ u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
int ret;
pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
@@ -815,8 +930,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & ((__u64)0xffffffff << 32))
return -EINVAL;
- if (flags & ~FAN_ALL_MARK_FLAGS)
+ if (flags & ~FANOTIFY_MARK_FLAGS)
+ return -EINVAL;
+
+ switch (mark_type) {
+ case FAN_MARK_INODE:
+ case FAN_MARK_MOUNT:
+ case FAN_MARK_FILESYSTEM:
+ break;
+ default:
return -EINVAL;
+ }
+
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
case FAN_MARK_ADD: /* fallthrough */
case FAN_MARK_REMOVE:
@@ -824,20 +949,15 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
break;
case FAN_MARK_FLUSH:
- if (flags & ~(FAN_MARK_MOUNT | FAN_MARK_FLUSH))
+ if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
return -EINVAL;
break;
default:
return -EINVAL;
}
- if (mask & FAN_ONDIR) {
- flags |= FAN_MARK_ONDIR;
- mask &= ~FAN_ONDIR;
- }
-
if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
- valid_mask |= FAN_ALL_PERM_EVENTS;
+ valid_mask |= FANOTIFY_PERM_EVENTS;
if (mask & ~valid_mask)
return -EINVAL;
@@ -857,14 +977,28 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* allowed to set permissions events.
*/
ret = -EINVAL;
- if (mask & FAN_ALL_PERM_EVENTS &&
+ if (mask & FANOTIFY_PERM_EVENTS &&
group->priority == FS_PRIO_0)
goto fput_and_out;
+ /*
+ * Events with data type inode do not carry enough information to report
+ * event->fd, so we do not allow setting a mask for inode events unless
+ * group supports reporting fid.
+ * inode events are not supported on a mount mark, because they do not
+ * carry enough information (i.e. path) to be filtered by mount point.
+ */
+ if (mask & FANOTIFY_INODE_EVENTS &&
+ (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) ||
+ mark_type == FAN_MARK_MOUNT))
+ goto fput_and_out;
+
if (flags & FAN_MARK_FLUSH) {
ret = 0;
- if (flags & FAN_MARK_MOUNT)
+ if (mark_type == FAN_MARK_MOUNT)
fsnotify_clear_vfsmount_marks_by_group(group);
+ else if (mark_type == FAN_MARK_FILESYSTEM)
+ fsnotify_clear_sb_marks_by_group(group);
else
fsnotify_clear_inode_marks_by_group(group);
goto fput_and_out;
@@ -874,8 +1008,16 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (ret)
goto fput_and_out;
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+ ret = fanotify_test_fid(&path, &__fsid);
+ if (ret)
+ goto path_put_and_out;
+
+ fsid = &__fsid;
+ }
+
/* inode held in place by reference to path; group by fget on fd */
- if (!(flags & FAN_MARK_MOUNT))
+ if (mark_type == FAN_MARK_INODE)
inode = path.dentry->d_inode;
else
mnt = path.mnt;
@@ -883,21 +1025,32 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
/* create/update an inode mark */
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
case FAN_MARK_ADD:
- if (flags & FAN_MARK_MOUNT)
- ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
+ if (mark_type == FAN_MARK_MOUNT)
+ ret = fanotify_add_vfsmount_mark(group, mnt, mask,
+ flags, fsid);
+ else if (mark_type == FAN_MARK_FILESYSTEM)
+ ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
+ flags, fsid);
else
- ret = fanotify_add_inode_mark(group, inode, mask, flags);
+ ret = fanotify_add_inode_mark(group, inode, mask,
+ flags, fsid);
break;
case FAN_MARK_REMOVE:
- if (flags & FAN_MARK_MOUNT)
- ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
+ if (mark_type == FAN_MARK_MOUNT)
+ ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
+ flags);
+ else if (mark_type == FAN_MARK_FILESYSTEM)
+ ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
+ flags);
else
- ret = fanotify_remove_inode_mark(group, inode, mask, flags);
+ ret = fanotify_remove_inode_mark(group, inode, mask,
+ flags);
break;
default:
ret = -EINVAL;
}
+path_put_and_out:
path_put(&path);
fput_and_out:
fdput(f);
@@ -934,12 +1087,15 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
*/
static int __init fanotify_user_setup(void)
{
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
+
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
SLAB_PANIC|SLAB_ACCOUNT);
- fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
+ fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC);
if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
fanotify_perm_event_cachep =
- KMEM_CACHE(fanotify_perm_event_info, SLAB_PANIC);
+ KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
}
return 0;
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 86fcf5814279..1e2bfd26b352 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -15,6 +15,7 @@
#include <linux/exportfs.h>
#include "inotify/inotify.h"
+#include "fdinfo.h"
#include "fsnotify.h"
#if defined(CONFIG_PROC_FS)
@@ -131,37 +132,20 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
+ } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
+ struct super_block *sb = fsnotify_conn_sb(mark->connector);
+
+ seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
+ sb->s_dev, mflags, mark->mask, mark->ignored_mask);
}
}
void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
{
struct fsnotify_group *group = f->private_data;
- unsigned int flags = 0;
-
- switch (group->priority) {
- case FS_PRIO_0:
- flags |= FAN_CLASS_NOTIF;
- break;
- case FS_PRIO_1:
- flags |= FAN_CLASS_CONTENT;
- break;
- case FS_PRIO_2:
- flags |= FAN_CLASS_PRE_CONTENT;
- break;
- }
-
- if (group->max_events == UINT_MAX)
- flags |= FAN_UNLIMITED_QUEUE;
-
- if (group->fanotify_data.max_marks == UINT_MAX)
- flags |= FAN_UNLIMITED_MARKS;
-
- if (group->fanotify_data.audit)
- flags |= FAN_ENABLE_AUDIT;
seq_printf(m, "fanotify flags:%x event-flags:%x\n",
- flags, group->fanotify_data.f_flags);
+ group->fanotify_data.flags, group->fanotify_data.f_flags);
show_fdinfo(m, f, fanotify_fdinfo);
}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index ababdbfab537..df06f3da166c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -48,7 +48,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
* Called during unmount with no locks held, so needs to be safe against
* concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
*/
-void fsnotify_unmount_inodes(struct super_block *sb)
+static void fsnotify_unmount_inodes(struct super_block *sb)
{
struct inode *inode, *iput_inode = NULL;
@@ -96,6 +96,15 @@ void fsnotify_unmount_inodes(struct super_block *sb)
if (iput_inode)
iput(iput_inode);
+ /* Wait for outstanding inode references from connectors */
+ wait_var_event(&sb->s_fsnotify_inode_refs,
+ !atomic_long_read(&sb->s_fsnotify_inode_refs));
+}
+
+void fsnotify_sb_delete(struct super_block *sb)
+{
+ fsnotify_unmount_inodes(sb);
+ fsnotify_clear_marks_by_sb(sb);
}
/*
@@ -158,9 +167,9 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask
parent = dget_parent(dentry);
p_inode = parent->d_inode;
- if (unlikely(!fsnotify_inode_watches_children(p_inode)))
+ if (unlikely(!fsnotify_inode_watches_children(p_inode))) {
__fsnotify_update_child_dentry_flags(p_inode);
- else if (p_inode->i_fsnotify_mask & mask) {
+ } else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) {
struct name_snapshot name;
/* we are notifying a parent so come up with the new mask which
@@ -190,7 +199,7 @@ static int send_to_group(struct inode *to_tell,
struct fsnotify_iter_info *iter_info)
{
struct fsnotify_group *group = NULL;
- __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+ __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
__u32 marks_mask = 0;
__u32 marks_ignored_mask = 0;
struct fsnotify_mark *mark;
@@ -319,15 +328,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
const unsigned char *file_name, u32 cookie)
{
struct fsnotify_iter_info iter_info = {};
- struct mount *mnt;
+ struct super_block *sb = to_tell->i_sb;
+ struct mount *mnt = NULL;
+ __u32 mnt_or_sb_mask = sb->s_fsnotify_mask;
int ret = 0;
- /* global tests shouldn't care about events on child only the specific event */
- __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+ __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
- if (data_is == FSNOTIFY_EVENT_PATH)
+ if (data_is == FSNOTIFY_EVENT_PATH) {
mnt = real_mount(((const struct path *)data)->mnt);
- else
- mnt = NULL;
+ mnt_or_sb_mask |= mnt->mnt_fsnotify_mask;
+ }
+ /* An event "on child" is not intended for a mount/sb mark */
+ if (mask & FS_EVENT_ON_CHILD)
+ mnt_or_sb_mask = 0;
/*
* Optimization: srcu_read_lock() has a memory barrier which can
@@ -336,31 +349,32 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
* SRCU because we have no references to any objects and do not
* need SRCU to keep them "alive".
*/
- if (!to_tell->i_fsnotify_marks &&
+ if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks &&
(!mnt || !mnt->mnt_fsnotify_marks))
return 0;
/*
* if this is a modify event we may need to clear the ignored masks
- * otherwise return if neither the inode nor the vfsmount care about
+ * otherwise return if neither the inode nor the vfsmount/sb care about
* this type of event.
*/
if (!(mask & FS_MODIFY) &&
- !(test_mask & to_tell->i_fsnotify_mask) &&
- !(mnt && test_mask & mnt->mnt_fsnotify_mask))
+ !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask)))
return 0;
iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
fsnotify_first_mark(&to_tell->i_fsnotify_marks);
+ iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
+ fsnotify_first_mark(&sb->s_fsnotify_marks);
if (mnt) {
iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
}
/*
- * We need to merge inode & vfsmount mark lists so that inode mark
- * ignore masks are properly reflected for mount mark notifications.
+ * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
+ * ignore masks are properly reflected for mount/sb mark notifications.
* That's why this traversal is so complicated...
*/
while (fsnotify_iter_select_report_types(&iter_info)) {
@@ -386,7 +400,7 @@ static __init int fsnotify_init(void)
{
int ret;
- BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 7902653dd577..5a00121fb219 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -21,6 +21,12 @@ static inline struct mount *fsnotify_conn_mount(
return container_of(conn->obj, struct mount, mnt_fsnotify_marks);
}
+static inline struct super_block *fsnotify_conn_sb(
+ struct fsnotify_mark_connector *conn)
+{
+ return container_of(conn->obj, struct super_block, s_fsnotify_marks);
+}
+
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
@@ -43,6 +49,11 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
{
fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks);
}
+/* run the list of all marks associated with sb and destroy them */
+static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
+{
+ fsnotify_destroy_marks(&sb->s_fsnotify_marks);
+}
/* Wait until all marks queued for destruction are destroyed */
extern void fsnotify_wait_marks_destroyed(void);
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 7e4578d35b61..74ae60305189 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -5,6 +5,7 @@
struct inotify_event_info {
struct fsnotify_event fse;
+ u32 mask;
int wd;
u32 sync_cookie;
int name_len;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index f4184b4f3815..ff30abd6a49b 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -43,11 +43,11 @@ static bool event_compare(struct fsnotify_event *old_fsn,
{
struct inotify_event_info *old, *new;
- if (old_fsn->mask & FS_IN_IGNORED)
- return false;
old = INOTIFY_E(old_fsn);
new = INOTIFY_E(new_fsn);
- if ((old_fsn->mask == new_fsn->mask) &&
+ if (old->mask & FS_IN_IGNORED)
+ return false;
+ if ((old->mask == new->mask) &&
(old_fsn->inode == new_fsn->inode) &&
(old->name_len == new->name_len) &&
(!old->name_len || !strcmp(old->name, new->name)))
@@ -113,8 +113,18 @@ int inotify_handle_event(struct fsnotify_group *group,
return -ENOMEM;
}
+ /*
+ * We now report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events
+ * for fanotify. inotify never reported IN_ISDIR with those events.
+ * It looks like an oversight, but to avoid the risk of breaking
+ * existing inotify programs, mask the flag out from those events.
+ */
+ if (mask & (IN_MOVE_SELF | IN_DELETE_SELF))
+ mask &= ~IN_ISDIR;
+
fsn_event = &event->fse;
- fsnotify_init_event(fsn_event, inode, mask);
+ fsnotify_init_event(fsn_event, inode);
+ event->mask = mask;
event->wd = i_mark->wd;
event->sync_cookie = cookie;
event->name_len = len;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ac6978d3208c..e2901fbb9f76 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -189,7 +189,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
*/
pad_name_len = round_event_name_len(fsn_event);
inotify_event.len = pad_name_len;
- inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
+ inotify_event.mask = inotify_mask_to_arg(event->mask);
inotify_event.wd = event->wd;
inotify_event.cookie = event->sync_cookie;
@@ -634,7 +634,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
return ERR_PTR(-ENOMEM);
}
group->overflow_event = &oevent->fse;
- fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW);
+ fsnotify_init_event(group->overflow_event, NULL);
+ oevent->mask = FS_Q_OVERFLOW;
oevent->wd = -1;
oevent->sync_cookie = 0;
oevent->name_len = 0;
@@ -724,8 +725,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
return -EBADF;
/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
- if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE)))
- return -EINVAL;
+ if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) {
+ ret = -EINVAL;
+ goto fput_and_out;
+ }
/* verify that this is indeed an inotify instance */
if (unlikely(f.file->f_op != &inotify_fops)) {
@@ -815,7 +818,7 @@ static int __init inotify_user_setup(void)
BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
- BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22);
+ BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
SLAB_PANIC|SLAB_ACCOUNT);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 59cdb27826de..d593d4269561 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -82,6 +82,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
+#include <linux/ratelimit.h>
#include <linux/atomic.h>
@@ -115,6 +116,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
+ else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
+ return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
return NULL;
}
@@ -179,19 +182,24 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
}
}
-static struct inode *fsnotify_detach_connector_from_object(
- struct fsnotify_mark_connector *conn)
+static void *fsnotify_detach_connector_from_object(
+ struct fsnotify_mark_connector *conn,
+ unsigned int *type)
{
struct inode *inode = NULL;
+ *type = conn->type;
if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
return NULL;
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0;
+ atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
+ } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
+ fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
}
rcu_assign_pointer(*(conn->obj), NULL);
@@ -211,10 +219,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
fsnotify_put_group(group);
}
+/* Drop object reference originally held by a connector */
+static void fsnotify_drop_object(unsigned int type, void *objp)
+{
+ struct inode *inode;
+ struct super_block *sb;
+
+ if (!objp)
+ return;
+ /* Currently only inode references are passed to be dropped */
+ if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
+ return;
+ inode = objp;
+ sb = inode->i_sb;
+ iput(inode);
+ if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
+ wake_up_var(&sb->s_fsnotify_inode_refs);
+}
+
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
struct fsnotify_mark_connector *conn;
- struct inode *inode = NULL;
+ void *objp = NULL;
+ unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
bool free_conn = false;
/* Catch marks that were actually never attached to object */
@@ -234,7 +261,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
conn = mark->connector;
hlist_del_init_rcu(&mark->obj_list);
if (hlist_empty(&conn->list)) {
- inode = fsnotify_detach_connector_from_object(conn);
+ objp = fsnotify_detach_connector_from_object(conn, &type);
free_conn = true;
} else {
__fsnotify_recalc_mask(conn);
@@ -242,7 +269,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
mark->connector = NULL;
spin_unlock(&conn->lock);
- iput(inode);
+ fsnotify_drop_object(type, objp);
if (free_conn) {
spin_lock(&destroy_lock);
@@ -455,7 +482,8 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
}
static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
- unsigned int type)
+ unsigned int type,
+ __kernel_fsid_t *fsid)
{
struct inode *inode = NULL;
struct fsnotify_mark_connector *conn;
@@ -467,6 +495,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
INIT_HLIST_HEAD(&conn->list);
conn->type = type;
conn->obj = connp;
+ /* Cache fsid of filesystem containing the object */
+ if (fsid)
+ conn->fsid = *fsid;
+ else
+ conn->fsid.val[0] = conn->fsid.val[1] = 0;
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
inode = igrab(fsnotify_conn_inode(conn));
/*
@@ -518,7 +551,7 @@ out:
*/
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int type,
- int allow_dups)
+ int allow_dups, __kernel_fsid_t *fsid)
{
struct fsnotify_mark *lmark, *last = NULL;
struct fsnotify_mark_connector *conn;
@@ -527,15 +560,36 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
if (WARN_ON(!fsnotify_valid_obj_type(type)))
return -EINVAL;
+
+ /* Backend is expected to check for zero fsid (e.g. tmpfs) */
+ if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1]))
+ return -ENODEV;
+
restart:
spin_lock(&mark->lock);
conn = fsnotify_grab_connector(connp);
if (!conn) {
spin_unlock(&mark->lock);
- err = fsnotify_attach_connector_to_object(connp, type);
+ err = fsnotify_attach_connector_to_object(connp, type, fsid);
if (err)
return err;
goto restart;
+ } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) &&
+ (fsid->val[0] != conn->fsid.val[0] ||
+ fsid->val[1] != conn->fsid.val[1])) {
+ /*
+ * Backend is expected to check for non uniform fsid
+ * (e.g. btrfs), but maybe we missed something?
+ * Only allow setting conn->fsid once to non zero fsid.
+ * inotify and non-fid fanotify groups do not set nor test
+ * conn->fsid.
+ */
+ pr_warn_ratelimited("%s: fsid mismatch on object of type %u: "
+ "%x.%x != %x.%x\n", __func__, conn->type,
+ fsid->val[0], fsid->val[1],
+ conn->fsid.val[0], conn->fsid.val[1]);
+ err = -EXDEV;
+ goto out_err;
}
/* is mark the first mark? */
@@ -580,7 +634,7 @@ out_err:
*/
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int type,
- int allow_dups)
+ int allow_dups, __kernel_fsid_t *fsid)
{
struct fsnotify_group *group = mark->group;
int ret = 0;
@@ -601,7 +655,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_get_mark(mark); /* for g_list */
spin_unlock(&mark->lock);
- ret = fsnotify_add_mark_list(mark, connp, type, allow_dups);
+ ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid);
if (ret)
goto err;
@@ -622,13 +676,13 @@ err:
}
int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
- unsigned int type, int allow_dups)
+ unsigned int type, int allow_dups, __kernel_fsid_t *fsid)
{
int ret;
struct fsnotify_group *group = mark->group;
mutex_lock(&group->mark_mutex);
- ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups);
+ ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid);
mutex_unlock(&group->mark_mutex);
return ret;
}
@@ -709,7 +763,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
{
struct fsnotify_mark_connector *conn;
struct fsnotify_mark *mark, *old_mark = NULL;
- struct inode *inode;
+ void *objp;
+ unsigned int type;
conn = fsnotify_grab_connector(connp);
if (!conn)
@@ -735,11 +790,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
* mark references get dropped. It would lead to strange results such
* as delaying inode deletion or blocking unmount.
*/
- inode = fsnotify_detach_connector_from_object(conn);
+ objp = fsnotify_detach_connector_from_object(conn, &type);
spin_unlock(&conn->lock);
if (old_mark)
fsnotify_put_mark(old_mark);
- iput(inode);
+ fsnotify_drop_object(type, objp);
}
/*
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 3c3e36745f59..5f3a54d444b5 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -71,7 +71,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
struct fsnotify_event *event)
{
/* Overflow events are per-group and we don't want to free them */
- if (!event || event->mask == FS_Q_OVERFLOW)
+ if (!event || event == group->overflow_event)
return;
/*
* If the event is still queued, we have a problem... Do an unreliable
@@ -141,6 +141,18 @@ queue:
return ret;
}
+void fsnotify_remove_queued_event(struct fsnotify_group *group,
+ struct fsnotify_event *event)
+{
+ assert_spin_locked(&group->notification_lock);
+ /*
+ * We need to init list head for the case of overflow event so that
+ * check in fsnotify_add_event() works
+ */
+ list_del_init(&event->list);
+ group->q_len--;
+}
+
/*
* Remove and return the first event from the notification list. It is the
* responsibility of the caller to destroy the obtained event
@@ -155,13 +167,7 @@ struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
event = list_first_entry(&group->notification_list,
struct fsnotify_event, list);
- /*
- * We need to init list head for the case of overflow event so that
- * check in fsnotify_add_event() works
- */
- list_del_init(&event->list);
- group->q_len--;
-
+ fsnotify_remove_queued_event(group, event);
return event;
}
@@ -194,23 +200,3 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
}
spin_unlock(&group->notification_lock);
}
-
-/*
- * fsnotify_create_event - Allocate a new event which will be sent to each
- * group's handle_event function if the group was interested in this
- * particular event.
- *
- * @inode the inode which is supposed to receive the event (sometimes a
- * parent of the inode to which the event happened.
- * @mask what actually happened.
- * @data pointer to the object which was actually affected
- * @data_type flag indication if the data is a file, path, inode, nothing...
- * @name the filename, if available
- */
-void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode,
- u32 mask)
-{
- INIT_LIST_HEAD(&event->list);
- event->inode = inode;
- event->mask = mask;
-}