diff options
Diffstat (limited to 'fs/notify')
-rw-r--r-- | fs/notify/fanotify/Kconfig | 1 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.c | 298 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.h | 120 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 468 | ||||
-rw-r--r-- | fs/notify/fdinfo.c | 30 | ||||
-rw-r--r-- | fs/notify/fsnotify.c | 48 | ||||
-rw-r--r-- | fs/notify/fsnotify.h | 11 | ||||
-rw-r--r-- | fs/notify/inotify/inotify.h | 1 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_fsnotify.c | 18 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 13 | ||||
-rw-r--r-- | fs/notify/mark.c | 85 | ||||
-rw-r--r-- | fs/notify/notification.c | 42 |
12 files changed, 819 insertions, 316 deletions
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig index 41355ce74ac0..735bfb2e9190 100644 --- a/fs/notify/fanotify/Kconfig +++ b/fs/notify/fanotify/Kconfig @@ -2,6 +2,7 @@ config FANOTIFY bool "Filesystem wide access notification" select FSNOTIFY select ANON_INODES + select EXPORTFS default n ---help--- Say Y here to enable fanotify support. fanotify is a file access diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 94b52157bf8d..6b9c27548997 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -13,22 +13,40 @@ #include <linux/wait.h> #include <linux/audit.h> #include <linux/sched/mm.h> +#include <linux/statfs.h> #include "fanotify.h" static bool should_merge(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { - struct fanotify_event_info *old, *new; + struct fanotify_event *old, *new; pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn); old = FANOTIFY_E(old_fsn); new = FANOTIFY_E(new_fsn); - if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid && - old->path.mnt == new->path.mnt && - old->path.dentry == new->path.dentry) - return true; + if (old_fsn->inode != new_fsn->inode || old->pid != new->pid || + old->fh_type != new->fh_type || old->fh_len != new->fh_len) + return false; + + if (fanotify_event_has_path(old)) { + return old->path.mnt == new->path.mnt && + old->path.dentry == new->path.dentry; + } else if (fanotify_event_has_fid(old)) { + /* + * We want to merge many dirent events in the same dir (i.e. + * creates/unlinks/renames), but we do not want to merge dirent + * events referring to subdirs with dirent events referring to + * non subdirs, otherwise, user won't be able to tell from a + * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+ + * unlink pair or rmdir+create pair of events. 
+ */ + return (old->mask & FS_ISDIR) == (new->mask & FS_ISDIR) && + fanotify_fid_equal(&old->fid, &new->fid, old->fh_len); + } + + /* Do not merge events if we failed to encode fid */ return false; } @@ -36,20 +54,22 @@ static bool should_merge(struct fsnotify_event *old_fsn, static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) { struct fsnotify_event *test_event; + struct fanotify_event *new; pr_debug("%s: list=%p event=%p\n", __func__, list, event); + new = FANOTIFY_E(event); /* * Don't merge a permission event with any other event so that we know * the event structure we have created in fanotify_handle_event() is the * one we should check for permission response. */ - if (fanotify_is_perm_event(event->mask)) + if (fanotify_is_perm_event(new->mask)) return 0; list_for_each_entry_reverse(test_event, list, list) { if (should_merge(test_event, event)) { - test_event->mask |= event->mask; + FANOTIFY_E(test_event)->mask |= new->mask; return 1; } } @@ -57,15 +77,44 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) return 0; } +/* + * Wait for response to permission event. The function also takes care of + * freeing the permission event (or offloads that in case the wait is canceled + * by a signal). The function returns 0 in case access got allowed by userspace, + * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case + * the wait got interrupted by a signal. + */ static int fanotify_get_response(struct fsnotify_group *group, - struct fanotify_perm_event_info *event, + struct fanotify_perm_event *event, struct fsnotify_iter_info *iter_info) { int ret; pr_debug("%s: group=%p event=%p\n", __func__, group, event); - wait_event(group->fanotify_data.access_waitq, event->response); + ret = wait_event_killable(group->fanotify_data.access_waitq, + event->state == FAN_EVENT_ANSWERED); + /* Signal pending? 
*/ + if (ret < 0) { + spin_lock(&group->notification_lock); + /* Event reported to userspace and no answer yet? */ + if (event->state == FAN_EVENT_REPORTED) { + /* Event will get freed once userspace answers to it */ + event->state = FAN_EVENT_CANCELED; + spin_unlock(&group->notification_lock); + return ret; + } + /* Event not yet reported? Just remove it. */ + if (event->state == FAN_EVENT_INIT) + fsnotify_remove_queued_event(group, &event->fae.fse); + /* + * Event may be also answered in case signal delivery raced + * with wakeup. In that case we have nothing to do besides + * freeing the event and reporting error. + */ + spin_unlock(&group->notification_lock); + goto out; + } /* userspace responded, convert to something usable */ switch (event->response & ~FAN_AUDIT) { @@ -81,19 +130,27 @@ static int fanotify_get_response(struct fsnotify_group *group, if (event->response & FAN_AUDIT) audit_fanotify(event->response & ~FAN_AUDIT); - event->response = 0; - pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, group, event, ret); - +out: + fsnotify_destroy_event(group, &event->fae.fse); + return ret; } -static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, - u32 event_mask, const void *data, - int data_type) +/* + * This function returns a mask for an event that only contains the flags + * that have been specifically requested by the user. Flags that may have + * been included within the event mask, but have not been explicitly + * requested by the user, will not be present in the returned mask. 
+ */ +static u32 fanotify_group_event_mask(struct fsnotify_group *group, + struct fsnotify_iter_info *iter_info, + u32 event_mask, const void *data, + int data_type) { __u32 marks_mask = 0, marks_ignored_mask = 0; + __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS; const struct path *path = data; struct fsnotify_mark *mark; int type; @@ -101,49 +158,132 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", __func__, iter_info->report_mask, event_mask, data, data_type); - /* if we don't have enough info to send an event to userspace say no */ - if (data_type != FSNOTIFY_EVENT_PATH) - return false; - - /* sorry, fanotify only gives a damn about files and dirs */ - if (!d_is_reg(path->dentry) && - !d_can_lookup(path->dentry)) - return false; + if (!FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + /* Do we have path to open a file descriptor? */ + if (data_type != FSNOTIFY_EVENT_PATH) + return 0; + /* Path type events are only relevant for files and dirs */ + if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry)) + return 0; + } fsnotify_foreach_obj_type(type) { if (!fsnotify_iter_should_report_type(iter_info, type)) continue; mark = iter_info->marks[type]; /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! + * If the event is for a child and this mark doesn't care about + * events on a child, don't send it! */ - if (type == FSNOTIFY_OBJ_TYPE_INODE && - (event_mask & FS_EVENT_ON_CHILD) && - !(mark->mask & FS_EVENT_ON_CHILD)) + if (event_mask & FS_EVENT_ON_CHILD && + (type != FSNOTIFY_OBJ_TYPE_INODE || + !(mark->mask & FS_EVENT_ON_CHILD))) continue; marks_mask |= mark->mask; marks_ignored_mask |= mark->ignored_mask; } - if (d_is_dir(path->dentry) && + test_mask = event_mask & marks_mask & ~marks_ignored_mask; + + /* + * dirent modification events (create/delete/move) do not carry the + * child entry name/inode information. 
Instead, we report FAN_ONDIR + * for mkdir/rmdir so user can differentiate them from creat/unlink. + * + * For backward compatibility and consistency, do not report FAN_ONDIR + * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR + * to user in FAN_REPORT_FID mode for all event types. + */ + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + /* Do not report FAN_ONDIR without any event */ + if (!(test_mask & ~FAN_ONDIR)) + return 0; + } else { + user_mask &= ~FAN_ONDIR; + } + + if (event_mask & FS_ISDIR && !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) - return false; + return 0; - if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask & - ~marks_ignored_mask) - return true; + return test_mask & user_mask; +} - return false; +static int fanotify_encode_fid(struct fanotify_event *event, + struct inode *inode, gfp_t gfp, + __kernel_fsid_t *fsid) +{ + struct fanotify_fid *fid = &event->fid; + int dwords, bytes = 0; + int err, type; + + fid->ext_fh = NULL; + dwords = 0; + err = -ENOENT; + type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL); + if (!dwords) + goto out_err; + + bytes = dwords << 2; + if (bytes > FANOTIFY_INLINE_FH_LEN) { + /* Treat failure to allocate fh as failure to allocate event */ + err = -ENOMEM; + fid->ext_fh = kmalloc(bytes, gfp); + if (!fid->ext_fh) + goto out_err; + } + + type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes), + &dwords, NULL); + err = -EINVAL; + if (!type || type == FILEID_INVALID || bytes != dwords << 2) + goto out_err; + + fid->fsid = *fsid; + event->fh_len = bytes; + + return type; + +out_err: + pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, " + "type=%d, bytes=%d, err=%i)\n", + fsid->val[0], fsid->val[1], type, bytes, err); + kfree(fid->ext_fh); + fid->ext_fh = NULL; + event->fh_len = 0; + + return FILEID_INVALID; } -struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path) +/* + * The inode to 
use as identifier when reporting fid depends on the event. + * Report the modified directory inode on dirent modification events. + * Report the "victim" inode otherwise. + * For example: + * FS_ATTRIB reports the child inode even if reported on a watched parent. + * FS_CREATE reports the modified dir inode and not the created inode. + */ +static struct inode *fanotify_fid_inode(struct inode *to_tell, u32 event_mask, + const void *data, int data_type) { - struct fanotify_event_info *event = NULL; + if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) + return to_tell; + else if (data_type == FSNOTIFY_EVENT_INODE) + return (struct inode *)data; + else if (data_type == FSNOTIFY_EVENT_PATH) + return d_inode(((struct path *)data)->dentry); + return NULL; +} + +struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, + struct inode *inode, u32 mask, + const void *data, int data_type, + __kernel_fsid_t *fsid) +{ + struct fanotify_event *event = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; + struct inode *id = fanotify_fid_inode(inode, mask, data, data_type); /* * For queues with unlimited length lost events are not expected and @@ -157,25 +297,36 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, memalloc_use_memcg(group->memcg); if (fanotify_is_perm_event(mask)) { - struct fanotify_perm_event_info *pevent; + struct fanotify_perm_event *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); if (!pevent) goto out; event = &pevent->fae; pevent->response = 0; + pevent->state = FAN_EVENT_INIT; goto init; } event = kmem_cache_alloc(fanotify_event_cachep, gfp); if (!event) goto out; init: __maybe_unused - fsnotify_init_event(&event->fse, inode, mask); - event->tgid = get_pid(task_tgid(current)); - if (path) { - event->path = *path; + fsnotify_init_event(&event->fse, inode); + event->mask = mask; + if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) + event->pid = get_pid(task_pid(current)); + else + event->pid = get_pid(task_tgid(current)); 
+ event->fh_len = 0; + if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + /* Report the event without a file identifier on encode error */ + event->fh_type = fanotify_encode_fid(event, id, gfp, fsid); + } else if (data_type == FSNOTIFY_EVENT_PATH) { + event->fh_type = FILEID_ROOT; + event->path = *((struct path *)data); path_get(&event->path); } else { + event->fh_type = FILEID_INVALID; event->path.mnt = NULL; event->path.dentry = NULL; } @@ -184,6 +335,29 @@ out: return event; } +/* + * Get cached fsid of the filesystem containing the object from any connector. + * All connectors are supposed to have the same fsid, but we do not verify that + * here. + */ +static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) +{ + int type; + __kernel_fsid_t fsid = {}; + + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + + fsid = iter_info->marks[type]->connector->fsid; + if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) + continue; + return fsid; + } + + return fsid; +} + static int fanotify_handle_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, @@ -191,21 +365,35 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_iter_info *iter_info) { int ret = 0; - struct fanotify_event_info *event; + struct fanotify_event *event; struct fsnotify_event *fsn_event; + __kernel_fsid_t fsid = {}; BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); + BUILD_BUG_ON(FAN_ATTRIB != FS_ATTRIB); BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); BUILD_BUG_ON(FAN_OPEN != FS_OPEN); + BUILD_BUG_ON(FAN_MOVED_TO != FS_MOVED_TO); + BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM); + BUILD_BUG_ON(FAN_CREATE != FS_CREATE); + BUILD_BUG_ON(FAN_DELETE != FS_DELETE); + BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF); + BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF); 
BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); + BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); + BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); - if (!fanotify_should_send_event(iter_info, mask, data, data_type)) + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19); + + mask = fanotify_group_event_mask(group, iter_info, mask, data, + data_type); + if (!mask) return 0; pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, @@ -220,7 +408,11 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - event = fanotify_alloc_event(group, inode, mask, data); + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) + fsid = fanotify_get_fsid(iter_info); + + event = fanotify_alloc_event(group, inode, mask, data, data_type, + &fsid); ret = -ENOMEM; if (unlikely(!event)) { /* @@ -236,7 +428,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, ret = fsnotify_add_event(group, fsn_event, fanotify_merge); if (ret) { /* Permission events shouldn't be merged */ - BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); + BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); /* Our event wasn't used in the end. Free it. 
*/ fsnotify_destroy_event(group, fsn_event); @@ -244,7 +436,6 @@ static int fanotify_handle_event(struct fsnotify_group *group, } else if (fanotify_is_perm_event(mask)) { ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), iter_info); - fsnotify_destroy_event(group, fsn_event); } finish: if (fanotify_is_perm_event(mask)) @@ -264,12 +455,15 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) static void fanotify_free_event(struct fsnotify_event *fsn_event) { - struct fanotify_event_info *event; + struct fanotify_event *event; event = FANOTIFY_E(fsn_event); - path_put(&event->path); - put_pid(event->tgid); - if (fanotify_is_perm_event(fsn_event->mask)) { + if (fanotify_event_has_path(event)) + path_put(&event->path); + else if (fanotify_event_has_ext_fh(event)) + kfree(event->fid.ext_fh); + put_pid(event->pid); + if (fanotify_is_perm_event(event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); return; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 8609ba06f474..68b30504284c 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -2,26 +2,112 @@ #include <linux/fsnotify_backend.h> #include <linux/path.h> #include <linux/slab.h> +#include <linux/exportfs.h> extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; +/* Possible states of the permission event */ +enum { + FAN_EVENT_INIT, + FAN_EVENT_REPORTED, + FAN_EVENT_ANSWERED, + FAN_EVENT_CANCELED, +}; + +/* + * 3 dwords are sufficient for most local fs (64bit ino, 32bit generation). + * For 32bit arch, fid increases the size of fanotify_event by 12 bytes and + * fh_* fields increase the size of fanotify_event by another 4 bytes. + * For 64bit arch, fid increases the size of fanotify_event by 8 bytes and + * fh_* fields are packed in a hole after mask. 
+ */ +#if BITS_PER_LONG == 32 +#define FANOTIFY_INLINE_FH_LEN (3 << 2) +#else +#define FANOTIFY_INLINE_FH_LEN (4 << 2) +#endif + +struct fanotify_fid { + __kernel_fsid_t fsid; + union { + unsigned char fh[FANOTIFY_INLINE_FH_LEN]; + unsigned char *ext_fh; + }; +}; + +static inline void *fanotify_fid_fh(struct fanotify_fid *fid, + unsigned int fh_len) +{ + return fh_len <= FANOTIFY_INLINE_FH_LEN ? fid->fh : fid->ext_fh; +} + +static inline bool fanotify_fid_equal(struct fanotify_fid *fid1, + struct fanotify_fid *fid2, + unsigned int fh_len) +{ + return fid1->fsid.val[0] == fid2->fsid.val[0] && + fid1->fsid.val[1] == fid2->fsid.val[1] && + !memcmp(fanotify_fid_fh(fid1, fh_len), + fanotify_fid_fh(fid2, fh_len), fh_len); +} + /* * Structure for normal fanotify events. It gets allocated in * fanotify_handle_event() and freed when the information is retrieved by * userspace */ -struct fanotify_event_info { +struct fanotify_event { struct fsnotify_event fse; + u32 mask; /* - * We hold ref to this path so it may be dereferenced at any point - * during this object's lifetime + * Those fields are outside fanotify_fid to pack fanotify_event nicely + * on 64bit arch and to use fh_type as an indication of whether path + * or fid are used in the union: + * FILEID_ROOT (0) for path, > 0 for fid, FILEID_INVALID for neither. */ - struct path path; - struct pid *tgid; + u8 fh_type; + u8 fh_len; + u16 pad; + union { + /* + * We hold ref to this path so it may be dereferenced at any + * point during this object's lifetime + */ + struct path path; + /* + * With FAN_REPORT_FID, we do not hold any reference on the + * victim object. Instead we store its NFS file handle and its + * filesystem's fsid as a unique identifier. 
+ */ + struct fanotify_fid fid; + }; + struct pid *pid; }; +static inline bool fanotify_event_has_path(struct fanotify_event *event) +{ + return event->fh_type == FILEID_ROOT; +} + +static inline bool fanotify_event_has_fid(struct fanotify_event *event) +{ + return event->fh_type != FILEID_ROOT && + event->fh_type != FILEID_INVALID; +} + +static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event) +{ + return fanotify_event_has_fid(event) && + event->fh_len > FANOTIFY_INLINE_FH_LEN; +} + +static inline void *fanotify_event_fh(struct fanotify_event *event) +{ + return fanotify_fid_fh(&event->fid, event->fh_len); +} + /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. When the @@ -29,29 +115,31 @@ struct fanotify_event_info { * group->notification_list to group->fanotify_data.access_list to wait for * user response. */ -struct fanotify_perm_event_info { - struct fanotify_event_info fae; - int response; /* userspace answer to question */ +struct fanotify_perm_event { + struct fanotify_event fae; + unsigned short response; /* userspace answer to the event */ + unsigned short state; /* state of the event */ int fd; /* fd we passed to userspace for this event */ }; -static inline struct fanotify_perm_event_info * +static inline struct fanotify_perm_event * FANOTIFY_PE(struct fsnotify_event *fse) { - return container_of(fse, struct fanotify_perm_event_info, fae.fse); + return container_of(fse, struct fanotify_perm_event, fae.fse); } static inline bool fanotify_is_perm_event(u32 mask) { return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) && - mask & FAN_ALL_PERM_EVENTS; + mask & FANOTIFY_PERM_EVENTS; } -static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) +static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) { - return container_of(fse, struct fanotify_event_info, fse); + return container_of(fse, struct 
fanotify_event, fse); } -struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path); +struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, + struct inode *inode, u32 mask, + const void *data, int data_type, + __kernel_fsid_t *fsid); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 69054886915b..56992b32c6bb 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -17,6 +17,8 @@ #include <linux/compat.h> #include <linux/sched/signal.h> #include <linux/memcontrol.h> +#include <linux/statfs.h> +#include <linux/exportfs.h> #include <asm/ioctls.h> @@ -47,33 +49,55 @@ struct kmem_cache *fanotify_mark_cache __read_mostly; struct kmem_cache *fanotify_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; +#define FANOTIFY_EVENT_ALIGN 4 + +static int fanotify_event_info_len(struct fanotify_event *event) +{ + if (!fanotify_event_has_fid(event)) + return 0; + + return roundup(sizeof(struct fanotify_event_info_fid) + + sizeof(struct file_handle) + event->fh_len, + FANOTIFY_EVENT_ALIGN); +} + /* * Get an fsnotify notification event if one exists and is small * enough to fit in "count". Return an error pointer if the count - * is not large enough. - * - * Called with the group->notification_lock held. + * is not large enough. When permission event is dequeued, its state is + * updated accordingly. 
*/ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, size_t count) { - assert_spin_locked(&group->notification_lock); + size_t event_size = FAN_EVENT_METADATA_LEN; + struct fsnotify_event *fsn_event = NULL; pr_debug("%s: group=%p count=%zd\n", __func__, group, count); + spin_lock(&group->notification_lock); if (fsnotify_notify_queue_is_empty(group)) - return NULL; + goto out; - if (FAN_EVENT_METADATA_LEN > count) - return ERR_PTR(-EINVAL); + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + event_size += fanotify_event_info_len( + FANOTIFY_E(fsnotify_peek_first_event(group))); + } - /* held the notification_lock the whole time, so this is the - * same event we peeked above */ - return fsnotify_remove_first_event(group); + if (event_size > count) { + fsn_event = ERR_PTR(-EINVAL); + goto out; + } + fsn_event = fsnotify_remove_first_event(group); + if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask)) + FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED; +out: + spin_unlock(&group->notification_lock); + return fsn_event; } static int create_fd(struct fsnotify_group *group, - struct fanotify_event_info *event, + struct fanotify_event *event, struct file **file) { int client_fd; @@ -114,62 +138,32 @@ static int create_fd(struct fsnotify_group *group, return client_fd; } -static int fill_event_metadata(struct fsnotify_group *group, - struct fanotify_event_metadata *metadata, - struct fsnotify_event *fsn_event, - struct file **file) -{ - int ret = 0; - struct fanotify_event_info *event; - - pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, - group, metadata, fsn_event); - - *file = NULL; - event = container_of(fsn_event, struct fanotify_event_info, fse); - metadata->event_len = FAN_EVENT_METADATA_LEN; - metadata->metadata_len = FAN_EVENT_METADATA_LEN; - metadata->vers = FANOTIFY_METADATA_VERSION; - metadata->reserved = 0; - metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS; - metadata->pid = pid_vnr(event->tgid); - if 
(unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) - metadata->fd = FAN_NOFD; - else { - metadata->fd = create_fd(group, event, file); - if (metadata->fd < 0) - ret = metadata->fd; - } - - return ret; -} - -static struct fanotify_perm_event_info *dequeue_event( - struct fsnotify_group *group, int fd) +/* + * Finish processing of permission event by setting it to ANSWERED state and + * drop group->notification_lock. + */ +static void finish_permission_event(struct fsnotify_group *group, + struct fanotify_perm_event *event, + unsigned int response) + __releases(&group->notification_lock) { - struct fanotify_perm_event_info *event, *return_e = NULL; - - spin_lock(&group->notification_lock); - list_for_each_entry(event, &group->fanotify_data.access_list, - fae.fse.list) { - if (event->fd != fd) - continue; + bool destroy = false; - list_del_init(&event->fae.fse.list); - return_e = event; - break; - } + assert_spin_locked(&group->notification_lock); + event->response = response; + if (event->state == FAN_EVENT_CANCELED) + destroy = true; + else + event->state = FAN_EVENT_ANSWERED; spin_unlock(&group->notification_lock); - - pr_debug("%s: found return_re=%p\n", __func__, return_e); - - return return_e; + if (destroy) + fsnotify_destroy_event(group, &event->fae.fse); } static int process_access_response(struct fsnotify_group *group, struct fanotify_response *response_struct) { - struct fanotify_perm_event_info *event; + struct fanotify_perm_event *event; int fd = response_struct->fd; int response = response_struct->response; @@ -191,45 +185,118 @@ static int process_access_response(struct fsnotify_group *group, if (fd < 0) return -EINVAL; - if ((response & FAN_AUDIT) && !group->fanotify_data.audit) + if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) return -EINVAL; - event = dequeue_event(group, fd); - if (!event) - return -ENOENT; + spin_lock(&group->notification_lock); + list_for_each_entry(event, &group->fanotify_data.access_list, + fae.fse.list) { + if 
(event->fd != fd) + continue; - event->response = response; - wake_up(&group->fanotify_data.access_waitq); + list_del_init(&event->fae.fse.list); + finish_permission_event(group, event, response); + wake_up(&group->fanotify_data.access_waitq); + return 0; + } + spin_unlock(&group->notification_lock); - return 0; + return -ENOENT; } -static ssize_t copy_event_to_user(struct fsnotify_group *group, - struct fsnotify_event *event, - char __user *buf) +static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) { - struct fanotify_event_metadata fanotify_event_metadata; - struct file *f; - int fd, ret; + struct fanotify_event_info_fid info = { }; + struct file_handle handle = { }; + size_t fh_len = event->fh_len; + size_t len = fanotify_event_info_len(event); - pr_debug("%s: group=%p event=%p\n", __func__, group, event); + if (!len) + return 0; - ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); - if (ret < 0) - return ret; + if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len)) + return -EFAULT; + + /* Copy event info fid header followed by variable sized file handle */ + info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID; + info.hdr.len = len; + info.fsid = event->fid.fsid; + if (copy_to_user(buf, &info, sizeof(info))) + return -EFAULT; + + buf += sizeof(info); + len -= sizeof(info); + handle.handle_type = event->fh_type; + handle.handle_bytes = fh_len; + if (copy_to_user(buf, &handle, sizeof(handle))) + return -EFAULT; + + buf += sizeof(handle); + len -= sizeof(handle); + if (copy_to_user(buf, fanotify_event_fh(event), fh_len)) + return -EFAULT; + + /* Pad with 0's */ + buf += fh_len; + len -= fh_len; + WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); + if (len > 0 && clear_user(buf, len)) + return -EFAULT; + + return 0; +} + +static ssize_t copy_event_to_user(struct fsnotify_group *group, + struct fsnotify_event *fsn_event, + char __user *buf, size_t count) +{ + struct fanotify_event_metadata metadata; + struct 
fanotify_event *event; + struct file *f = NULL; + int ret, fd = FAN_NOFD; + + pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); + + event = container_of(fsn_event, struct fanotify_event, fse); + metadata.event_len = FAN_EVENT_METADATA_LEN; + metadata.metadata_len = FAN_EVENT_METADATA_LEN; + metadata.vers = FANOTIFY_METADATA_VERSION; + metadata.reserved = 0; + metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; + metadata.pid = pid_vnr(event->pid); + + if (fanotify_event_has_path(event)) { + fd = create_fd(group, event, &f); + if (fd < 0) + return fd; + } else if (fanotify_event_has_fid(event)) { + metadata.event_len += fanotify_event_info_len(event); + } + metadata.fd = fd; - fd = fanotify_event_metadata.fd; ret = -EFAULT; - if (copy_to_user(buf, &fanotify_event_metadata, - fanotify_event_metadata.event_len)) + /* + * Sanity check copy size in case get_one_event() and + * fill_event_metadata() event_len sizes ever get out of sync. + */ + if (WARN_ON_ONCE(metadata.event_len > count)) + goto out_close_fd; + + if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) goto out_close_fd; if (fanotify_is_perm_event(event->mask)) - FANOTIFY_PE(event)->fd = fd; + FANOTIFY_PE(fsn_event)->fd = fd; - if (fd != FAN_NOFD) + if (fanotify_event_has_path(event)) { fd_install(fd, f); - return fanotify_event_metadata.event_len; + } else if (fanotify_event_has_fid(event)) { + ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN); + if (ret < 0) + return ret; + } + + return metadata.event_len; out_close_fd: if (fd != FAN_NOFD) { @@ -270,10 +337,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, add_wait_queue(&group->notification_waitq, &wait); while (1) { - spin_lock(&group->notification_lock); kevent = get_one_event(group, count); - spin_unlock(&group->notification_lock); - if (IS_ERR(kevent)) { ret = PTR_ERR(kevent); break; @@ -295,7 +359,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, continue; } - ret = 
copy_event_to_user(group, kevent, buf); + ret = copy_event_to_user(group, kevent, buf, count); if (unlikely(ret == -EOPENSTALE)) { /* * We cannot report events with stale fd so drop it. @@ -310,11 +374,13 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, * Permission events get queued to wait for response. Other * events can be destroyed now. */ - if (!fanotify_is_perm_event(kevent->mask)) { + if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) { fsnotify_destroy_event(group, kevent); } else { if (ret <= 0) { - FANOTIFY_PE(kevent)->response = FAN_DENY; + spin_lock(&group->notification_lock); + finish_permission_event(group, + FANOTIFY_PE(kevent), FAN_DENY); wake_up(&group->fanotify_data.access_waitq); } else { spin_lock(&group->notification_lock); @@ -364,7 +430,7 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - struct fanotify_perm_event_info *event, *next; + struct fanotify_perm_event *event; struct fsnotify_event *fsn_event; /* @@ -379,13 +445,12 @@ static int fanotify_release(struct inode *ignored, struct file *file) * and simulate reply from userspace. 
*/ spin_lock(&group->notification_lock); - list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, - fae.fse.list) { - pr_debug("%s: found group=%p event=%p\n", __func__, group, - event); - + while (!list_empty(&group->fanotify_data.access_list)) { + event = list_first_entry(&group->fanotify_data.access_list, + struct fanotify_perm_event, fae.fse.list); list_del_init(&event->fae.fse.list); - event->response = FAN_ALLOW; + finish_permission_event(group, event, FAN_ALLOW); + spin_lock(&group->notification_lock); } /* @@ -395,13 +460,14 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ while (!fsnotify_notify_queue_is_empty(group)) { fsn_event = fsnotify_remove_first_event(group); - if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) { + if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); - spin_lock(&group->notification_lock); } else { - FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; + finish_permission_event(group, FANOTIFY_PE(fsn_event), + FAN_ALLOW); } + spin_lock(&group->notification_lock); } spin_unlock(&group->notification_lock); @@ -506,18 +572,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { - __u32 tmask = fsn_mark->mask & ~mask; - - if (flags & FAN_MARK_ONDIR) - tmask &= ~FAN_ONDIR; - oldmask = fsn_mark->mask; - fsn_mark->mask = tmask; + fsn_mark->mask &= ~mask; } else { - __u32 tmask = fsn_mark->ignored_mask & ~mask; - if (flags & FAN_MARK_ONDIR) - tmask &= ~FAN_ONDIR; - fsn_mark->ignored_mask = tmask; + fsn_mark->ignored_mask &= ~mask; } *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); spin_unlock(&fsn_mark->lock); @@ -563,6 +621,13 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, mask, flags); } +static int fanotify_remove_sb_mark(struct fsnotify_group *group, + struct super_block *sb, __u32 mask, + 
unsigned int flags) +{ + return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags); +} + static int fanotify_remove_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, unsigned int flags) @@ -579,19 +644,10 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { - __u32 tmask = fsn_mark->mask | mask; - - if (flags & FAN_MARK_ONDIR) - tmask |= FAN_ONDIR; - oldmask = fsn_mark->mask; - fsn_mark->mask = tmask; + fsn_mark->mask |= mask; } else { - __u32 tmask = fsn_mark->ignored_mask | mask; - if (flags & FAN_MARK_ONDIR) - tmask |= FAN_ONDIR; - - fsn_mark->ignored_mask = tmask; + fsn_mark->ignored_mask |= mask; if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } @@ -602,7 +658,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, - unsigned int type) + unsigned int type, + __kernel_fsid_t *fsid) { struct fsnotify_mark *mark; int ret; @@ -615,7 +672,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, return ERR_PTR(-ENOMEM); fsnotify_init_mark(mark, group); - ret = fsnotify_add_mark_locked(mark, connp, type, 0); + ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -627,7 +684,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, static int fanotify_add_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, unsigned int type, - __u32 mask, unsigned int flags) + __u32 mask, unsigned int flags, + __kernel_fsid_t *fsid) { struct fsnotify_mark *fsn_mark; __u32 added; @@ -635,7 +693,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - 
fsn_mark = fanotify_add_new_mark(group, connp, type); + fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); if (IS_ERR(fsn_mark)) { mutex_unlock(&group->mark_mutex); return PTR_ERR(fsn_mark); @@ -652,15 +710,23 @@ static int fanotify_add_mark(struct fsnotify_group *group, static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags); + FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); +} + +static int fanotify_add_sb_mark(struct fsnotify_group *group, + struct super_block *sb, __u32 mask, + unsigned int flags, __kernel_fsid_t *fsid) +{ + return fanotify_add_mark(group, &sb->s_fsnotify_marks, + FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); } static int fanotify_add_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); @@ -671,11 +737,11 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, */ if ((flags & FAN_MARK_IGNORED_MASK) && !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && - (atomic_read(&inode->i_writecount) > 0)) + inode_is_open_for_write(inode)) return 0; return fanotify_add_mark(group, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, mask, flags); + FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); } /* fanotify syscalls */ @@ -684,18 +750,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) struct fsnotify_group *group; int f_flags, fd; struct user_struct *user; - struct fanotify_event_info *oevent; + struct fanotify_event *oevent; - pr_debug("%s: flags=%d event_f_flags=%d\n", - __func__, flags, event_f_flags); + pr_debug("%s: flags=%x event_f_flags=%x\n", + __func__, flags, event_f_flags); if (!capable(CAP_SYS_ADMIN)) return -EPERM; #ifdef 
CONFIG_AUDITSYSCALL - if (flags & ~(FAN_ALL_INIT_FLAGS | FAN_ENABLE_AUDIT)) + if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) #else - if (flags & ~FAN_ALL_INIT_FLAGS) + if (flags & ~FANOTIFY_INIT_FLAGS) #endif return -EINVAL; @@ -711,6 +777,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) return -EINVAL; } + if ((flags & FAN_REPORT_FID) && + (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF) + return -EINVAL; + user = get_current_user(); if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { free_uid(user); @@ -731,10 +801,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } group->fanotify_data.user = user; + group->fanotify_data.flags = flags; atomic_inc(&user->fanotify_listeners); group->memcg = get_mem_cgroup_from_mm(current->mm); - oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); + oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, + FSNOTIFY_EVENT_NONE, NULL); if (unlikely(!oevent)) { fd = -ENOMEM; goto out_destroy_group; @@ -746,7 +818,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.f_flags = event_f_flags; init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); - switch (flags & FAN_ALL_CLASS_BITS) { + switch (flags & FANOTIFY_CLASS_BITS) { case FAN_CLASS_NOTIF: group->priority = FS_PRIO_0; break; @@ -783,7 +855,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) goto out_destroy_group; - group->fanotify_data.audit = true; } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); @@ -797,6 +868,48 @@ out_destroy_group: return fd; } +/* Check if filesystem can encode a unique fid */ +static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) +{ + __kernel_fsid_t root_fsid; + int err; + + /* + * Make sure path is not 
in filesystem with zero fsid (e.g. tmpfs). + */ + err = vfs_get_fsid(path->dentry, fsid); + if (err) + return err; + + if (!fsid->val[0] && !fsid->val[1]) + return -ENODEV; + + /* + * Make sure path is not inside a filesystem subvolume (e.g. btrfs) + * which uses a different fsid than sb root. + */ + err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid); + if (err) + return err; + + if (root_fsid.val[0] != fsid->val[0] || + root_fsid.val[1] != fsid->val[1]) + return -EXDEV; + + /* + * We need to make sure that the file system supports at least + * encoding a file handle so user can use name_to_handle_at() to + * compare fid returned with event to the file handle of watched + * objects. However, name_to_handle_at() requires that the + * filesystem also supports decoding file handles. + */ + if (!path->dentry->d_sb->s_export_op || + !path->dentry->d_sb->s_export_op->fh_to_dentry) + return -EOPNOTSUPP; + + return 0; +} + static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { @@ -805,7 +918,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; - u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD; + __kernel_fsid_t __fsid, *fsid = NULL; + u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", @@ -815,8 +930,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (mask & ((__u64)0xffffffff << 32)) return -EINVAL; - if (flags & ~FAN_ALL_MARK_FLAGS) + if (flags & ~FANOTIFY_MARK_FLAGS) + return -EINVAL; + + switch (mark_type) { + case FAN_MARK_INODE: + case FAN_MARK_MOUNT: + case FAN_MARK_FILESYSTEM: + break; + default: return -EINVAL; + } + switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { case FAN_MARK_ADD: /* fallthrough */ case 
FAN_MARK_REMOVE: @@ -824,20 +949,15 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; break; case FAN_MARK_FLUSH: - if (flags & ~(FAN_MARK_MOUNT | FAN_MARK_FLUSH)) + if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH)) return -EINVAL; break; default: return -EINVAL; } - if (mask & FAN_ONDIR) { - flags |= FAN_MARK_ONDIR; - mask &= ~FAN_ONDIR; - } - if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) - valid_mask |= FAN_ALL_PERM_EVENTS; + valid_mask |= FANOTIFY_PERM_EVENTS; if (mask & ~valid_mask) return -EINVAL; @@ -857,14 +977,28 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * allowed to set permissions events. */ ret = -EINVAL; - if (mask & FAN_ALL_PERM_EVENTS && + if (mask & FANOTIFY_PERM_EVENTS && group->priority == FS_PRIO_0) goto fput_and_out; + /* + * Events with data type inode do not carry enough information to report + * event->fd, so we do not allow setting a mask for inode events unless + * group supports reporting fid. + * inode events are not supported on a mount mark, because they do not + * carry enough information (i.e. path) to be filtered by mount point. 
+ */ + if (mask & FANOTIFY_INODE_EVENTS && + (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) || + mark_type == FAN_MARK_MOUNT)) + goto fput_and_out; + if (flags & FAN_MARK_FLUSH) { ret = 0; - if (flags & FAN_MARK_MOUNT) + if (mark_type == FAN_MARK_MOUNT) fsnotify_clear_vfsmount_marks_by_group(group); + else if (mark_type == FAN_MARK_FILESYSTEM) + fsnotify_clear_sb_marks_by_group(group); else fsnotify_clear_inode_marks_by_group(group); goto fput_and_out; @@ -874,8 +1008,16 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto fput_and_out; + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + ret = fanotify_test_fid(&path, &__fsid); + if (ret) + goto path_put_and_out; + + fsid = &__fsid; + } + /* inode held in place by reference to path; group by fget on fd */ - if (!(flags & FAN_MARK_MOUNT)) + if (mark_type == FAN_MARK_INODE) inode = path.dentry->d_inode; else mnt = path.mnt; @@ -883,21 +1025,32 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, /* create/update an inode mark */ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: - if (flags & FAN_MARK_MOUNT) - ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); + if (mark_type == FAN_MARK_MOUNT) + ret = fanotify_add_vfsmount_mark(group, mnt, mask, + flags, fsid); + else if (mark_type == FAN_MARK_FILESYSTEM) + ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, + flags, fsid); else - ret = fanotify_add_inode_mark(group, inode, mask, flags); + ret = fanotify_add_inode_mark(group, inode, mask, + flags, fsid); break; case FAN_MARK_REMOVE: - if (flags & FAN_MARK_MOUNT) - ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags); + if (mark_type == FAN_MARK_MOUNT) + ret = fanotify_remove_vfsmount_mark(group, mnt, mask, + flags); + else if (mark_type == FAN_MARK_FILESYSTEM) + ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, + flags); else - ret = fanotify_remove_inode_mark(group, inode, mask, flags); + ret = 
fanotify_remove_inode_mark(group, inode, mask, + flags); break; default: ret = -EINVAL; } +path_put_and_out: path_put(&path); fput_and_out: fdput(f); @@ -934,12 +1087,15 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); + fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); + fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC); if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { fanotify_perm_event_cachep = - KMEM_CACHE(fanotify_perm_event_info, SLAB_PANIC); + KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } return 0; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 86fcf5814279..1e2bfd26b352 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -15,6 +15,7 @@ #include <linux/exportfs.h> #include "inotify/inotify.h" +#include "fdinfo.h" #include "fsnotify.h" #if defined(CONFIG_PROC_FS) @@ -131,37 +132,20 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) { + struct super_block *sb = fsnotify_conn_sb(mark->connector); + + seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", + sb->s_dev, mflags, mark->mask, mark->ignored_mask); } } void fanotify_show_fdinfo(struct seq_file *m, struct file *f) { struct fsnotify_group *group = f->private_data; - unsigned int flags = 0; - - switch (group->priority) { - case FS_PRIO_0: - flags |= FAN_CLASS_NOTIF; - break; - case FS_PRIO_1: - flags |= FAN_CLASS_CONTENT; - break; - case FS_PRIO_2: - flags |= FAN_CLASS_PRE_CONTENT; - break; - } - - if (group->max_events == UINT_MAX) - flags |= FAN_UNLIMITED_QUEUE; - - if (group->fanotify_data.max_marks 
== UINT_MAX) - flags |= FAN_UNLIMITED_MARKS; - - if (group->fanotify_data.audit) - flags |= FAN_ENABLE_AUDIT; seq_printf(m, "fanotify flags:%x event-flags:%x\n", - flags, group->fanotify_data.f_flags); + group->fanotify_data.flags, group->fanotify_data.f_flags); show_fdinfo(m, f, fanotify_fdinfo); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ababdbfab537..df06f3da166c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -48,7 +48,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) * Called during unmount with no locks held, so needs to be safe against * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. */ -void fsnotify_unmount_inodes(struct super_block *sb) +static void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *iput_inode = NULL; @@ -96,6 +96,15 @@ void fsnotify_unmount_inodes(struct super_block *sb) if (iput_inode) iput(iput_inode); + /* Wait for outstanding inode references from connectors */ + wait_var_event(&sb->s_fsnotify_inode_refs, + !atomic_long_read(&sb->s_fsnotify_inode_refs)); +} + +void fsnotify_sb_delete(struct super_block *sb) +{ + fsnotify_unmount_inodes(sb); + fsnotify_clear_marks_by_sb(sb); } /* @@ -158,9 +167,9 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask parent = dget_parent(dentry); p_inode = parent->d_inode; - if (unlikely(!fsnotify_inode_watches_children(p_inode))) + if (unlikely(!fsnotify_inode_watches_children(p_inode))) { __fsnotify_update_child_dentry_flags(p_inode); - else if (p_inode->i_fsnotify_mask & mask) { + } else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) { struct name_snapshot name; /* we are notifying a parent so come up with the new mask which @@ -190,7 +199,7 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & 
ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignored_mask = 0; struct fsnotify_mark *mark; @@ -319,15 +328,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, const unsigned char *file_name, u32 cookie) { struct fsnotify_iter_info iter_info = {}; - struct mount *mnt; + struct super_block *sb = to_tell->i_sb; + struct mount *mnt = NULL; + __u32 mnt_or_sb_mask = sb->s_fsnotify_mask; int ret = 0; - /* global tests shouldn't care about events on child only the specific event */ - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); - if (data_is == FSNOTIFY_EVENT_PATH) + if (data_is == FSNOTIFY_EVENT_PATH) { mnt = real_mount(((const struct path *)data)->mnt); - else - mnt = NULL; + mnt_or_sb_mask |= mnt->mnt_fsnotify_mask; + } + /* An event "on child" is not intended for a mount/sb mark */ + if (mask & FS_EVENT_ON_CHILD) + mnt_or_sb_mask = 0; /* * Optimization: srcu_read_lock() has a memory barrier which can @@ -336,31 +349,32 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (!to_tell->i_fsnotify_marks && + if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks && (!mnt || !mnt->mnt_fsnotify_marks)) return 0; /* * if this is a modify event we may need to clear the ignored masks - * otherwise return if neither the inode nor the vfsmount care about + * otherwise return if neither the inode nor the vfsmount/sb care about * this type of event. 
*/ if (!(mask & FS_MODIFY) && - !(test_mask & to_tell->i_fsnotify_mask) && - !(mnt && test_mask & mnt->mnt_fsnotify_mask)) + !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask))) return 0; iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&to_tell->i_fsnotify_marks); + iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = + fsnotify_first_mark(&sb->s_fsnotify_marks); if (mnt) { iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } /* - * We need to merge inode & vfsmount mark lists so that inode mark - * ignore masks are properly reflected for mount mark notifications. + * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark + * ignore masks are properly reflected for mount/sb mark notifications. * That's why this traversal is so complicated... */ while (fsnotify_iter_select_report_types(&iter_info)) { @@ -386,7 +400,7 @@ static __init int fsnotify_init(void) { int ret; - BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 7902653dd577..5a00121fb219 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -21,6 +21,12 @@ static inline struct mount *fsnotify_conn_mount( return container_of(conn->obj, struct mount, mnt_fsnotify_marks); } +static inline struct super_block *fsnotify_conn_sb( + struct fsnotify_mark_connector *conn) +{ + return container_of(conn->obj, struct super_block, s_fsnotify_marks); +} + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -43,6 +49,11 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks); } +/* run the list of all marks associated with sb and destroy them */ +static inline void 
fsnotify_clear_marks_by_sb(struct super_block *sb) +{ + fsnotify_destroy_marks(&sb->s_fsnotify_marks); +} /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 7e4578d35b61..74ae60305189 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -5,6 +5,7 @@ struct inotify_event_info { struct fsnotify_event fse; + u32 mask; int wd; u32 sync_cookie; int name_len; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index f4184b4f3815..ff30abd6a49b 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -43,11 +43,11 @@ static bool event_compare(struct fsnotify_event *old_fsn, { struct inotify_event_info *old, *new; - if (old_fsn->mask & FS_IN_IGNORED) - return false; old = INOTIFY_E(old_fsn); new = INOTIFY_E(new_fsn); - if ((old_fsn->mask == new_fsn->mask) && + if (old->mask & FS_IN_IGNORED) + return false; + if ((old->mask == new->mask) && (old_fsn->inode == new_fsn->inode) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) @@ -113,8 +113,18 @@ int inotify_handle_event(struct fsnotify_group *group, return -ENOMEM; } + /* + * We now report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events + * for fanotify. inotify never reported IN_ISDIR with those events. + * It looks like an oversight, but to avoid the risk of breaking + * existing inotify programs, mask the flag out from those events. 
+ */ + if (mask & (IN_MOVE_SELF | IN_DELETE_SELF)) + mask &= ~IN_ISDIR; + fsn_event = &event->fse; - fsnotify_init_event(fsn_event, inode, mask); + fsnotify_init_event(fsn_event, inode); + event->mask = mask; event->wd = i_mark->wd; event->sync_cookie = cookie; event->name_len = len; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ac6978d3208c..e2901fbb9f76 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -189,7 +189,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, */ pad_name_len = round_event_name_len(fsn_event); inotify_event.len = pad_name_len; - inotify_event.mask = inotify_mask_to_arg(fsn_event->mask); + inotify_event.mask = inotify_mask_to_arg(event->mask); inotify_event.wd = event->wd; inotify_event.cookie = event->sync_cookie; @@ -634,7 +634,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + fsnotify_init_event(group->overflow_event, NULL); + oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; oevent->name_len = 0; @@ -724,8 +725,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, return -EBADF; /* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */ - if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) - return -EINVAL; + if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) { + ret = -EINVAL; + goto fput_and_out; + } /* verify that this is indeed an inotify instance */ if (unlikely(f.file->f_op != &inotify_fops)) { @@ -815,7 +818,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); - BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22); + BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, 
SLAB_PANIC|SLAB_ACCOUNT); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 59cdb27826de..d593d4269561 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -82,6 +82,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/srcu.h> +#include <linux/ratelimit.h> #include <linux/atomic.h> @@ -115,6 +116,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) return &fsnotify_conn_inode(conn)->i_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; + else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) + return &fsnotify_conn_sb(conn)->s_fsnotify_mask; return NULL; } @@ -179,19 +182,24 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static struct inode *fsnotify_detach_connector_from_object( - struct fsnotify_mark_connector *conn) +static void *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn, + unsigned int *type) { struct inode *inode = NULL; + *type = conn->type; if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) return NULL; if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; + atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs); } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { + fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; } rcu_assign_pointer(*(conn->obj), NULL); @@ -211,10 +219,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) fsnotify_put_group(group); } +/* Drop object reference originally held by a connector */ +static void fsnotify_drop_object(unsigned int type, void *objp) +{ + struct inode *inode; + struct super_block *sb; + + if (!objp) + return; + /* Currently only inode references are passed to be dropped */ + if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE)) + return; + inode = objp; + sb = inode->i_sb; + 
iput(inode); + if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs)) + wake_up_var(&sb->s_fsnotify_inode_refs); +} + void fsnotify_put_mark(struct fsnotify_mark *mark) { struct fsnotify_mark_connector *conn; - struct inode *inode = NULL; + void *objp = NULL; + unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED; bool free_conn = false; /* Catch marks that were actually never attached to object */ @@ -234,7 +261,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) conn = mark->connector; hlist_del_init_rcu(&mark->obj_list); if (hlist_empty(&conn->list)) { - inode = fsnotify_detach_connector_from_object(conn); + objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { __fsnotify_recalc_mask(conn); @@ -242,7 +269,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) mark->connector = NULL; spin_unlock(&conn->lock); - iput(inode); + fsnotify_drop_object(type, objp); if (free_conn) { spin_lock(&destroy_lock); @@ -455,7 +482,8 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) } static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - unsigned int type) + unsigned int type, + __kernel_fsid_t *fsid) { struct inode *inode = NULL; struct fsnotify_mark_connector *conn; @@ -467,6 +495,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, INIT_HLIST_HEAD(&conn->list); conn->type = type; conn->obj = connp; + /* Cache fsid of filesystem containing the object */ + if (fsid) + conn->fsid = *fsid; + else + conn->fsid.val[0] = conn->fsid.val[1] = 0; if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) inode = igrab(fsnotify_conn_inode(conn)); /* @@ -518,7 +551,7 @@ out: */ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups) + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; @@ -527,15 +560,36 @@ static int fsnotify_add_mark_list(struct 
fsnotify_mark *mark, if (WARN_ON(!fsnotify_valid_obj_type(type))) return -EINVAL; + + /* Backend is expected to check for zero fsid (e.g. tmpfs) */ + if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1])) + return -ENODEV; + restart: spin_lock(&mark->lock); conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, type); + err = fsnotify_attach_connector_to_object(connp, type, fsid); if (err) return err; goto restart; + } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) && + (fsid->val[0] != conn->fsid.val[0] || + fsid->val[1] != conn->fsid.val[1])) { + /* + * Backend is expected to check for non uniform fsid + * (e.g. btrfs), but maybe we missed something? + * Only allow setting conn->fsid once to non zero fsid. + * inotify and non-fid fanotify groups do not set nor test + * conn->fsid. + */ + pr_warn_ratelimited("%s: fsid mismatch on object of type %u: " + "%x.%x != %x.%x\n", __func__, conn->type, + fsid->val[0], fsid->val[1], + conn->fsid.val[0], conn->fsid.val[1]); + err = -EXDEV; + goto out_err; } /* is mark the first mark? 
*/ @@ -580,7 +634,7 @@ out_err: */ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups) + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_group *group = mark->group; int ret = 0; @@ -601,7 +655,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, type, allow_dups); + ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid); if (ret) goto err; @@ -622,13 +676,13 @@ err: } int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int type, int allow_dups) + unsigned int type, int allow_dups, __kernel_fsid_t *fsid) { int ret; struct fsnotify_group *group = mark->group; mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups); + ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid); mutex_unlock(&group->mark_mutex); return ret; } @@ -709,7 +763,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp) { struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark, *old_mark = NULL; - struct inode *inode; + void *objp; + unsigned int type; conn = fsnotify_grab_connector(connp); if (!conn) @@ -735,11 +790,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp) * mark references get dropped. It would lead to strange results such * as delaying inode deletion or blocking unmount. 
*/ - inode = fsnotify_detach_connector_from_object(conn); + objp = fsnotify_detach_connector_from_object(conn, &type); spin_unlock(&conn->lock); if (old_mark) fsnotify_put_mark(old_mark); - iput(inode); + fsnotify_drop_object(type, objp); } /* diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 3c3e36745f59..5f3a54d444b5 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -71,7 +71,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event) { /* Overflow events are per-group and we don't want to free them */ - if (!event || event->mask == FS_Q_OVERFLOW) + if (!event || event == group->overflow_event) return; /* * If the event is still queued, we have a problem... Do an unreliable @@ -141,6 +141,18 @@ queue: return ret; } +void fsnotify_remove_queued_event(struct fsnotify_group *group, + struct fsnotify_event *event) +{ + assert_spin_locked(&group->notification_lock); + /* + * We need to init list head for the case of overflow event so that + * check in fsnotify_add_event() works + */ + list_del_init(&event->list); + group->q_len--; +} + /* * Remove and return the first event from the notification list. It is the * responsibility of the caller to destroy the obtained event @@ -155,13 +167,7 @@ struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group) event = list_first_entry(&group->notification_list, struct fsnotify_event, list); - /* - * We need to init list head for the case of overflow event so that - * check in fsnotify_add_event() works - */ - list_del_init(&event->list); - group->q_len--; - + fsnotify_remove_queued_event(group, event); return event; } @@ -194,23 +200,3 @@ void fsnotify_flush_notify(struct fsnotify_group *group) } spin_unlock(&group->notification_lock); } - -/* - * fsnotify_create_event - Allocate a new event which will be sent to each - * group's handle_event function if the group was interested in this - * particular event. 
- * - * @inode the inode which is supposed to receive the event (sometimes a - * parent of the inode to which the event happened. - * @mask what actually happened. - * @data pointer to the object which was actually affected - * @data_type flag indication if the data is a file, path, inode, nothing... - * @name the filename, if available - */ -void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode, - u32 mask) -{ - INIT_LIST_HEAD(&event->list); - event->inode = inode; - event->mask = mask; -} |