From ff0b16a9850e8a240ad59e10b0a1291a8fcf7cbc Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:25 -0500 Subject: fanotify: fscking all notification system fanotify is a novel file notification system which bases notification on giving userspace both an event type (open, close, read, write) and an open file descriptor to the object in question. This should address a number of races and problems with other notification systems like inotify and dnotify and should allow the future implementation of blocking or access controlled notification. These are useful for on access scanners or hierarchical storage management schemes. This patch just implements the basics of the fsnotify functions. Signed-off-by: Eric Paris --- include/linux/fanotify.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/linux/fanotify.h (limited to 'include/linux/fanotify.h') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h new file mode 100644 index 000000000000..b560f86d1401 --- /dev/null +++ b/include/linux/fanotify.h @@ -0,0 +1,40 @@ +#ifndef _LINUX_FANOTIFY_H +#define _LINUX_FANOTIFY_H + +#include + +/* the following events that user-space can register for */ +#define FAN_ACCESS 0x00000001 /* File was accessed */ +#define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_CLOSE_WRITE 0x00000008 /* Unwrittable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Writtable file closed */ +#define FAN_OPEN 0x00000020 /* File was opened */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ + +/* FIXME currently Q's have no limit.... */ +#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ + +/* helper events */ +#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ + +/* + * All of the events - we build the list by hand so that we can add flags in + * the future and not break backward compatibility. 
Apps will get only the + * events that they originally wanted. Be sure to add new events here! + */ +#define FAN_ALL_EVENTS (FAN_ACCESS |\ + FAN_MODIFY |\ + FAN_CLOSE |\ + FAN_OPEN) + +/* + * All legal FAN bits userspace can request (although possibly not all + * at the same time. + */ +#define FAN_ALL_INCOMING_EVENTS (FAN_ALL_EVENTS |\ + FAN_EVENT_ON_CHILD) +#ifdef __KERNEL__ + +#endif /* __KERNEL__ */ +#endif /* _LINUX_FANOTIFY_H */ -- cgit v1.2.3 From 52c923dd079df49f58016a9e56df184b132611d6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:26 -0500 Subject: fanotify: fanotify_init syscall implementation NAME fanotify_init - initialize an fanotify group SYNOPSIS int fanotify_init(unsigned int flags, unsigned int event_f_flags, int priority); DESCRIPTION fanotify_init() initializes a new fanotify instance and returns a file descriptor associated with the new fanotify event queue. The following values can be OR'd into the flags field: FAN_NONBLOCK Set the O_NONBLOCK file status flag on the new open file description. Using this flag saves extra calls to fcntl(2) to achieve the same result. FAN_CLOEXEC Set the close-on-exec (FD_CLOEXEC) flag on the new file descriptor. See the description of the O_CLOEXEC flag in open(2) for reasons why this may be useful. The event_f_flags argument is unused and must be set to 0 The priority argument is unused and must be set to 0 RETURN VALUE On success, this system call returns a new file descriptor. On error, -1 is returned, and errno is set to indicate the error. ERRORS EINVAL An invalid value was specified in flags. EINVAL A non-zero value was passed in event_f_flags or in priority ENFILE The system limit on the total number of file descriptors has been reached. ENOMEM Insufficient kernel memory is available. CONFORMING TO These system calls are Linux-specific. 
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.h | 2 ++ fs/notify/fanotify/fanotify_user.c | 61 +++++++++++++++++++++++++++++++++++++- include/linux/fanotify.h | 4 +++ 3 files changed, 66 insertions(+), 1 deletion(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 50765eb30fe4..dd656cfab1ba 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -4,6 +4,8 @@ #include #include +extern const struct fsnotify_ops fanotify_fsnotify_ops; + static inline bool fanotify_mask_valid(__u32 mask) { if (mask & ~((__u32)FAN_ALL_INCOMING_EVENTS)) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index cf176fc7086b..67c0b5e4a488 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1,13 +1,72 @@ #include #include +#include #include #include #include #include "fanotify.h" +static int fanotify_release(struct inode *ignored, struct file *file) +{ + struct fsnotify_group *group = file->private_data; + + pr_debug("%s: file=%p group=%p\n", __func__, file, group); + + /* matches the fanotify_init->fsnotify_alloc_group */ + fsnotify_put_group(group); + + return 0; +} + +static const struct file_operations fanotify_fops = { + .poll = NULL, + .read = NULL, + .fasync = NULL, + .release = fanotify_release, + .unlocked_ioctl = NULL, + .compat_ioctl = NULL, +}; + +/* fanotify syscalls */ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags, unsigned int, priority) { - return -ENOSYS; + struct fsnotify_group *group; + int f_flags, fd; + + pr_debug("%s: flags=%d event_f_flags=%d priority=%d\n", + __func__, flags, event_f_flags, priority); + + if (event_f_flags) + return -EINVAL; + if (priority) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (flags & ~FAN_ALL_INIT_FLAGS) + return -EINVAL; + + f_flags = (O_RDONLY | FMODE_NONOTIFY); + if (flags & FAN_CLOEXEC) + 
f_flags |= O_CLOEXEC; + if (flags & FAN_NONBLOCK) + f_flags |= O_NONBLOCK; + + /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ + group = fsnotify_alloc_group(&fanotify_fsnotify_ops); + if (IS_ERR(group)) + return PTR_ERR(group); + + fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); + if (fd < 0) + goto out_put_group; + + return fd; + +out_put_group: + fsnotify_put_group(group); + return fd; } diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index b560f86d1401..00bc6d4fbb58 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -18,6 +18,10 @@ /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_CLOEXEC 0x00000001 +#define FAN_NONBLOCK 0x00000002 + +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK) /* * All of the events - we build the list by hand so that we can add flags in * the future and not break backward compatibility. Apps will get only the -- cgit v1.2.3 From 2a3edf86040a7e15684525a2aadc29f532c51325 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:26 -0500 Subject: fanotify: fanotify_mark syscall implementation NAME fanotify_mark - add, remove, or modify an fanotify mark on a filesystem object SYNOPSIS int fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int dfd, const char *pathname) DESCRIPTION fanotify_mark() is used to add remove or modify a mark on a filesystem object. Marks are used to indicate that the fanotify group is interested in events which occur on that object. At this point in time marks may only be added to files and directories. 
fanotify_fd must be a file descriptor returned by fanotify_init() The flags field must contain exactly one of the following: FAN_MARK_ADD - bitwise OR the bits in mask and ignored mask into the mark FAN_MARK_REMOVE - bitwise remove the bits in mask and ignored mask from the mark The following values can be OR'd into the flags field: FAN_MARK_DONT_FOLLOW - same meaning as O_NOFOLLOW as described in open(2) FAN_MARK_ONLYDIR - same meaning as O_DIRECTORY as described in open(2) dfd may be any of the following: AT_FDCWD: the object will be looked up based on pathname similar to open(2) file descriptor of a directory: if pathname is not NULL the object to modify will be looked up similar to openat(2) file descriptor of the final object: if pathname is NULL the object to modify will be the object referenced by dfd The mask is the bitwise OR of the set of events of interest such as: FAN_ACCESS - object was accessed (read) FAN_MODIFY - object was modified (write) FAN_CLOSE_WRITE - object was writable and was closed FAN_CLOSE_NOWRITE - object was read only and was closed FAN_OPEN - object was opened FAN_EVENT_ON_CHILD - interested in events that happen to children. Only relevant when the object is a directory FAN_Q_OVERFLOW - event queue overflowed (not implemented) RETURN VALUE On success, this system call returns 0. On error, -1 is returned, and errno is set to indicate the error. ERRORS EINVAL An invalid value was specified in flags. EINVAL An invalid value was specified in mask. EINVAL An invalid value was specified in ignored_mask. EINVAL fanotify_fd is not a file descriptor as returned by fanotify_init() EBADF fanotify_fd is not a valid file descriptor EBADF dfd is not a valid file descriptor and path is NULL. ENOTDIR dfd is not a directory and path is not NULL EACCES no search permissions on some part of the path ENOENT file not found ENOMEM Insufficient kernel memory is available. CONFORMING TO These system calls are Linux-specific. 
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.h | 18 +++ fs/notify/fanotify/fanotify_user.c | 239 ++++++++++++++++++++++++++++++++++++- include/linux/fanotify.h | 13 ++ 3 files changed, 269 insertions(+), 1 deletion(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index dd656cfab1ba..59c3331a0e81 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -6,6 +6,24 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; +static inline bool fanotify_mark_flags_valid(unsigned int flags) +{ + /* must be either and add or a remove */ + if (!(flags & (FAN_MARK_ADD | FAN_MARK_REMOVE))) + return false; + + /* cannot be both add and remove */ + if ((flags & FAN_MARK_ADD) && + (flags & FAN_MARK_REMOVE)) + return false; + + /* cannot have more flags than we know about */ + if (flags & ~FAN_ALL_MARK_FLAGS) + return false; + + return true; +} + static inline bool fanotify_mask_valid(__u32 mask) { if (mask & ~((__u32)FAN_ALL_INCOMING_EVENTS)) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 55d6e379f2b6..bc4fa48157f1 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1,12 +1,18 @@ #include +#include #include #include #include +#include +#include #include #include +#include #include "fanotify.h" +static struct kmem_cache *fanotify_mark_cache __read_mostly; + static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; @@ -28,6 +34,185 @@ static const struct file_operations fanotify_fops = { .compat_ioctl = NULL, }; +static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) +{ + kmem_cache_free(fanotify_mark_cache, fsn_mark); +} + +static int fanotify_find_path(int dfd, const char __user *filename, + struct path *path, unsigned int flags) +{ + int ret; + + pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__, + dfd, 
filename, flags); + + if (filename == NULL) { + struct file *file; + int fput_needed; + + ret = -EBADF; + file = fget_light(dfd, &fput_needed); + if (!file) + goto out; + + ret = -ENOTDIR; + if ((flags & FAN_MARK_ONLYDIR) && + !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { + fput_light(file, fput_needed); + goto out; + } + + *path = file->f_path; + path_get(path); + fput_light(file, fput_needed); + } else { + unsigned int lookup_flags = 0; + + if (!(flags & FAN_MARK_DONT_FOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + if (flags & FAN_MARK_ONLYDIR) + lookup_flags |= LOOKUP_DIRECTORY; + + ret = user_path_at(dfd, filename, lookup_flags, path); + if (ret) + goto out; + } + + /* you can only watch an inode if you have read permissions on it */ + ret = inode_permission(path->dentry->d_inode, MAY_READ); + if (ret) + path_put(path); +out: + return ret; +} + +static int fanotify_remove_mark(struct fsnotify_group *group, + struct inode *inode, + __u32 mask) +{ + struct fsnotify_mark *fsn_mark; + __u32 new_mask; + + pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, + group, inode, mask); + + fsn_mark = fsnotify_find_mark(group, inode); + if (!fsn_mark) + return -ENOENT; + + spin_lock(&fsn_mark->lock); + fsn_mark->mask &= ~mask; + new_mask = fsn_mark->mask; + spin_unlock(&fsn_mark->lock); + + if (!new_mask) + fsnotify_destroy_mark(fsn_mark); + else + fsnotify_recalc_inode_mask(inode); + + fsnotify_recalc_group_mask(group); + + /* matches the fsnotify_find_mark() */ + fsnotify_put_mark(fsn_mark); + + return 0; +} + +static int fanotify_add_mark(struct fsnotify_group *group, + struct inode *inode, + __u32 mask) +{ + struct fsnotify_mark *fsn_mark; + __u32 old_mask, new_mask; + int ret; + + pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, + group, inode, mask); + + fsn_mark = fsnotify_find_mark(group, inode); + if (!fsn_mark) { + struct fsnotify_mark *new_fsn_mark; + + ret = -ENOMEM; + new_fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); + if 
(!new_fsn_mark) + goto out; + + fsnotify_init_mark(new_fsn_mark, fanotify_free_mark); + ret = fsnotify_add_mark(new_fsn_mark, group, inode, 0); + if (ret) { + fanotify_free_mark(new_fsn_mark); + goto out; + } + + fsn_mark = new_fsn_mark; + } + + ret = 0; + + spin_lock(&fsn_mark->lock); + old_mask = fsn_mark->mask; + fsn_mark->mask |= mask; + new_mask = fsn_mark->mask; + spin_unlock(&fsn_mark->lock); + + /* we made changes to a mask, update the group mask and the inode mask + * so things happen quickly. */ + if (old_mask != new_mask) { + /* more bits in old than in new? */ + int dropped = (old_mask & ~new_mask); + /* more bits in this mark than the inode's mask? */ + int do_inode = (new_mask & ~inode->i_fsnotify_mask); + /* more bits in this mark than the group? */ + int do_group = (new_mask & ~group->mask); + + /* update the inode with this new mark */ + if (dropped || do_inode) + fsnotify_recalc_inode_mask(inode); + + /* update the group mask with the new mask */ + if (dropped || do_group) + fsnotify_recalc_group_mask(group); + } + + /* match the init or the find.... */ + fsnotify_put_mark(fsn_mark); +out: + return ret; +} + +static int fanotify_update_mark(struct fsnotify_group *group, + struct inode *inode, int flags, + __u32 mask) +{ + pr_debug("%s: group=%p inode=%p flags=%x mask=%x\n", __func__, + group, inode, flags, mask); + + if (flags & FAN_MARK_ADD) + fanotify_add_mark(group, inode, mask); + else if (flags & FAN_MARK_REMOVE) + fanotify_remove_mark(group, inode, mask); + else + BUG(); + + return 0; +} + +static bool fanotify_mark_validate_input(int flags, + __u32 mask) +{ + pr_debug("%s: flags=%x mask=%x\n", __func__, flags, mask); + + /* are flags valid of this operation? */ + if (!fanotify_mark_flags_valid(flags)) + return false; + /* is the mask valid? 
*/ + if (!fanotify_mask_valid(mask)) + return false; + return true; +} + /* fanotify syscalls */ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags, unsigned int, priority) @@ -74,5 +259,57 @@ out_put_group: SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, __u64, mask, int, dfd, const char __user *, pathname) { - return -ENOSYS; + struct inode *inode; + struct fsnotify_group *group; + struct file *filp; + struct path path; + int ret, fput_needed; + + pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", + __func__, fanotify_fd, flags, dfd, pathname, mask); + + /* we only use the lower 32 bits as of right now. */ + if (mask & ((__u64)0xffffffff << 32)) + return -EINVAL; + + if (!fanotify_mark_validate_input(flags, mask)) + return -EINVAL; + + filp = fget_light(fanotify_fd, &fput_needed); + if (unlikely(!filp)) + return -EBADF; + + /* verify that this is indeed an fanotify instance */ + ret = -EINVAL; + if (unlikely(filp->f_op != &fanotify_fops)) + goto fput_and_out; + + ret = fanotify_find_path(dfd, pathname, &path, flags); + if (ret) + goto fput_and_out; + + /* inode held in place by reference to path; group by fget on fd */ + inode = path.dentry->d_inode; + group = filp->private_data; + + /* create/update an inode mark */ + ret = fanotify_update_mark(group, inode, flags, mask); + + path_put(&path); +fput_and_out: + fput_light(filp, fput_needed); + return ret; +} + +/* + * fanotify_user_setup - Our initialization function. Note that we cannnot return + * error because we have compiled-in VFS hooks. So an (unlikely) failure here + * must result in panic(). 
+ */ +static int __init fanotify_user_setup(void) +{ + fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); + + return 0; } +device_initcall(fanotify_user_setup); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 00bc6d4fbb58..95aeea2a3ca6 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -18,10 +18,23 @@ /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +/* flags used for fanotify_init() */ #define FAN_CLOEXEC 0x00000001 #define FAN_NONBLOCK 0x00000002 #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK) + +/* flags used for fanotify_modify_mark() */ +#define FAN_MARK_ADD 0x00000001 +#define FAN_MARK_REMOVE 0x00000002 +#define FAN_MARK_DONT_FOLLOW 0x00000004 +#define FAN_MARK_ONLYDIR 0x00000008 + +#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ + FAN_MARK_REMOVE |\ + FAN_MARK_DONT_FOLLOW |\ + FAN_MARK_ONLYDIR) + /* * All of the events - we build the list by hand so that we can add flags in * the future and not break backward compatibility. Apps will get only the -- cgit v1.2.3 From a1014f102322398e67524b68b3300acf384e6c1f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:26 -0500 Subject: fanotify: send events using read Send events to userspace by reading the file descriptor from fanotify_init(). One will get blocks of data which look like: struct fanotify_event_metadata { __u32 event_len; __u32 vers; __s32 fd; __u64 mask; __s64 pid; __u64 cookie; } __attribute__ ((packed)); Simple code to retrieve and deal with events is below while ((len = read(fan_fd, buf, sizeof(buf))) > 0) { struct fanotify_event_metadata *metadata; metadata = (void *)buf; while(FAN_EVENT_OK(metadata, len)) { [PROCESS HERE!!] 
if (metadata->fd >= 0 && close(metadata->fd) != 0) goto fail; metadata = FAN_EVENT_NEXT(metadata, len); } } Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.h | 5 + fs/notify/fanotify/fanotify_user.c | 220 ++++++++++++++++++++++++++++++++++++- include/linux/fanotify.h | 24 ++++ 3 files changed, 245 insertions(+), 4 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 59c3331a0e81..5608783c6bca 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -30,3 +30,8 @@ static inline bool fanotify_mask_valid(__u32 mask) return false; return true; } + +static inline __u32 fanotify_outgoing_mask(__u32 mask) +{ + return mask & FAN_ALL_OUTGOING_EVENTS; +} diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index bc4fa48157f1..a99550f83f8a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -4,15 +4,202 @@ #include #include #include +#include #include +#include #include #include #include +#include + +#include #include "fanotify.h" static struct kmem_cache *fanotify_mark_cache __read_mostly; +/* + * Get an fsnotify notification event if one exists and is small + * enough to fit in "count". Return an error pointer if the count + * is not large enough. + * + * Called with the group->notification_mutex held. 
+ */ +static struct fsnotify_event *get_one_event(struct fsnotify_group *group, + size_t count) +{ + BUG_ON(!mutex_is_locked(&group->notification_mutex)); + + pr_debug("%s: group=%p count=%zd\n", __func__, group, count); + + if (fsnotify_notify_queue_is_empty(group)) + return NULL; + + if (FAN_EVENT_METADATA_LEN > count) + return ERR_PTR(-EINVAL); + + /* held the notification_mutex the whole time, so this is the + * same event we peeked above */ + return fsnotify_remove_notify_event(group); +} + +static int create_and_fill_fd(struct fsnotify_group *group, + struct fanotify_event_metadata *metadata, + struct fsnotify_event *event) +{ + int client_fd; + struct dentry *dentry; + struct vfsmount *mnt; + struct file *new_file; + + pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, group, + metadata, event); + + client_fd = get_unused_fd(); + if (client_fd < 0) + return client_fd; + + if (event->data_type != FSNOTIFY_EVENT_PATH) { + WARN_ON(1); + put_unused_fd(client_fd); + return -EINVAL; + } + + /* + * we need a new file handle for the userspace program so it can read even if it was + * originally opened O_WRONLY. + */ + dentry = dget(event->path.dentry); + mnt = mntget(event->path.mnt); + /* it's possible this event was an overflow event. in that case dentry and mnt + * are NULL; That's fine, just don't call dentry open */ + if (dentry && mnt) + new_file = dentry_open(dentry, mnt, + O_RDONLY | O_LARGEFILE | FMODE_NONOTIFY, + current_cred()); + else + new_file = ERR_PTR(-EOVERFLOW); + if (IS_ERR(new_file)) { + /* + * we still send an event even if we can't open the file. this + * can happen when say tasks are gone and we try to open their + * /proc files or we try to open a WRONLY file like in sysfs + * we just send the errno to userspace since there isn't much + * else we can do. 
+ */ + put_unused_fd(client_fd); + client_fd = PTR_ERR(new_file); + } else { + fd_install(client_fd, new_file); + } + + metadata->fd = client_fd; + + return 0; +} + +static ssize_t fill_event_metadata(struct fsnotify_group *group, + struct fanotify_event_metadata *metadata, + struct fsnotify_event *event) +{ + pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, + group, metadata, event); + + metadata->event_len = FAN_EVENT_METADATA_LEN; + metadata->vers = FANOTIFY_METADATA_VERSION; + metadata->mask = fanotify_outgoing_mask(event->mask); + + return create_and_fill_fd(group, metadata, event); + +} + +static ssize_t copy_event_to_user(struct fsnotify_group *group, + struct fsnotify_event *event, + char __user *buf) +{ + struct fanotify_event_metadata fanotify_event_metadata; + int ret; + + pr_debug("%s: group=%p event=%p\n", __func__, group, event); + + ret = fill_event_metadata(group, &fanotify_event_metadata, event); + if (ret) + return ret; + + if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN)) + return -EFAULT; + + return FAN_EVENT_METADATA_LEN; +} + +/* intofiy userspace file descriptor functions */ +static unsigned int fanotify_poll(struct file *file, poll_table *wait) +{ + struct fsnotify_group *group = file->private_data; + int ret = 0; + + poll_wait(file, &group->notification_waitq, wait); + mutex_lock(&group->notification_mutex); + if (!fsnotify_notify_queue_is_empty(group)) + ret = POLLIN | POLLRDNORM; + mutex_unlock(&group->notification_mutex); + + return ret; +} + +static ssize_t fanotify_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct fsnotify_group *group; + struct fsnotify_event *kevent; + char __user *start; + int ret; + DEFINE_WAIT(wait); + + start = buf; + group = file->private_data; + + pr_debug("%s: group=%p\n", __func__, group); + + while (1) { + prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE); + + mutex_lock(&group->notification_mutex); + kevent = 
get_one_event(group, count); + mutex_unlock(&group->notification_mutex); + + if (kevent) { + ret = PTR_ERR(kevent); + if (IS_ERR(kevent)) + break; + ret = copy_event_to_user(group, kevent, buf); + fsnotify_put_event(kevent); + if (ret < 0) + break; + buf += ret; + count -= ret; + continue; + } + + ret = -EAGAIN; + if (file->f_flags & O_NONBLOCK) + break; + ret = -EINTR; + if (signal_pending(current)) + break; + + if (start != buf) + break; + + schedule(); + } + + finish_wait(&group->notification_waitq, &wait); + if (start != buf && ret != -EFAULT) + ret = buf - start; + return ret; +} + static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; @@ -25,13 +212,38 @@ static int fanotify_release(struct inode *ignored, struct file *file) return 0; } +static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct fsnotify_group *group; + struct fsnotify_event_holder *holder; + void __user *p; + int ret = -ENOTTY; + size_t send_len = 0; + + group = file->private_data; + + p = (void __user *) arg; + + switch (cmd) { + case FIONREAD: + mutex_lock(&group->notification_mutex); + list_for_each_entry(holder, &group->notification_list, event_list) + send_len += FAN_EVENT_METADATA_LEN; + mutex_unlock(&group->notification_mutex); + ret = put_user(send_len, (int __user *) p); + break; + } + + return ret; +} + static const struct file_operations fanotify_fops = { - .poll = NULL, - .read = NULL, + .poll = fanotify_poll, + .read = fanotify_read, .fasync = NULL, .release = fanotify_release, - .unlocked_ioctl = NULL, - .compat_ioctl = NULL, + .unlocked_ioctl = fanotify_ioctl, + .compat_ioctl = fanotify_ioctl, }; static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 95aeea2a3ca6..c1c66162a46c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -51,6 +51,30 @@ */ #define FAN_ALL_INCOMING_EVENTS 
(FAN_ALL_EVENTS |\ FAN_EVENT_ON_CHILD) + +#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_Q_OVERFLOW) + +#define FANOTIFY_METADATA_VERSION 1 + +struct fanotify_event_metadata { + __u32 event_len; + __u32 vers; + __s32 fd; + __u64 mask; +} __attribute__ ((packed)); + +/* Helper functions to deal with fanotify_event_metadata buffers */ +#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) + +#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \ + (struct fanotify_event_metadata*)(((char *)(meta)) + \ + (meta)->event_len)) + +#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len <= (long)(len)) + #ifdef __KERNEL__ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 32c3263221bd63316815286dccacdc7abfd7f3c4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Dec 2009 21:24:27 -0500 Subject: fanotify: Add pids to events Pass the process identifiers of the triggering processes to fanotify listeners: this information is useful for event filtering and logging. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 5 +++-- fs/notify/fanotify/fanotify_user.c | 1 + fs/notify/notification.c | 3 +++ include/linux/fanotify.h | 1 + include/linux/fsnotify_backend.h | 1 + 5 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 5b0b6b485a9c..881067dc7923 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -10,8 +10,9 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) { pr_debug("%s: old=%p new=%p\n", __func__, old, new); - if ((old->to_tell == new->to_tell) && - (old->data_type == new->data_type)) { + if (old->to_tell == new->to_tell && + old->data_type == new->data_type && + old->tgid == new->tgid) { switch (old->data_type) { case (FSNOTIFY_EVENT_PATH): if ((old->path.mnt == new->path.mnt) && diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index cf9c30009825..66e38fc052b2 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -103,6 +103,7 @@ static ssize_t fill_event_metadata(struct fsnotify_group *group, metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; metadata->mask = fanotify_outgoing_mask(event->mask); + metadata->pid = pid_vnr(event->tgid); metadata->fd = create_fd(group, event); return metadata->fd; diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 066f1f988bac..7fc8d004084c 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -93,6 +93,7 @@ void fsnotify_put_event(struct fsnotify_event *event) BUG_ON(!list_empty(&event->private_data_list)); kfree(event->file_name); + put_pid(event->tgid); kmem_cache_free(fsnotify_event_cachep, event); } } @@ -346,6 +347,7 @@ struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event) return NULL; } } + 
event->tgid = get_pid(old_event->tgid); if (event->data_type == FSNOTIFY_EVENT_PATH) path_get(&event->path); @@ -385,6 +387,7 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, event->name_len = strlen(event->file_name); } + event->tgid = get_pid(task_tgid(current)); event->sync_cookie = cookie; event->to_tell = to_tell; event->data_type = data_type; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index c1c66162a46c..5f633af4d1b0 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -62,6 +62,7 @@ struct fanotify_event_metadata { __u32 vers; __s32 fd; __u64 mask; + __s64 pid; } __attribute__ ((packed)); /* Helper functions to deal with fanotify_event_metadata buffers */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ff654c1932f2..7d93572ec568 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -221,6 +221,7 @@ struct fsnotify_event { u32 sync_cookie; /* used to corrolate events, namely inotify mv events */ char *file_name; size_t name_len; + struct pid *tgid; struct list_head private_data_list; /* groups can store private data here */ }; -- cgit v1.2.3 From 0ff21db9fcc39042b814dad8a4b7508710a75235 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:29 -0500 Subject: fanotify: hooks the fanotify_mark syscall to the vfsmount code Create a new fanotify_mark flag which indicates we should attach the mark to the vfsmount holding the object referenced by dfd and pathname rather than the inode itself. 
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 15 +++++++++++---- include/linux/fanotify.h | 4 +++- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index db80a0d89d24..81267260d1b9 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -485,7 +485,8 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user * pathname) { - struct inode *inode; + struct inode *inode = NULL; + struct vfsmount *mnt = NULL; struct fsnotify_group *group; struct file *filp; struct path path; @@ -515,16 +516,22 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, goto fput_and_out; /* inode held in place by reference to path; group by fget on fd */ - inode = path.dentry->d_inode; + if (!(flags & FAN_MARK_ON_VFSMOUNT)) + inode = path.dentry->d_inode; + else + mnt = path.mnt; group = filp->private_data; /* create/update an inode mark */ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: - ret = fanotify_add_inode_mark(group, inode, mask); + if (flags & FAN_MARK_ON_VFSMOUNT) + ret = fanotify_add_vfsmount_mark(group, mnt, mask); + else + ret = fanotify_add_inode_mark(group, inode, mask); break; case FAN_MARK_REMOVE: - ret = fanotify_remove_mark(group, inode, NULL, mask); + ret = fanotify_remove_mark(group, inode, mnt, mask); break; default: ret = -EINVAL; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 5f633af4d1b0..e25d348188ca 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -29,11 +29,13 @@ #define FAN_MARK_REMOVE 0x00000002 #define FAN_MARK_DONT_FOLLOW 0x00000004 #define FAN_MARK_ONLYDIR 0x00000008 +#define FAN_MARK_ON_VFSMOUNT 0x00000010 #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ FAN_MARK_DONT_FOLLOW |\ - FAN_MARK_ONLYDIR) + FAN_MARK_ONLYDIR |\ + 
FAN_MARK_ON_VFSMOUNT) /* * All of the events - we build the list by hand so that we can add flags in -- cgit v1.2.3 From eac8e9e80ccbd30801b7b76a2ee4c6c5a681e53c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Dec 2009 21:24:29 -0500 Subject: fanotify: rename FAN_MARK_ON_VFSMOUNT to FAN_MARK_MOUNT The term 'vfsmount' isn't sensible to userspace. Instead call it 'mount'. Signed-off-by: Andreas Gruenbacher Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 4 ++-- include/linux/fanotify.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 81267260d1b9..091371e1bde3 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -516,7 +516,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, goto fput_and_out; /* inode held in place by reference to path; group by fget on fd */ - if (!(flags & FAN_MARK_ON_VFSMOUNT)) + if (!(flags & FAN_MARK_MOUNT)) inode = path.dentry->d_inode; else mnt = path.mnt; @@ -525,7 +525,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, /* create/update an inode mark */ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: - if (flags & FAN_MARK_ON_VFSMOUNT) + if (flags & FAN_MARK_MOUNT) ret = fanotify_add_vfsmount_mark(group, mnt, mask); else ret = fanotify_add_inode_mark(group, inode, mask); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index e25d348188ca..5ee22fb274e5 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -29,13 +29,13 @@ #define FAN_MARK_REMOVE 0x00000002 #define FAN_MARK_DONT_FOLLOW 0x00000004 #define FAN_MARK_ONLYDIR 0x00000008 -#define FAN_MARK_ON_VFSMOUNT 0x00000010 +#define FAN_MARK_MOUNT 0x00000010 #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ FAN_MARK_DONT_FOLLOW |\ FAN_MARK_ONLYDIR |\ - FAN_MARK_ON_VFSMOUNT) + 
FAN_MARK_MOUNT) /* * All of the events - we build the list by hand so that we can add flags in -- cgit v1.2.3 From 88380fe66e0ac22529f5426ab27d67da00ed2628 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Dec 2009 21:24:29 -0500 Subject: fanotify: remove fanotify.h declarations fanotify_mark_validate functions are all needlessly declared in headers as static inlines. Instead just do the checks where they are needed for code readability. Signed-off-by: Andreas Gruenbacher Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.h | 25 ------------------------- fs/notify/fanotify/fanotify_user.c | 25 ++++++++++--------------- include/linux/fanotify.h | 7 ------- 3 files changed, 10 insertions(+), 47 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 5608783c6bca..4d5723a74a8e 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -6,31 +6,6 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; -static inline bool fanotify_mark_flags_valid(unsigned int flags) -{ - /* must be either and add or a remove */ - if (!(flags & (FAN_MARK_ADD | FAN_MARK_REMOVE))) - return false; - - /* cannot be both add and remove */ - if ((flags & FAN_MARK_ADD) && - (flags & FAN_MARK_REMOVE)) - return false; - - /* cannot have more flags than we know about */ - if (flags & ~FAN_ALL_MARK_FLAGS) - return false; - - return true; -} - -static inline bool fanotify_mask_valid(__u32 mask) -{ - if (mask & ~((__u32)FAN_ALL_INCOMING_EVENTS)) - return false; - return true; -} - static inline __u32 fanotify_outgoing_mask(__u32 mask) { return mask & FAN_ALL_OUTGOING_EVENTS; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 00628d3ce5a2..618867e4d30f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -430,20 +430,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, return 0; } 
-static bool fanotify_mark_validate_input(int flags, - __u32 mask) -{ - pr_debug("%s: flags=%x mask=%x\n", __func__, flags, mask); - - /* are flags valid of this operation? */ - if (!fanotify_mark_flags_valid(flags)) - return false; - /* is the mask valid? */ - if (!fanotify_mask_valid(mask)) - return false; - return true; -} - /* fanotify syscalls */ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags, unsigned int, priority) @@ -505,7 +491,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, if (mask & ((__u64)0xffffffff << 32)) return -EINVAL; - if (!fanotify_mark_validate_input(flags, mask)) + if (flags & ~FAN_ALL_MARK_FLAGS) + return -EINVAL; + switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { + case FAN_MARK_ADD: + case FAN_MARK_REMOVE: + break; + default: + return -EINVAL; + } + if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD)) return -EINVAL; filp = fget_light(fanotify_fd, &fput_needed); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 5ee22fb274e5..90e59b24fd04 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -47,13 +47,6 @@ FAN_CLOSE |\ FAN_OPEN) -/* - * All legal FAN bits userspace can request (although possibly not all - * at the same time. - */ -#define FAN_ALL_INCOMING_EVENTS (FAN_ALL_EVENTS |\ - FAN_EVENT_ON_CHILD) - #define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ FAN_Q_OVERFLOW) -- cgit v1.2.3 From b9e4e3bd0495fea9e8f8e712889c9cd8ffa43c94 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:33 -0500 Subject: fanotify: allow users to set an ignored_mask Change the sys_fanotify_mark() system call so users can set ignored_masks on inodes. Remember, if a user never sets a real mask, and only sets ignored masks, the ignore will never be pinned in memory. Thus ignored_masks can be lost under memory pressure and the user may again get events they previously thought were ignored. 
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 54 +++++++++++++++++++++++++------------- include/linux/fanotify.h | 4 ++- 2 files changed, 39 insertions(+), 19 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3320f0c57e31..ad02d475770f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -296,13 +296,20 @@ out: return ret; } -static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u32 mask) +static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, + __u32 mask, + unsigned int flags) { __u32 oldmask; spin_lock(&fsn_mark->lock); - oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask)); + if (!(flags & FAN_MARK_IGNORED_MASK)) { + oldmask = fsn_mark->mask; + fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask)); + } else { + oldmask = fsn_mark->ignored_mask; + fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask)); + } spin_unlock(&fsn_mark->lock); if (!(oldmask & ~mask)) @@ -312,7 +319,8 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u3 } static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask) + struct vfsmount *mnt, __u32 mask, + unsigned int flags) { struct fsnotify_mark *fsn_mark = NULL; __u32 removed; @@ -321,7 +329,7 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, if (!fsn_mark) return -ENOENT; - removed = fanotify_mark_remove_from_mask(fsn_mark, mask); + removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags); fsnotify_put_mark(fsn_mark); if (removed & group->mask) fsnotify_recalc_group_mask(group); @@ -332,7 +340,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, } static int fanotify_remove_inode_mark(struct fsnotify_group *group, - struct inode *inode, __u32 mask) + struct inode 
*inode, __u32 mask, + unsigned int flags) { struct fsnotify_mark *fsn_mark = NULL; __u32 removed; @@ -341,7 +350,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, if (!fsn_mark) return -ENOENT; - removed = fanotify_mark_remove_from_mask(fsn_mark, mask); + removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags); /* matches the fsnotify_find_inode_mark() */ fsnotify_put_mark(fsn_mark); @@ -353,20 +362,28 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, return 0; } -static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, __u32 mask) +static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, + __u32 mask, + unsigned int flags) { __u32 oldmask; spin_lock(&fsn_mark->lock); - oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask)); + if (!(flags & FAN_MARK_IGNORED_MASK)) { + oldmask = fsn_mark->mask; + fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask)); + } else { + oldmask = fsn_mark->ignored_mask; + fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask)); + } spin_unlock(&fsn_mark->lock); return mask & ~oldmask; } static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask) + struct vfsmount *mnt, __u32 mask, + unsigned int flags) { struct fsnotify_mark *fsn_mark; __u32 added; @@ -386,7 +403,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, return ret; } } - added = fanotify_mark_add_to_mask(fsn_mark, mask); + added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); fsnotify_put_mark(fsn_mark); if (added) { if (added & ~group->mask) @@ -398,7 +415,8 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, } static int fanotify_add_inode_mark(struct fsnotify_group *group, - struct inode *inode, __u32 mask) + struct inode *inode, __u32 mask, + unsigned int flags) { struct fsnotify_mark *fsn_mark; __u32 added; @@ -420,7 +438,7 @@ static int 
fanotify_add_inode_mark(struct fsnotify_group *group, return ret; } } - added = fanotify_mark_add_to_mask(fsn_mark, mask); + added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); fsnotify_put_mark(fsn_mark); if (added) { if (added & ~group->mask) @@ -528,15 +546,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: if (flags & FAN_MARK_MOUNT) - ret = fanotify_add_vfsmount_mark(group, mnt, mask); + ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); else - ret = fanotify_add_inode_mark(group, inode, mask); + ret = fanotify_add_inode_mark(group, inode, mask, flags); break; case FAN_MARK_REMOVE: if (flags & FAN_MARK_MOUNT) - ret = fanotify_remove_vfsmount_mark(group, mnt, mask); + ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags); else - ret = fanotify_remove_inode_mark(group, inode, mask); + ret = fanotify_remove_inode_mark(group, inode, mask, flags); break; default: ret = -EINVAL; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 90e59b24fd04..b8daa9f9b560 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -30,12 +30,14 @@ #define FAN_MARK_DONT_FOLLOW 0x00000004 #define FAN_MARK_ONLYDIR 0x00000008 #define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_IGNORED_MASK 0x00000020 #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ FAN_MARK_DONT_FOLLOW |\ FAN_MARK_ONLYDIR |\ - FAN_MARK_MOUNT) + FAN_MARK_MOUNT |\ + FAN_MARK_IGNORED_MASK) /* * All of the events - we build the list by hand so that we can add flags in -- cgit v1.2.3 From c9778a98e7440fb73e0d27b8155a688663a0d493 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:33 -0500 Subject: fanotify: allow ignored_masks to survive modify Some users may want to truly ignore an inode even if it has been modified. Say you are watching a mount which contains a log file and you really don't want any notification about that file. 
This patch allows the listener to do that. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 2 ++ include/linux/fanotify.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index ad02d475770f..3e275f17e7b7 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -375,6 +375,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, } else { oldmask = fsn_mark->ignored_mask; fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask)); + if (flags & FAN_MARK_IGNORED_SURV_MODIFY) + fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } spin_unlock(&fsn_mark->lock); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index b8daa9f9b560..e43934d0b74c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -31,13 +31,15 @@ #define FAN_MARK_ONLYDIR 0x00000008 #define FAN_MARK_MOUNT 0x00000010 #define FAN_MARK_IGNORED_MASK 0x00000020 +#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ FAN_MARK_DONT_FOLLOW |\ FAN_MARK_ONLYDIR |\ FAN_MARK_MOUNT |\ - FAN_MARK_IGNORED_MASK) + FAN_MARK_IGNORED_MASK |\ + FAN_MARK_IGNORED_SURV_MODIFY) /* * All of the events - we build the list by hand so that we can add flags in -- cgit v1.2.3 From 4d92604cc90aa18bbbe0f6e23b7a9fdb612836d3 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:34 -0500 Subject: fanotify: clear all fanotify marks fanotify listeners may want to clear all marks. They may want to do this to destroy all of their inode marks which have nothing but ignores. Realistically this is useful for av vendors who update policy and want to clear all of their cached allows. 
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 12 ++++++++++-- fs/notify/inode_mark.c | 8 ++++++++ fs/notify/mark.c | 21 ++++++++++++++++----- fs/notify/vfsmount_mark.c | 5 +++++ include/linux/fanotify.h | 1 + include/linux/fsnotify_backend.h | 6 ++++++ 6 files changed, 46 insertions(+), 7 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3e275f17e7b7..9fe760baf69f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -514,9 +514,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, if (flags & ~FAN_ALL_MARK_FLAGS) return -EINVAL; - switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { + switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { case FAN_MARK_ADD: case FAN_MARK_REMOVE: + case FAN_MARK_FLUSH: break; default: return -EINVAL; @@ -545,7 +546,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, group = filp->private_data; /* create/update an inode mark */ - switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { + switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { case FAN_MARK_ADD: if (flags & FAN_MARK_MOUNT) ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); @@ -558,6 +559,13 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, else ret = fanotify_remove_inode_mark(group, inode, mask, flags); break; + case FAN_MARK_FLUSH: + if (flags & FAN_MARK_MOUNT) + fsnotify_clear_vfsmount_marks_by_group(group); + else + fsnotify_clear_inode_marks_by_group(group); + fsnotify_recalc_group_mask(group); + break; default: ret = -EINVAL; } diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 4292f9e23ae8..0c0a48b1659f 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -103,6 +103,14 @@ void fsnotify_clear_marks_by_inode(struct inode *inode) } } +/* + * Given a group clear all of the inode marks associated 
with that group. + */ +void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE); +} + /* * given a group and inode, find the mark associated with that combination. * if found take a reference to that mark and return it, else return NULL diff --git a/fs/notify/mark.c b/fs/notify/mark.c index cb1d822f227f..1e824e64441d 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -270,18 +270,21 @@ err: } /* - * Given a group, destroy all of the marks associated with that group. + * clear any marks in a group in which mark->flags & flags is true */ -void fsnotify_clear_marks_by_group(struct fsnotify_group *group) +void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, + unsigned int flags) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(free_list); spin_lock(&group->mark_lock); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - list_add(&mark->free_g_list, &free_list); - list_del_init(&mark->g_list); - fsnotify_get_mark(mark); + if (mark->flags & flags) { + list_add(&mark->free_g_list, &free_list); + list_del_init(&mark->g_list); + fsnotify_get_mark(mark); + } } spin_unlock(&group->mark_lock); @@ -291,6 +294,14 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group) } } +/* + * Given a group, destroy all of the marks associated with that group. 
+ */ +void fsnotify_clear_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1); +} + void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old) { assert_spin_locked(&old->lock); diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 1b61d0a942de..8f1aa02f4f02 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -51,6 +51,11 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) } } +void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT); +} + /* * Recalculate the mask of events relevant to a given vfsmount locked. */ diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index e43934d0b74c..385896c9f828 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -32,6 +32,7 @@ #define FAN_MARK_MOUNT 0x00000010 #define FAN_MARK_IGNORED_MASK 0x00000020 #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 +#define FAN_MARK_FLUSH 0x00000080 #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 8ca19df8a171..be4a36ed2008 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -384,6 +384,12 @@ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group * struct inode *inode, struct vfsmount *mnt, int allow_dups); /* given a mark, flag it to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark); +/* run all the marks in a group, and clear all of the vfsmount marks */ +extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); +/* run all the marks in a group, and clear all of the inode marks */ +extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group); +/* run all the marks in a group, and clear 
all of the marks where mark->flags & flags is true*/ +extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); /* run all the marks in a group, and flag them to be freed */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark *mark); -- cgit v1.2.3 From 9e66e4233db9c7e31e9ee706be2c9ddd54cf99b3 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:34 -0500 Subject: fanotify: permissions and blocking This is the backend work needed for fanotify to support the new FS_OPEN_PERM and FS_ACCESS_PERM fsnotify events. This is done using the new fsnotify secondary queue. No userspace interface is provided actually respond to or request these events. Signed-off-by: Eric Paris --- fs/notify/fanotify/Kconfig | 14 ++++++++++ fs/notify/fanotify/fanotify.c | 54 +++++++++++++++++++++++++++++++++++--- fs/notify/fanotify/fanotify_user.c | 5 ++++ include/linux/fanotify.h | 18 +++++++++++++ include/linux/fsnotify_backend.h | 12 +++++++++ 5 files changed, 99 insertions(+), 4 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig index 668e5df28e28..566de30395c2 100644 --- a/fs/notify/fanotify/Kconfig +++ b/fs/notify/fanotify/Kconfig @@ -10,3 +10,17 @@ config FANOTIFY the event. If unsure, say Y. + +config FANOTIFY_ACCESS_PERMISSIONS + bool "fanotify permissions checking" + depends on FANOTIFY + depends on SECURITY + default n + ---help--- + Say Y here is you want fanotify listeners to be able to make permissions + decisions concerning filesystem events. This is used by some fanotify + listeners which need to scan files before allowing the system access to + use those files. This is used by some anti-malware vendors and by some + hierarchical storage managent systems. + + If unsure, say N. 
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 4feed8601e29..52d0a55a249e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -2,9 +2,12 @@ #include #include #include +#include #include /* UINT_MAX */ #include +#include #include +#include static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) { @@ -88,10 +91,37 @@ out: return ret; } +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +static int fanotify_get_response_from_access(struct fsnotify_group *group, + struct fsnotify_event *event) +{ + int ret; + + pr_debug("%s: group=%p event=%p\n", __func__, group, event); + + wait_event(group->fanotify_data.access_waitq, event->response); + + /* userspace responded, convert to something usable */ + spin_lock(&event->lock); + switch (event->response) { + case FAN_ALLOW: + ret = 0; + break; + case FAN_DENY: + default: + ret = -EPERM; + } + event->response = 0; + spin_unlock(&event->lock); + + return ret; +} +#endif + static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event) { int ret; - struct fsnotify_event *used_event; + struct fsnotify_event *notify_event = NULL; BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); @@ -100,15 +130,31 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_e BUILD_BUG_ON(FAN_OPEN != FS_OPEN); BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); + BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); + BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); pr_debug("%s: group=%p event=%p\n", __func__, group, event); - ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge, (void **)&used_event); + ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge, + (void **)&notify_event); /* -EEXIST means this event was merged with another, not that it was an error */ if (ret == -EEXIST) ret = 0; - if (used_event) - 
fsnotify_put_event(used_event); + if (ret) + goto out; + +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + if (event->mask & FAN_ALL_PERM_EVENTS) { + /* if we merged we need to wait on the new event */ + if (notify_event) + event = notify_event; + ret = fanotify_get_response_from_access(group, event); + } +#endif + +out: + if (notify_event) + fsnotify_put_event(notify_event); return ret; } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 84d3e2047de3..09d9bdb62af3 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -482,6 +482,11 @@ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags, return PTR_ERR(group); group->priority = priority; +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + mutex_init(&group->fanotify_data.access_mutex); + init_waitqueue_head(&group->fanotify_data.access_waitq); + INIT_LIST_HEAD(&group->fanotify_data.access_list); +#endif fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 385896c9f828..02f80676c238 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -15,6 +15,9 @@ /* FIXME currently Q's have no limit.... 
*/ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ +#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ + /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ @@ -52,7 +55,14 @@ FAN_CLOSE |\ FAN_OPEN) +/* + * All events which require a permission response from userspace + */ +#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ + FAN_ACCESS_PERM) + #define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) #define FANOTIFY_METADATA_VERSION 1 @@ -65,6 +75,10 @@ struct fanotify_event_metadata { __s64 pid; } __attribute__ ((packed)); +/* Legit userspace responses to a _PERM event */ +#define FAN_ALLOW 0x01 +#define FAN_DENY 0x02 + /* Helper functions to deal with fanotify_event_metadata buffers */ #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) @@ -78,5 +92,9 @@ struct fanotify_event_metadata { #ifdef __KERNEL__ +struct fanotify_wait { + struct fsnotify_event *event; + __s32 fd; +}; #endif /* __KERNEL__ */ #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c34728e7d8cb..b0d00fd6bfad 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -159,6 +159,14 @@ struct fsnotify_group { struct fasync_struct *fa; /* async notification */ struct user_struct *user; } inotify_data; +#endif +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + struct fanotify_group_private_data { + /* allows a group to block waiting for a userspace response */ + struct mutex access_mutex; + struct list_head access_list; + wait_queue_head_t access_waitq; + } fanotify_data; #endif }; }; @@ -227,6 +235,10 @@ struct fsnotify_event { size_t name_len; struct pid *tgid; +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + __u32 response; /* userspace answer to question */ +#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ + struct list_head 
private_data_list; /* groups can store private data here */ }; -- cgit v1.2.3 From b2d879096ac799722e6017ee82c0586f0d101c9c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:34 -0500 Subject: fanotify: userspace interface for permission responses fanotify groups need to respond to events which include permissions types. To do so groups will send a response using write() on the fanotify_fd they have open. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 3 + fs/notify/fanotify/fanotify_user.c | 182 +++++++++++++++++++++++++++++++++++-- include/linux/fanotify.h | 5 + 3 files changed, 184 insertions(+), 6 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 52d0a55a249e..bbcfccd4a8ea 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -114,6 +114,9 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, event->response = 0; spin_unlock(&event->lock); + pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, + group, event, ret); + return ret; } #endif diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 09d9bdb62af3..87f0be852f71 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -18,6 +18,13 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; static struct kmem_cache *fanotify_mark_cache __read_mostly; +static struct kmem_cache *fanotify_response_event_cache __read_mostly; + +struct fanotify_response_event { + struct list_head list; + __s32 fd; + struct fsnotify_event *event; +}; /* * Get an fsnotify notification event if one exists and is small @@ -110,23 +117,152 @@ static ssize_t fill_event_metadata(struct fsnotify_group *group, return metadata->fd; } +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group, + __s32 fd) +{ + struct 
fanotify_response_event *re, *return_re = NULL; + + mutex_lock(&group->fanotify_data.access_mutex); + list_for_each_entry(re, &group->fanotify_data.access_list, list) { + if (re->fd != fd) + continue; + + list_del_init(&re->list); + return_re = re; + break; + } + mutex_unlock(&group->fanotify_data.access_mutex); + + pr_debug("%s: found return_re=%p\n", __func__, return_re); + + return return_re; +} + +static int process_access_response(struct fsnotify_group *group, + struct fanotify_response *response_struct) +{ + struct fanotify_response_event *re; + __s32 fd = response_struct->fd; + __u32 response = response_struct->response; + + pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group, + fd, response); + /* + * make sure the response is valid, if invalid we do nothing and either + * userspace can send a valid responce or we will clean it up after the + * timeout + */ + switch (response) { + case FAN_ALLOW: + case FAN_DENY: + break; + default: + return -EINVAL; + } + + if (fd < 0) + return -EINVAL; + + re = dequeue_re(group, fd); + if (!re) + return -ENOENT; + + re->event->response = response; + + wake_up(&group->fanotify_data.access_waitq); + + kmem_cache_free(fanotify_response_event_cache, re); + + return 0; +} + +static int prepare_for_access_response(struct fsnotify_group *group, + struct fsnotify_event *event, + __s32 fd) +{ + struct fanotify_response_event *re; + + if (!(event->mask & FAN_ALL_PERM_EVENTS)) + return 0; + + re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL); + if (!re) + return -ENOMEM; + + re->event = event; + re->fd = fd; + + mutex_lock(&group->fanotify_data.access_mutex); + list_add_tail(&re->list, &group->fanotify_data.access_list); + mutex_unlock(&group->fanotify_data.access_mutex); + + return 0; +} + +static void remove_access_response(struct fsnotify_group *group, + struct fsnotify_event *event, + __s32 fd) +{ + struct fanotify_response_event *re; + + if (!(event->mask & FAN_ALL_PERM_EVENTS)) + return; + + re = 
dequeue_re(group, fd); + if (!re) + return; + + BUG_ON(re->event != event); + + kmem_cache_free(fanotify_response_event_cache, re); + + return; +} +#else +static int prepare_for_access_response(struct fsnotify_group *group, + struct fsnotify_event *event, + __s32 fd) +{ + return 0; +} + +static void remove_access_response(struct fsnotify_group *group, + struct fsnotify_event *event, + __s32 fd) +{ + return 0; +} +#endif + static ssize_t copy_event_to_user(struct fsnotify_group *group, struct fsnotify_event *event, char __user *buf) { struct fanotify_event_metadata fanotify_event_metadata; - int ret; + int fd, ret; pr_debug("%s: group=%p event=%p\n", __func__, group, event); - ret = fill_event_metadata(group, &fanotify_event_metadata, event); - if (ret < 0) - return ret; + fd = fill_event_metadata(group, &fanotify_event_metadata, event); + if (fd < 0) + return fd; + + ret = prepare_for_access_response(group, event, fd); + if (ret) + goto out_close_fd; + ret = -EFAULT; if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN)) - return -EFAULT; + goto out_kill_access_response; return FAN_EVENT_METADATA_LEN; + +out_kill_access_response: + remove_access_response(group, event, fd); +out_close_fd: + sys_close(fd); + return ret; } /* intofiy userspace file descriptor functions */ @@ -197,6 +333,33 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, return ret; } +static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) +{ +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + struct fanotify_response response = { .fd = -1, .response = -1 }; + struct fsnotify_group *group; + int ret; + + group = file->private_data; + + if (count > sizeof(response)) + count = sizeof(response); + + pr_debug("%s: group=%p count=%zu\n", __func__, group, count); + + if (copy_from_user(&response, buf, count)) + return -EFAULT; + + ret = process_access_response(group, &response); + if (ret < 0) + count = ret; + + return count; +#else 
+ return -EINVAL; +#endif +} + static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; @@ -237,6 +400,7 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar static const struct file_operations fanotify_fops = { .poll = fanotify_poll, .read = fanotify_read, + .write = fanotify_write, .fasync = NULL, .release = fanotify_release, .unlocked_ioctl = fanotify_ioctl, @@ -470,7 +634,7 @@ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags, if (flags & ~FAN_ALL_INIT_FLAGS) return -EINVAL; - f_flags = (O_RDONLY | FMODE_NONOTIFY); + f_flags = O_RDWR | FMODE_NONOTIFY; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) @@ -527,7 +691,11 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, default: return -EINVAL; } +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) +#else if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD)) +#endif return -EINVAL; filp = fget_light(fanotify_fd, &fput_needed); @@ -600,6 +768,8 @@ SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark); static int __init fanotify_user_setup(void) { fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); + fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event, + SLAB_PANIC); return 0; } diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 02f80676c238..f0949a57ca9d 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -75,6 +75,11 @@ struct fanotify_event_metadata { __s64 pid; } __attribute__ ((packed)); +struct fanotify_response { + __s32 fd; + __u32 response; +} __attribute__ ((packed)); + /* Legit userspace responses to a _PERM event */ #define FAN_ALLOW 0x01 #define FAN_DENY 0x02 -- cgit v1.2.3 From 2eebf582c9b3106abb9c33f4fc0a347fb9391037 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 18 Aug 2010 12:25:50 -0400 Subject: 
fanotify: flush outstanding perm requests on group destroy When an fanotify listener is closing it may cause a deadlock between the listener and the original task doing an fs operation. If the original task is waiting for a permissions response it will be holding the srcu lock. The listener cannot clean up and exit until after that srcu lock is synchronized. Thus deadlock. The fix introduced here is to stop accepting new permissions events when a listener is shutting down and to grant permission for all outstanding events. Thus the original task will eventually release the srcu lock and the listener can complete shutdown. Reported-by: Andreas Gruenbacher Cc: Andreas Gruenbacher Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 27 +++++++++++++++++++++++++++ include/linux/fanotify.h | 7 ------- include/linux/fsnotify_backend.h | 1 + 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 032b837fcd11..b966b7230f47 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -195,6 +195,14 @@ static int prepare_for_access_response(struct fsnotify_group *group, re->fd = fd; mutex_lock(&group->fanotify_data.access_mutex); + + if (group->fanotify_data.bypass_perm) { + mutex_unlock(&group->fanotify_data.access_mutex); + kmem_cache_free(fanotify_response_event_cache, re); + event->response = FAN_ALLOW; + return 0; + } + list_add_tail(&re->list, &group->fanotify_data.access_list); mutex_unlock(&group->fanotify_data.access_mutex); @@ -364,9 +372,28 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; + struct fanotify_response_event *re, *lre; pr_debug("%s: file=%p group=%p\n", __func__, file, group); +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +
mutex_lock(&group->fanotify_data.access_mutex); + + group->fanotify_data.bypass_perm = true; + + list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { + pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, + re, re->event); + + list_del_init(&re->list); + re->event->response = FAN_ALLOW; + + kmem_cache_free(fanotify_response_event_cache, re); + } + mutex_unlock(&group->fanotify_data.access_mutex); + + wake_up(&group->fanotify_data.access_waitq); +#endif /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_put_group(group); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index f0949a57ca9d..985435622ecd 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -95,11 +95,4 @@ struct fanotify_response { (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ (long)(meta)->event_len <= (long)(len)) -#ifdef __KERNEL__ - -struct fanotify_wait { - struct fsnotify_event *event; - __s32 fd; -}; -#endif /* __KERNEL__ */ #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ed36fb57c426..e40190d16878 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -156,6 +156,7 @@ struct fsnotify_group { struct mutex access_mutex; struct list_head access_list; wait_queue_head_t access_waitq; + bool bypass_perm; /* protected by access_mutex */ #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; } fanotify_data; -- cgit v1.2.3 From 0fb85621df4f9f7c663c6c77c302e821a832c95e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 20 Aug 2010 10:02:15 +0100 Subject: fanotify: resize pid and reorder structure resize pid and reorder the fanotify_event_metadata so it is naturally aligned and we can work towards dropping the packed attributed Signed-off-by: Tvrtko Ursulin Cc: Andreas Dilger Signed-off-by: Eric Paris --- include/linux/fanotify.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
(limited to 'include/linux/fanotify.h') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 985435622ecd..63531a6b4d2a 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -65,14 +65,14 @@ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) -#define FANOTIFY_METADATA_VERSION 1 +#define FANOTIFY_METADATA_VERSION 2 struct fanotify_event_metadata { __u32 event_len; __u32 vers; - __s32 fd; __u64 mask; - __s64 pid; + __s32 fd; + __s32 pid; } __attribute__ ((packed)); struct fanotify_response { -- cgit v1.2.3 From 4231a23530a30e86eb32fbe869bbef1b3e54d5aa Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: implement fanotify listener ordering The fanotify listeners need to be able to specify what types of operations they are going to perform so they can be ordered appropriately between other listeners doing other types of operations. They need this to be able to make sure that things like hierarchical storage managers will get access to inodes before processes which need the data. This patch defines 3 possible uses which groups must indicate in the fanotify_init() flags. FAN_CLASS_PRE_CONTENT FAN_CLASS_CONTENT FAN_CLASS_NOTIF Groups will receive notification in that order. The order between 2 groups in the same class is nondeterministic. FAN_CLASS_PRE_CONTENT is intended to be used by listeners which need access to the inode before they are certain that the inode contains its final data. A hierarchical storage manager should choose to use this class. FAN_CLASS_CONTENT is intended to be used by listeners which need access to the inode after it contains its intended contents. This would be the appropriate level for an AV solution or document control system. FAN_CLASS_NOTIF is intended for normal async notification about access, much the same as inotify and dnotify. Synchronous permissions events are not permitted at this class.
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 25 ++++++++++++++++++++++++- include/linux/fanotify.h | 11 ++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index bbcb98e7fcc6..1c09e6321c5e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -664,6 +664,20 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); #endif + switch (flags & FAN_ALL_CLASS_BITS) { + case FAN_CLASS_NOTIF: + group->priority = FS_PRIO_0; + break; + case FAN_CLASS_CONTENT: + group->priority = FS_PRIO_1; + break; + case FAN_CLASS_PRE_CONTENT: + group->priority = FS_PRIO_2; + break; + default: + fd = -EINVAL; + goto out_put_group; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) @@ -719,6 +733,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, ret = -EINVAL; if (unlikely(filp->f_op != &fanotify_fops)) goto fput_and_out; + group = filp->private_data; + + /* + * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not + * allowed to set permissions events. 
+ */ + ret = -EINVAL; + if (mask & FAN_ALL_PERM_EVENTS && + group->priority == FS_PRIO_0) + goto fput_and_out; ret = fanotify_find_path(dfd, pathname, &path, flags); if (ret) @@ -729,7 +753,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, inode = path.dentry->d_inode; else mnt = path.mnt; - group = filp->private_data; /* create/update an inode mark */ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 63531a6b4d2a..2c89ce7b644e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,16 @@ #define FAN_CLOEXEC 0x00000001 #define FAN_NONBLOCK 0x00000002 -#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK) +/* These are NOT bitwise flags. Both bits are used togther. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS) /* flags used for fanotify_modify_mark() */ #define FAN_MARK_ADD 0x00000001 -- cgit v1.2.3 From 2868201965419b9011f3f07fd80e765181343cb1 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: use __aligned_u64 in fanotify userspace metadata Currently the userspace struct exposed by fanotify uses __attribute__((packed)) to make sure that alignment works on multiarch platforms. 
Since this causes a severe performance penalty on some platforms we are going to switch to using explicit alignment notation on the 64bit values so we don't have to use 'packed' Signed-off-by: Eric Paris --- include/linux/fanotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 2c89ce7b644e..8a621c1a0991 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -79,10 +79,10 @@ struct fanotify_event_metadata { __u32 event_len; __u32 vers; - __u64 mask; + __aligned_u64 mask; __s32 fd; __s32 pid; -} __attribute__ ((packed)); +}; struct fanotify_response { __s32 fd; -- cgit v1.2.3 From bbf2aba50f6ed7c8dd53623fa1437b539928ac39 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: allow userspace to flush all marks fanotify is supposed to be able to flush all marks. This is mostly useful for the AV community to flush all cached decisions on a security policy change. This functionality has existed in the kernel but wasn't correctly exposed to userspace. Signed-off-by: Eric Paris --- include/linux/fanotify.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/fanotify.h') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 8a621c1a0991..a97c96d28c07 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -52,7 +52,8 @@ FAN_MARK_ONLYDIR |\ FAN_MARK_MOUNT |\ FAN_MARK_IGNORED_MASK |\ - FAN_MARK_IGNORED_SURV_MODIFY) + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) /* * All of the events - we build the list by hand so that we can add flags in -- cgit v1.2.3 From 2529a0df0f64dab1f60ae08e038b89c53a6b4c02 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fsnotify: implement a default maximum queue depth Currently fanotify has no maximum queue depth. 
Since fanotify is CAP_SYS_ADMIN only this does not pose a normal user DoS issue, but it certainly is possible that an fanotify listener which can't keep up could OOM the box. This patch implements a default 16k depth. This is the same default depth used by inotify, but given fanotify's better queue merging in many situations this queue will contain many additional useful events by comparison. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 4 ++++ include/linux/fanotify.h | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b265936e92d6..04f2fe47b66a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -16,6 +16,8 @@ #include +#define FANOTIFY_DEFAULT_MAX_EVENTS 16384 + extern const struct fsnotify_ops fanotify_fsnotify_ops; static struct kmem_cache *fanotify_mark_cache __read_mostly; @@ -689,6 +691,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_put_group; } + group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) goto out_put_group; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index a97c96d28c07..ed479b6fef7b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -12,7 +12,6 @@ #define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ -/* FIXME currently Q's have no limit.... */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ -- cgit v1.2.3 From 5dd03f55fd2f21916ce248bb2e68bbfb39d94fe5 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: allow userspace to override max queue depth fanotify has a default max queue depth.
This patch allows processes which explicitly request it to have an 'unlimited' queue depth. These processes need to be very careful to make sure they cannot fall far enough behind that they OOM the box. Thus this flag is gated on CAP_SYS_ADMIN. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 ++++++++- include/linux/fanotify.h | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 04f2fe47b66a..43d66d9b2eff 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -691,7 +691,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_put_group; } - group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + if (flags & FAN_UNLIMITED_QUEUE) { + fd = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out_put_group; + group->max_events = UINT_MAX; + } else { + group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index ed479b6fef7b..e37f559c95e1 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -28,12 +28,13 @@ #define FAN_CLASS_NOTIF 0x00000000 #define FAN_CLASS_CONTENT 0x00000004 #define FAN_CLASS_PRE_CONTENT 0x00000008 - #define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) +#define FAN_UNLIMITED_QUEUE 0x00000010 + #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ - FAN_ALL_CLASS_BITS) + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE) /* flags used for fanotify_modify_mark() */ #define FAN_MARK_ADD 0x00000001 -- cgit v1.2.3 From ac7e22dcfafd04c842a02057afd6541c1d613ef9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fanotify: allow userspace to override max marks Some fanotify groups, especially those like AV 
scanners, will need to place lots of marks, particularly ignore marks. Since ignore marks do not pin inodes in cache and are cleared if the inode is removed from core (usually under memory pressure) we expose an interface for listeners, with CAP_SYS_ADMIN, to override the maximum number of marks and be allowed to set an 'unlimited' number of marks. Programs which make use of this feature will be able to OOM a machine. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 ++++++++- include/linux/fanotify.h | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1d33d7db277a..f9216102b426 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -707,7 +707,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; } - group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + if (flags & FAN_UNLIMITED_MARKS) { + fd = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out_put_group; + group->fanotify_data.max_marks = UINT_MAX; + } else { + group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index e37f559c95e1..7592a366a57b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -32,9 +32,11 @@ FAN_CLASS_PRE_CONTENT) #define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ - FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE) + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) /* flags used for fanotify_modify_mark() */ #define FAN_MARK_ADD 0x00000001 -- cgit v1.2.3 From 8fcd65280abc4699510f1853ede31f43e8a3783a Mon Sep 17 00:00:00 2001 From: Eric Paris
Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fanotify: ignore events on directories unless specifically requested fanotify has a very limited number of events it sends on directories. The usefulness of these events is yet to be seen and still we send them. This is particularly painful for mount marks where one might receive many of these useless events. As such this patch will drop events on IS_DIR() inodes unless they were explicitly requested with FAN_ONDIR. This means that a mark on a directory without FAN_EVENT_ON_CHILD or FAN_ONDIR is meaningless and will result in no events ever (although it will still be allowed since detecting it is hard) Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 5 +++++ fs/notify/fanotify/fanotify_user.c | 12 ++++++++++++ include/linux/fanotify.h | 10 ++++++++-- 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 8d98e1f5817b..b04f88eed09e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -131,6 +131,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); + BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -195,6 +196,10 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, BUG(); } + if (S_ISDIR(path->dentry->d_inode->i_mode) && + (marks_ignored_mask & FS_ISDIR)) + return false; + if (event_mask & marks_mask & ~marks_ignored_mask) return true; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a7d9369482d5..ff1a908c9708 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -570,6 +570,12 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, if (flags &
FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } + + if (!(flags & FAN_MARK_ONDIR)) { + __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR; + fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + } + spin_unlock(&fsn_mark->lock); return mask & ~oldmask; @@ -766,6 +772,12 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, default: return -EINVAL; } + + if (mask & FAN_ONDIR) { + flags |= FAN_MARK_ONDIR; + mask &= ~FAN_ONDIR; + } + #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) #else diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 7592a366a57b..5e0400a80c33 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -10,13 +10,15 @@ #define FAN_CLOSE_NOWRITE 0x00000010 /* Writtable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ -#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ - #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_ONDIR 0x40000000 /* event occurred against dir */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ + /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ @@ -47,6 +49,10 @@ #define FAN_MARK_IGNORED_MASK 0x00000020 #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_FLUSH 0x00000080 +#ifdef __KERNEL__ +/* not valid from userspace, only kernel internal */ +#define FAN_MARK_ONDIR 0x00000100 +#endif #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ -- cgit v1.2.3 From 50e4a98914de13c6f38f50fd1afa06e2c18b3cf7 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fanotify: Fix FAN_CLOSE comments The comments for FAN_CLOSE_WRITE and FAN_CLOSE_NOWRITE do not match 
FS_CLOSE_WRITE and FS_CLOSE_NOWRITE, respectively. WRITE is for writable files while NOWRITE is for non-writable files. Signed-off-by: Stefan Hajnoczi Signed-off-by: Eric Paris --- include/linux/fanotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 5e0400a80c33..0f0121467fc4 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -6,8 +6,8 @@ /* the following events that user-space can register for */ #define FAN_ACCESS 0x00000001 /* File was accessed */ #define FAN_MODIFY 0x00000002 /* File was modified */ -#define FAN_CLOSE_WRITE 0x00000008 /* Unwrittable file closed */ -#define FAN_CLOSE_NOWRITE 0x00000010 /* Writtable file closed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writtable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ -- cgit v1.2.3 From 88d60c32765716289abeb362c44adf6c35c6824c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 8 Nov 2010 18:19:22 -0500 Subject: fanotify: remove packed from access response message fanotify has decided to be careful about alignment and packing rather than rely on __attribute__((packed)) for multiarch support. Since this attribute isn't doing anything on fanotify_response we just drop it. This does not break API/ABI.
Suggested-by: Tvrtko Ursulin Signed-off-by: Eric Paris --- include/linux/fanotify.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/fanotify.h') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 0f0121467fc4..bdbf9bb29b54 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -96,7 +96,7 @@ struct fanotify_event_metadata { struct fanotify_response { __s32 fd; __u32 response; -} __attribute__ ((packed)); +}; /* Legit userspace responses to a _PERM event */ #define FAN_ALLOW 0x01 -- cgit v1.2.3 From e9a3854fd4ff3907e6c200a3980e19365ee695e9 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Wed, 24 Nov 2010 18:22:09 +0100 Subject: fanotify: Introduce FAN_NOFD FAN_NOFD is used in fanotify events that do not provide an open file descriptor (like the overflow_event). Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fanotify.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/fanotify.h') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index bdbf9bb29b54..c73224315aee 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -101,6 +101,8 @@ struct fanotify_response { /* Legit userspace responses to a _PERM event */ #define FAN_ALLOW 0x01 #define FAN_DENY 0x02 +/* No fd set in event */ +#define FAN_NOFD -1 /* Helper functions to deal with fanotify_event_metadata buffers */ #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) -- cgit v1.2.3 From 62731fa0c893515dc6cbc3e0a2879a92793c735f Mon Sep 17 00:00:00 2001 From: Alexey Zaytsev Date: Mon, 22 Nov 2010 00:33:03 +0000 Subject: fanotify: split version into version and metadata_len To implement per event type optional headers we are interested in knowing how long the metadata structure is. This patch splits the __u32 version field into a __u8 version and a __u16 metadata_len field (with __u8 left over). This should allow for backwards compat ABI.
Signed-off-by: Alexey Zaytsev [rewrote description and changed object sizes and ordering - eparis] Signed-off-by: Eric Paris --- include/linux/fanotify.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/fanotify.h') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index c73224315aee..6c6133f76e16 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -83,11 +83,13 @@ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) -#define FANOTIFY_METADATA_VERSION 2 +#define FANOTIFY_METADATA_VERSION 3 struct fanotify_event_metadata { __u32 event_len; - __u32 vers; + __u8 vers; + __u8 reserved; + __u16 metadata_len; __aligned_u64 mask; __s32 fd; __s32 pid; -- cgit v1.2.3