summaryrefslogtreecommitdiff
path: root/fs/notify
diff options
context:
space:
mode:
Diffstat (limited to 'fs/notify')
-rw-r--r--fs/notify/dnotify/dnotify.c10
-rw-r--r--fs/notify/fanotify/fanotify.c78
-rw-r--r--fs/notify/fanotify/fanotify.h42
-rw-r--r--fs/notify/fanotify/fanotify_user.c296
-rw-r--r--fs/notify/fdinfo.c5
-rw-r--r--fs/notify/fsnotify.c134
-rw-r--r--fs/notify/fsnotify.h11
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/notify/mark.c14
10 files changed, 489 insertions, 105 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 6004dfdfdf0f..9fb73bafd41d 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -20,7 +20,7 @@
static int dir_notify_enable __read_mostly = 1;
#ifdef CONFIG_SYSCTL
-static struct ctl_table dnotify_sysctls[] = {
+static const struct ctl_table dnotify_sysctls[] = {
{
.procname = "dir-notify-enable",
.data = &dir_notify_enable,
@@ -308,6 +308,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
goto out_err;
}
+ error = file_f_owner_allocate(filp);
+ if (error)
+ goto out_err;
+
/* new fsnotify mark, we expect most fcntl calls to add a new mark */
new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
if (!new_dn_mark) {
@@ -315,10 +319,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
goto out_err;
}
- error = file_f_owner_allocate(filp);
- if (error)
- goto out_err;
-
/* set up the new_fsn_mark and new_dn_mark */
new_fsn_mark = &new_dn_mark->fsn_mark;
fsnotify_init_mark(new_fsn_mark, dnotify_group);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 24c7c5df4998..bfe884d624e7 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
return fanotify_error_event_equal(FANOTIFY_EE(old),
FANOTIFY_EE(new));
+ case FANOTIFY_EVENT_TYPE_MNT:
+ return false;
default:
WARN_ON_ONCE(1);
}
@@ -223,7 +225,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
struct fanotify_perm_event *event,
struct fsnotify_iter_info *iter_info)
{
- int ret;
+ int ret, errno;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -262,14 +264,23 @@ static int fanotify_get_response(struct fsnotify_group *group,
ret = 0;
break;
case FAN_DENY:
+ /* Check custom errno from pre-content events */
+ errno = fanotify_get_response_errno(event->response);
+ if (errno) {
+ ret = -errno;
+ break;
+ }
+ fallthrough;
default:
ret = -EPERM;
}
/* Check if the response should be audited */
- if (event->response & FAN_AUDIT)
- audit_fanotify(event->response & ~FAN_AUDIT,
- &event->audit_rule);
+ if (event->response & FAN_AUDIT) {
+ u32 response = event->response &
+ (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS);
+ audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule);
+ }
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
@@ -303,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
- if (!fid_mode) {
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
+ if (data_type != FSNOTIFY_EVENT_MNT)
+ return 0;
+ } else if (!fid_mode) {
/* Do we have path to open a file descriptor? */
if (!path)
return 0;
@@ -401,7 +415,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
{
int dwords, type = 0;
char *ext_buf = NULL;
- void *buf = fh->buf;
+ void *buf = fh + 1;
int err;
fh->type = FILEID_ROOT;
@@ -440,7 +454,13 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
dwords = fh_len >> 2;
type = exportfs_encode_fid(inode, buf, &dwords);
err = -EINVAL;
- if (type <= 0 || type == FILEID_INVALID || fh_len != dwords << 2)
+ /*
+ * Unlike file_handle, type and len of struct fanotify_fh are u8.
+ * Traditionally, filesystem return handle_type < 0xff, but there
+ * is no enforecement for that in vfs.
+ */
+ BUILD_BUG_ON(MAX_HANDLE_SZ > 0xff || FILEID_INVALID > 0xff);
+ if (type <= 0 || type >= FILEID_INVALID || fh_len != dwords << 2)
goto out_err;
fh->type = type;
@@ -548,9 +568,27 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
return &pevent->fae;
}
-static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
+static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
+{
+ struct fanotify_mnt_event *pevent;
+
+ pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);
+ if (!pevent)
+ return NULL;
+
+ pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT;
+ pevent->mnt_id = mnt_id;
+
+ return &pevent->fae;
+}
+
+static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
+ int data_type,
gfp_t gfp)
{
+ const struct path *path = fsnotify_data_path(data, data_type);
+ const struct file_range *range =
+ fsnotify_data_file_range(data, data_type);
struct fanotify_perm_event *pevent;
pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
@@ -564,6 +602,9 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
pevent->hdr.len = 0;
pevent->state = FAN_EVENT_INIT;
pevent->path = *path;
+ /* NULL ppos means no range info */
+ pevent->ppos = range ? &range->pos : NULL;
+ pevent->count = range ? range->count : 0;
path_get(path);
return &pevent->fae;
@@ -715,6 +756,7 @@ static struct fanotify_event *fanotify_alloc_event(
fid_mode);
struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
const struct path *path = fsnotify_data_path(data, data_type);
+ u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
struct mem_cgroup *old_memcg;
struct dentry *moved = NULL;
struct inode *child = NULL;
@@ -801,7 +843,7 @@ static struct fanotify_event *fanotify_alloc_event(
old_memcg = set_active_memcg(group->memcg);
if (fanotify_is_perm_event(mask)) {
- event = fanotify_alloc_perm_event(path, gfp);
+ event = fanotify_alloc_perm_event(data, data_type, gfp);
} else if (fanotify_is_error_event(mask)) {
event = fanotify_alloc_error_event(group, fsid, data,
data_type, &hash);
@@ -810,8 +852,12 @@ static struct fanotify_event *fanotify_alloc_event(
moved, &hash, gfp);
} else if (fid_mode) {
event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
- } else {
+ } else if (path) {
event = fanotify_alloc_path_event(path, &hash, gfp);
+ } else if (mnt_id) {
+ event = fanotify_alloc_mnt_event(mnt_id, gfp);
+ } else {
+ WARN_ON_ONCE(1);
}
if (!event)
@@ -909,8 +955,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
+ BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
- BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);
mask = fanotify_group_event_mask(group, iter_info, &match_mask,
mask, data, data_type, dir);
@@ -968,6 +1015,7 @@ finish:
static void fanotify_free_group_priv(struct fsnotify_group *group)
{
+ put_user_ns(group->user_ns);
kfree(group->fanotify_data.merge_hash);
if (group->fanotify_data.ucounts)
dec_ucount(group->fanotify_data.ucounts,
@@ -1011,6 +1059,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
mempool_free(fee, &group->fanotify_data.error_events_pool);
}
+static void fanotify_free_mnt_event(struct fanotify_event *event)
+{
+ kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
+}
+
static void fanotify_free_event(struct fsnotify_group *group,
struct fsnotify_event *fsn_event)
{
@@ -1037,6 +1090,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
fanotify_free_error_event(group, event);
break;
+ case FANOTIFY_EVENT_TYPE_MNT:
+ fanotify_free_mnt_event(event);
+ break;
default:
WARN_ON_ONCE(1);
}
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index e5ab33cae6a7..b78308975082 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_fid_event_cachep;
extern struct kmem_cache *fanotify_path_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
+extern struct kmem_cache *fanotify_mnt_event_cachep;
/* Possible states of the permission event */
enum {
@@ -24,7 +25,7 @@ enum {
* stored in either the first or last 2 dwords.
*/
#define FANOTIFY_INLINE_FH_LEN (3 << 2)
-#define FANOTIFY_FH_HDR_LEN offsetof(struct fanotify_fh, buf)
+#define FANOTIFY_FH_HDR_LEN sizeof(struct fanotify_fh)
/* Fixed size struct for file handle */
struct fanotify_fh {
@@ -33,7 +34,6 @@ struct fanotify_fh {
#define FANOTIFY_FH_FLAG_EXT_BUF 1
u8 flags;
u8 pad;
- unsigned char buf[];
} __aligned(4);
/* Variable size struct for dir file handle + child file handle + name */
@@ -91,7 +91,7 @@ static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh)
BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN % 4);
BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) >
FANOTIFY_INLINE_FH_LEN);
- return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *));
+ return (char **)ALIGN((unsigned long)(fh + 1), __alignof__(char *));
}
static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh)
@@ -101,7 +101,7 @@ static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh)
static inline void *fanotify_fh_buf(struct fanotify_fh *fh)
{
- return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf;
+ return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh + 1;
}
static inline int fanotify_info_dir_fh_len(struct fanotify_info *info)
@@ -244,6 +244,7 @@ enum fanotify_event_type {
FANOTIFY_EVENT_TYPE_PATH_PERM,
FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
+ FANOTIFY_EVENT_TYPE_MNT,
__FANOTIFY_EVENT_TYPE_NUM
};
@@ -276,7 +277,7 @@ static inline void fanotify_init_event(struct fanotify_event *event,
#define FANOTIFY_INLINE_FH(name, size) \
struct { \
struct fanotify_fh name; \
- /* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \
+ /* Space for filehandle - access with fanotify_fh_buf() */ \
unsigned char _inline_fh_buf[size]; \
}
@@ -409,12 +410,23 @@ struct fanotify_path_event {
struct path path;
};
+struct fanotify_mnt_event {
+ struct fanotify_event fae;
+ u64 mnt_id;
+};
+
static inline struct fanotify_path_event *
FANOTIFY_PE(struct fanotify_event *event)
{
return container_of(event, struct fanotify_path_event, fae);
}
+static inline struct fanotify_mnt_event *
+FANOTIFY_ME(struct fanotify_event *event)
+{
+ return container_of(event, struct fanotify_mnt_event, fae);
+}
+
/*
* Structure for permission fanotify events. It gets allocated and freed in
* fanotify_handle_event() since we wait there for user response. When the
@@ -425,6 +437,8 @@ FANOTIFY_PE(struct fanotify_event *event)
struct fanotify_perm_event {
struct fanotify_event fae;
struct path path;
+ const loff_t *ppos; /* optional file range info */
+ size_t count;
u32 response; /* userspace answer to the event */
unsigned short state; /* state of the event */
int fd; /* fd we passed to userspace for this event */
@@ -446,6 +460,14 @@ static inline bool fanotify_is_perm_event(u32 mask)
mask & FANOTIFY_PERM_EVENTS;
}
+static inline bool fanotify_event_has_access_range(struct fanotify_event *event)
+{
+ if (!(event->mask & FANOTIFY_PRE_CONTENT_EVENTS))
+ return false;
+
+ return FANOTIFY_PERM(event)->ppos;
+}
+
static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
{
return container_of(fse, struct fanotify_event, fse);
@@ -456,6 +478,11 @@ static inline bool fanotify_is_error_event(u32 mask)
return mask & FAN_FS_ERROR;
}
+static inline bool fanotify_is_mnt_event(u32 mask)
+{
+ return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
+}
+
static inline const struct path *fanotify_event_path(struct fanotify_event *event)
{
if (event->type == FANOTIFY_EVENT_TYPE_PATH)
@@ -518,3 +545,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
return mflags;
}
+
+static inline u32 fanotify_get_response_errno(int res)
+{
+ return (res >> FAN_ERRNO_SHIFT) & FAN_ERRNO_MASK;
+}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 2d85c71717d6..b192ee068a7a 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -58,7 +58,7 @@ static int fanotify_max_queued_events __read_mostly;
static long ft_zero = 0;
static long ft_int_max = INT_MAX;
-static struct ctl_table fanotify_table[] = {
+static const struct ctl_table fanotify_table[] = {
{
.procname = "max_user_groups",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
@@ -100,8 +100,7 @@ static void __init fanotify_sysctls_init(void)
*
* Internal and external open flags are stored together in field f_flags of
* struct file. Only external open flags shall be allowed in event_f_flags.
- * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be
- * excluded.
+ * Internal flags like FMODE_EXEC shall be excluded.
*/
#define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \
O_ACCMODE | O_APPEND | O_NONBLOCK | \
@@ -114,14 +113,19 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init;
struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
+struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init;
#define FANOTIFY_EVENT_ALIGN 4
#define FANOTIFY_FID_INFO_HDR_LEN \
(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
-#define FANOTIFY_PIDFD_INFO_HDR_LEN \
+#define FANOTIFY_PIDFD_INFO_LEN \
sizeof(struct fanotify_event_info_pidfd)
#define FANOTIFY_ERROR_INFO_LEN \
(sizeof(struct fanotify_event_info_error))
+#define FANOTIFY_RANGE_INFO_LEN \
+ (sizeof(struct fanotify_event_info_range))
+#define FANOTIFY_MNT_INFO_LEN \
+ (sizeof(struct fanotify_event_info_mnt))
static int fanotify_fid_info_len(int fh_len, int name_len)
{
@@ -159,9 +163,6 @@ static size_t fanotify_event_len(unsigned int info_mode,
int fh_len;
int dot_len = 0;
- if (!info_mode)
- return event_len;
-
if (fanotify_is_error_event(event->mask))
event_len += FANOTIFY_ERROR_INFO_LEN;
@@ -176,13 +177,18 @@ static size_t fanotify_event_len(unsigned int info_mode,
dot_len = 1;
}
- if (info_mode & FAN_REPORT_PIDFD)
- event_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
-
if (fanotify_event_has_object_fh(event)) {
fh_len = fanotify_event_object_fh_len(event);
event_len += fanotify_fid_info_len(fh_len, dot_len);
}
+ if (fanotify_is_mnt_event(event->mask))
+ event_len += FANOTIFY_MNT_INFO_LEN;
+
+ if (info_mode & FAN_REPORT_PIDFD)
+ event_len += FANOTIFY_PIDFD_INFO_LEN;
+
+ if (fanotify_event_has_access_range(event))
+ event_len += FANOTIFY_RANGE_INFO_LEN;
return event_len;
}
@@ -258,12 +264,11 @@ static int create_fd(struct fsnotify_group *group, const struct path *path,
return client_fd;
/*
- * we need a new file handle for the userspace program so it can read even if it was
- * originally opened O_WRONLY.
+ * We provide an fd for the userspace program, so it could access the
+ * file without generating fanotify events itself.
*/
- new_file = dentry_open(path,
- group->fanotify_data.f_flags | __FMODE_NONOTIFY,
- current_cred());
+ new_file = dentry_open_nonotify(path, group->fanotify_data.f_flags,
+ current_cred());
if (IS_ERR(new_file)) {
put_unused_fd(client_fd);
client_fd = PTR_ERR(new_file);
@@ -327,11 +332,12 @@ static int process_access_response(struct fsnotify_group *group,
struct fanotify_perm_event *event;
int fd = response_struct->fd;
u32 response = response_struct->response;
+ int errno = fanotify_get_response_errno(response);
int ret = info_len;
struct fanotify_response_info_audit_rule friar;
- pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__,
- group, fd, response, info, info_len);
+ pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n",
+ __func__, group, fd, response, errno, info, info_len);
/*
* make sure the response is valid, if invalid we do nothing and either
* userspace can send a valid response or we will clean it up after the
@@ -342,7 +348,31 @@ static int process_access_response(struct fsnotify_group *group,
switch (response & FANOTIFY_RESPONSE_ACCESS) {
case FAN_ALLOW:
+ if (errno)
+ return -EINVAL;
+ break;
case FAN_DENY:
+ /* Custom errno is supported only for pre-content groups */
+ if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT)
+ return -EINVAL;
+
+ /*
+ * Limit errno to values expected on open(2)/read(2)/write(2)
+ * of regular files.
+ */
+ switch (errno) {
+ case 0:
+ case EIO:
+ case EPERM:
+ case EBUSY:
+ case ETXTBSY:
+ case EAGAIN:
+ case ENOSPC:
+ case EDQUOT:
+ break;
+ default:
+ return -EINVAL;
+ }
break;
default:
return -EINVAL;
@@ -380,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group,
return -ENOENT;
}
+static size_t copy_mnt_info_to_user(struct fanotify_event *event,
+ char __user *buf, int count)
+{
+ struct fanotify_event_info_mnt info = { };
+
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
+ info.hdr.len = FANOTIFY_MNT_INFO_LEN;
+
+ if (WARN_ON(count < info.hdr.len))
+ return -EFAULT;
+
+ info.mnt_id = FANOTIFY_ME(event)->mnt_id;
+
+ if (copy_to_user(buf, &info, sizeof(info)))
+ return -EFAULT;
+
+ return info.hdr.len;
+}
+
static size_t copy_error_info_to_user(struct fanotify_event *event,
char __user *buf, int count)
{
@@ -506,7 +555,7 @@ static int copy_pidfd_info_to_user(int pidfd,
size_t count)
{
struct fanotify_event_info_pidfd info = { };
- size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN;
+ size_t info_len = FANOTIFY_PIDFD_INFO_LEN;
if (WARN_ON_ONCE(info_len > count))
return -EFAULT;
@@ -521,6 +570,30 @@ static int copy_pidfd_info_to_user(int pidfd,
return info_len;
}
+static size_t copy_range_info_to_user(struct fanotify_event *event,
+ char __user *buf, int count)
+{
+ struct fanotify_perm_event *pevent = FANOTIFY_PERM(event);
+ struct fanotify_event_info_range info = { };
+ size_t info_len = FANOTIFY_RANGE_INFO_LEN;
+
+ if (WARN_ON_ONCE(info_len > count))
+ return -EFAULT;
+
+ if (WARN_ON_ONCE(!pevent->ppos))
+ return -EINVAL;
+
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE;
+ info.hdr.len = info_len;
+ info.offset = *(pevent->ppos);
+ info.count = pevent->count;
+
+ if (copy_to_user(buf, &info, info_len))
+ return -EFAULT;
+
+ return info_len;
+}
+
static int copy_info_records_to_user(struct fanotify_event *event,
struct fanotify_info *info,
unsigned int info_mode, int pidfd,
@@ -642,6 +715,24 @@ static int copy_info_records_to_user(struct fanotify_event *event,
total_bytes += ret;
}
+ if (fanotify_event_has_access_range(event)) {
+ ret = copy_range_info_to_user(event, buf, count);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ if (fanotify_is_mnt_event(event->mask)) {
+ ret = copy_mnt_info_to_user(event, buf, count);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
return total_bytes;
}
@@ -756,12 +847,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
buf += FAN_EVENT_METADATA_LEN;
count -= FAN_EVENT_METADATA_LEN;
- if (info_mode) {
- ret = copy_info_records_to_user(event, info, info_mode, pidfd,
- buf, count);
- if (ret < 0)
- goto out_close_fd;
- }
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+ buf, count);
+ if (ret < 0)
+ goto out_close_fd;
if (f)
fd_install(fd, f);
@@ -1245,6 +1334,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
* A group with FAN_UNLIMITED_MARKS does not contribute to mark count
* in the limited groups account.
*/
+ BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_MARKS));
if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) &&
!inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS))
return ERR_PTR(-ENOSPC);
@@ -1294,7 +1384,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
}
static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
- unsigned int fan_flags)
+ __u32 mask, unsigned int fan_flags)
{
/*
* Non evictable mark cannot be downgraded to evictable mark.
@@ -1321,6 +1411,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
return -EEXIST;
+ /* For now pre-content events are not generated for directories */
+ mask |= fsn_mark->mask;
+ if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
+ return -EEXIST;
+
return 0;
}
@@ -1347,7 +1442,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
/*
* Check if requested mark flags conflict with an existing mark flags.
*/
- ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
+ ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags);
if (ret)
goto out;
@@ -1404,11 +1499,13 @@ static struct hlist_head *fanotify_alloc_merge_hash(void)
/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
+ struct user_namespace *user_ns = current_user_ns();
struct fsnotify_group *group;
int f_flags, fd;
unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
unsigned int class = flags & FANOTIFY_CLASS_BITS;
unsigned int internal_flags = 0;
+ struct file *file;
pr_debug("%s: flags=%x event_f_flags=%x\n",
__func__, flags, event_f_flags);
@@ -1417,10 +1514,11 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
/*
* An unprivileged user can setup an fanotify group with
* limited functionality - an unprivileged group is limited to
- * notification events with file handles and it cannot use
- * unlimited queue/marks.
+ * notification events with file handles or mount ids and it
+ * cannot use unlimited queue/marks.
*/
- if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode)
+ if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) ||
+ !(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT)))
return -EPERM;
/*
@@ -1446,6 +1544,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
return -EINVAL;
+ /* Don't allow mixing mnt events with inode events for now */
+ if (flags & FAN_REPORT_MNT) {
+ if (class != FAN_CLASS_NOTIF)
+ return -EINVAL;
+ if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR))
+ return -EINVAL;
+ }
+
if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
return -EINVAL;
@@ -1477,7 +1583,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
(!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
return -EINVAL;
- f_flags = O_RDWR | __FMODE_NONOTIFY;
+ f_flags = O_RDWR;
if (flags & FAN_CLOEXEC)
f_flags |= O_CLOEXEC;
if (flags & FAN_NONBLOCK)
@@ -1491,8 +1597,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
}
/* Enforce groups limits per user in all containing user ns */
- group->fanotify_data.ucounts = inc_ucount(current_user_ns(),
- current_euid(),
+ group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(),
UCOUNT_FANOTIFY_GROUPS);
if (!group->fanotify_data.ucounts) {
fd = -EMFILE;
@@ -1501,6 +1606,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
group->fanotify_data.flags = flags | internal_flags;
group->memcg = get_mem_cgroup_from_mm(current->mm);
+ group->user_ns = get_user_ns(user_ns);
group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
if (!group->fanotify_data.merge_hash) {
@@ -1534,31 +1640,31 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
goto out_destroy_group;
}
+ BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE));
if (flags & FAN_UNLIMITED_QUEUE) {
- fd = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
- goto out_destroy_group;
group->max_events = UINT_MAX;
} else {
group->max_events = fanotify_max_queued_events;
}
- if (flags & FAN_UNLIMITED_MARKS) {
- fd = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
- goto out_destroy_group;
- }
-
if (flags & FAN_ENABLE_AUDIT) {
fd = -EPERM;
if (!capable(CAP_AUDIT_WRITE))
goto out_destroy_group;
}
- fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
+ fd = get_unused_fd_flags(f_flags);
if (fd < 0)
goto out_destroy_group;
+ file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group,
+ f_flags, FMODE_NONOTIFY);
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ fd = PTR_ERR(file);
+ goto out_destroy_group;
+ }
+ fd_install(fd, file);
return fd;
out_destroy_group:
@@ -1638,12 +1744,24 @@ static int fanotify_events_supported(struct fsnotify_group *group,
unsigned int flags)
{
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+ bool is_dir = d_is_dir(path->dentry);
/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
(mask & FAN_RENAME) ||
(flags & FAN_MARK_IGNORE);
/*
+ * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
+ * and they are only supported on regular files and directories.
+ */
+ if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
+ if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
+ return -EOPNOTSUPP;
+ if (!is_dir && !d_is_reg(path->dentry))
+ return -EINVAL;
+ }
+
+ /*
* Some filesystems such as 'proc' acquire unusual locks when opening
* files. For them fanotify permission events have high chances of
* deadlocking the system - open done when reporting fanotify event
@@ -1675,7 +1793,7 @@ static int fanotify_events_supported(struct fsnotify_group *group,
* but because we always allowed it, error only when using new APIs.
*/
if (strict_dir_events && mark_type == FAN_MARK_INODE &&
- !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
+ !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
return -ENOTDIR;
return 0;
@@ -1685,16 +1803,17 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
int dfd, const char __user *pathname)
{
struct inode *inode = NULL;
- struct vfsmount *mnt = NULL;
struct fsnotify_group *group;
struct path path;
struct fan_fsid __fsid, *fsid = NULL;
+ struct user_namespace *user_ns = NULL;
+ struct mnt_namespace *mntns;
u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
unsigned int obj_type, fid_mode;
- void *obj;
+ void *obj = NULL;
u32 umask = 0;
int ret;
@@ -1718,6 +1837,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
case FAN_MARK_FILESYSTEM:
obj_type = FSNOTIFY_OBJ_TYPE_SB;
break;
+ case FAN_MARK_MNTNS:
+ obj_type = FSNOTIFY_OBJ_TYPE_MNTNS;
+ break;
default:
return -EINVAL;
}
@@ -1765,21 +1887,36 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
group = fd_file(f)->private_data;
+ /* Only report mount events on mnt namespace */
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
+ if (mask & ~FANOTIFY_MOUNT_EVENTS)
+ return -EINVAL;
+ if (mark_type != FAN_MARK_MNTNS)
+ return -EINVAL;
+ } else {
+ if (mask & FANOTIFY_MOUNT_EVENTS)
+ return -EINVAL;
+ if (mark_type == FAN_MARK_MNTNS)
+ return -EINVAL;
+ }
+
/*
- * An unprivileged user is not allowed to setup mount nor filesystem
- * marks. This also includes setting up such marks by a group that
- * was initialized by an unprivileged user.
+ * A user is allowed to setup sb/mount/mntns marks only if it is
+ * capable in the user ns where the group was created.
*/
- if ((!capable(CAP_SYS_ADMIN) ||
- FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) &&
+ if (!ns_capable(group->user_ns, CAP_SYS_ADMIN) &&
mark_type != FAN_MARK_INODE)
return -EPERM;
/*
- * Permission events require minimum priority FAN_CLASS_CONTENT.
+ * Permission events are not allowed for FAN_CLASS_NOTIF.
+ * Pre-content permission events are not allowed for FAN_CLASS_CONTENT.
*/
if (mask & FANOTIFY_PERM_EVENTS &&
- group->priority < FSNOTIFY_PRIO_CONTENT)
+ group->priority == FSNOTIFY_PRIO_NORMAL)
+ return -EINVAL;
+ else if (mask & FANOTIFY_PRE_CONTENT_EVENTS &&
+ group->priority == FSNOTIFY_PRIO_CONTENT)
return -EINVAL;
if (mask & FAN_FS_ERROR &&
@@ -1802,7 +1939,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* point.
*/
fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
- if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
+ if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) &&
(!fid_mode || mark_type == FAN_MARK_MOUNT))
return -EINVAL;
@@ -1814,13 +1951,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
return -EINVAL;
+ /* Pre-content events are not currently generated for directories. */
+ if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
+ return -EINVAL;
+
if (mark_cmd == FAN_MARK_FLUSH) {
- if (mark_type == FAN_MARK_MOUNT)
- fsnotify_clear_vfsmount_marks_by_group(group);
- else if (mark_type == FAN_MARK_FILESYSTEM)
- fsnotify_clear_sb_marks_by_group(group);
- else
- fsnotify_clear_inode_marks_by_group(group);
+ fsnotify_clear_marks_by_group(group, obj_type);
return 0;
}
@@ -1847,18 +1983,35 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
fsid = &__fsid;
}
- /* inode held in place by reference to path; group by fget on fd */
- if (mark_type == FAN_MARK_INODE) {
+ /*
+ * In addition to being capable in the user ns where group was created,
+ * the user also needs to be capable in the user ns associated with
+ * the filesystem or in the user ns associated with the mntns
+ * (when marking mntns).
+ */
+ if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = path.dentry->d_inode;
obj = inode;
- } else {
- mnt = path.mnt;
- if (mark_type == FAN_MARK_MOUNT)
- obj = mnt;
- else
- obj = mnt->mnt_sb;
+ } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
+ user_ns = path.mnt->mnt_sb->s_user_ns;
+ obj = path.mnt;
+ } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
+ user_ns = path.mnt->mnt_sb->s_user_ns;
+ obj = path.mnt->mnt_sb;
+ } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ mntns = mnt_ns_from_dentry(path.dentry);
+ user_ns = mntns->user_ns;
+ obj = mntns;
}
+ ret = -EPERM;
+ if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ goto path_put_and_out;
+
+ ret = -EINVAL;
+ if (!obj)
+ goto path_put_and_out;
+
/*
* If some other task has this inode open for write we should not add
* an ignore mask, unless that ignore mask is supposed to survive
@@ -1866,10 +2019,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
*/
if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
- ret = mnt ? -EINVAL : -EISDIR;
+ ret = !inode ? -EINVAL : -EISDIR;
/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
if (ignore == FAN_MARK_IGNORE &&
- (mnt || S_ISDIR(inode->i_mode)))
+ (!inode || S_ISDIR(inode->i_mode)))
goto path_put_and_out;
ret = 0;
@@ -1878,7 +2031,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
- if (mnt || !S_ISDIR(inode->i_mode)) {
+ if (!inode || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
umask = FAN_EVENT_ON_CHILD;
/*
@@ -1952,7 +2105,7 @@ static int __init fanotify_user_setup(void)
FANOTIFY_DEFAULT_MAX_USER_MARKS);
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
@@ -1965,6 +2118,7 @@ static int __init fanotify_user_setup(void)
fanotify_perm_event_cachep =
KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
}
+ fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC);
fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index e933f9c65d90..1161eabf11ee 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -121,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
sb->s_dev, mflags, mark->mask, mark->ignore_mask);
+ } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector);
+
+ seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n",
+ mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask);
}
}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index f976949d2634..e2b4f17a48bb 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -28,6 +28,11 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
fsnotify_clear_marks_by_mount(mnt);
}
+void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{
+ fsnotify_clear_marks_by_mntns(mntns);
+}
+
/**
* fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @sb: superblock being unmounted.
@@ -193,7 +198,7 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask,
return mask & marks_mask;
}
-/* Are there any inode/mount/sb objects that are interested in this event? */
+/* Are there any inode/mount/sb objects that watch for these events? */
static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask,
__u32 mask)
{
@@ -203,6 +208,24 @@ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask,
return mask & marks_mask & ALL_FSNOTIFY_EVENTS;
}
+/* Report pre-content event with optional range info */
+int fsnotify_pre_content(const struct path *path, const loff_t *ppos,
+ size_t count)
+{
+ struct file_range range;
+
+ /* Report page aligned range only when pos is known */
+ if (!ppos)
+ return fsnotify_path(path, FS_PRE_ACCESS);
+
+ range.path = path;
+ range.pos = PAGE_ALIGN_DOWN(*ppos);
+ range.count = PAGE_ALIGN(*ppos + count) - range.pos;
+
+ return fsnotify_parent(path->dentry, FS_PRE_ACCESS, &range,
+ FSNOTIFY_EVENT_FILE_RANGE);
+}
+
/*
* Notify this dentry's parent about a child's events with child name info
* if parent is watching or if inode/sb/mount are interested in events with
@@ -402,7 +425,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
file_name, cookie, iter_info);
}
-static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
+static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp)
{
struct fsnotify_mark_connector *conn;
struct hlist_node *node = NULL;
@@ -520,14 +543,15 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
{
const struct path *path = fsnotify_data_path(data, data_type);
struct super_block *sb = fsnotify_data_sb(data, data_type);
- struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
+ const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type);
+ struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL;
struct fsnotify_iter_info iter_info = {};
struct mount *mnt = NULL;
struct inode *inode2 = NULL;
struct dentry *moved;
int inode2_type;
int ret = 0;
- __u32 test_mask, marks_mask;
+ __u32 test_mask, marks_mask = 0;
if (path)
mnt = real_mount(path->mnt);
@@ -560,17 +584,20 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
if ((!sbinfo || !sbinfo->sb_marks) &&
(!mnt || !mnt->mnt_fsnotify_marks) &&
(!inode || !inode->i_fsnotify_marks) &&
- (!inode2 || !inode2->i_fsnotify_marks))
+ (!inode2 || !inode2->i_fsnotify_marks) &&
+ (!mnt_data || !mnt_data->ns->n_fsnotify_marks))
return 0;
- marks_mask = READ_ONCE(sb->s_fsnotify_mask);
+ if (sb)
+ marks_mask |= READ_ONCE(sb->s_fsnotify_mask);
if (mnt)
marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask);
if (inode)
marks_mask |= READ_ONCE(inode->i_fsnotify_mask);
if (inode2)
marks_mask |= READ_ONCE(inode2->i_fsnotify_mask);
-
+ if (mnt_data)
+ marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask);
/*
* If this is a modify event we may need to clear some ignore masks.
@@ -600,6 +627,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
iter_info.marks[inode2_type] =
fsnotify_first_mark(&inode2->i_fsnotify_marks);
}
+ if (mnt_data) {
+ iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] =
+ fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks);
+ }
/*
* We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
@@ -623,11 +654,98 @@ out:
}
EXPORT_SYMBOL_GPL(fsnotify);
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+/*
+ * At open time we check fsnotify_sb_has_priority_watchers() and set the
+ * FMODE_NONOTIFY_ mode bits accordignly.
+ * Later, fsnotify permission hooks do not check if there are permission event
+ * watches, but that there were permission event watches at open time.
+ */
+void file_set_fsnotify_mode_from_watchers(struct file *file)
+{
+ struct dentry *dentry = file->f_path.dentry, *parent;
+ struct super_block *sb = dentry->d_sb;
+ __u32 mnt_mask, p_mask;
+
+ /* Is it a file opened by fanotify? */
+ if (FMODE_FSNOTIFY_NONE(file->f_mode))
+ return;
+
+ /*
+ * Permission events is a super set of pre-content events, so if there
+ * are no permission event watchers, there are also no pre-content event
+ * watchers and this is implied from the single FMODE_NONOTIFY_PERM bit.
+ */
+ if (likely(!fsnotify_sb_has_priority_watchers(sb,
+ FSNOTIFY_PRIO_CONTENT))) {
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);
+ return;
+ }
+
+ /*
+ * If there are permission event watchers but no pre-content event
+ * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that.
+ */
+ if ((!d_is_dir(dentry) && !d_is_reg(dentry)) ||
+ likely(!fsnotify_sb_has_priority_watchers(sb,
+ FSNOTIFY_PRIO_PRE_CONTENT))) {
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM);
+ return;
+ }
+
+ /*
+ * OK, there are some pre-content watchers. Check if anybody is
+ * watching for pre-content events on *this* file.
+ */
+ mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask);
+ if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask,
+ FSNOTIFY_PRE_CONTENT_EVENTS))) {
+ /* Enable pre-content events */
+ file_set_fsnotify_mode(file, 0);
+ return;
+ }
+
+ /* Is parent watching for pre-content events on this file? */
+ if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) {
+ parent = dget_parent(dentry);
+ p_mask = fsnotify_inode_watches_children(d_inode(parent));
+ dput(parent);
+ if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) {
+ /* Enable pre-content events */
+ file_set_fsnotify_mode(file, 0);
+ return;
+ }
+ }
+ /* Nobody watching for pre-content events from this file */
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM);
+}
+#endif
+
+void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+ struct fsnotify_mnt data = {
+ .ns = ns,
+ .mnt_id = real_mount(mnt)->mnt_id_unique,
+ };
+
+ if (WARN_ON_ONCE(!ns))
+ return;
+
+ /*
+ * This is an optimization as well as making sure fsnotify_init() has
+ * been called.
+ */
+ if (!ns->n_fsnotify_marks)
+ return;
+
+ fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0);
+}
+
static __init int fsnotify_init(void)
{
int ret;
- BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 663759ed6fbc..5950c7a67f41 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -33,6 +33,12 @@ static inline struct super_block *fsnotify_conn_sb(
return conn->obj;
}
+static inline struct mnt_namespace *fsnotify_conn_mntns(
+ struct fsnotify_mark_connector *conn)
+{
+ return conn->obj;
+}
+
static inline struct super_block *fsnotify_object_sb(void *obj,
enum fsnotify_obj_type obj_type)
{
@@ -89,6 +95,11 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
fsnotify_destroy_marks(fsnotify_sb_marks(sb));
}
+static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns)
+{
+ fsnotify_destroy_marks(&mntns->n_fsnotify_marks);
+}
+
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 993375f0db67..cd7d11b0eb08 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -121,7 +121,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
event->sync_cookie = cookie;
event->name_len = len;
if (len)
- strcpy(event->name, name->name);
+ strscpy(event->name, name->name, event->name_len + 1);
ret = fsnotify_add_event(group, fsn_event, inotify_merge);
if (ret) {
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e0c48956608a..b372fb2c56bd 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -58,7 +58,7 @@ struct kmem_cache *inotify_inode_mark_cachep __ro_after_init;
static long it_zero = 0;
static long it_int_max = INT_MAX;
-static struct ctl_table inotify_table[] = {
+static const struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 4981439e6209..798340db69d7 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -107,6 +107,8 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj,
return &real_mount(obj)->mnt_fsnotify_marks;
case FSNOTIFY_OBJ_TYPE_SB:
return fsnotify_sb_marks(obj);
+ case FSNOTIFY_OBJ_TYPE_MNTNS:
+ return &((struct mnt_namespace *)obj)->n_fsnotify_marks;
default:
return NULL;
}
@@ -120,6 +122,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
+ else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS)
+ return &fsnotify_conn_mntns(conn)->n_fsnotify_mask;
return NULL;
}
@@ -346,12 +350,15 @@ static void *fsnotify_detach_connector_from_object(
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
+ } else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0;
}
rcu_assign_pointer(*connp, NULL);
conn->obj = NULL;
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
- fsnotify_update_sb_watchers(sb, conn);
+ if (sb)
+ fsnotify_update_sb_watchers(sb, conn);
return inode;
}
@@ -724,7 +731,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
* Attach the sb info before attaching a connector to any object on sb.
* The sb info will remain attached as long as sb lives.
*/
- if (!fsnotify_sb_info(sb)) {
+ if (sb && !fsnotify_sb_info(sb)) {
err = fsnotify_attach_info_to_sb(sb);
if (err)
return err;
@@ -770,7 +777,8 @@ restart:
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
- fsnotify_update_sb_watchers(sb, conn);
+ if (sb)
+ fsnotify_update_sb_watchers(sb, conn);
/*
* Since connector is attached to object using cmpxchg() we are
* guaranteed that connector initialization is fully visible by anyone