From 3503895788d402d6a3814085ed582c364ec3e903 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 31 Oct 2023 12:02:06 -0400 Subject: virtio_pci: move structure to a header These are guest/host interfaces, so they belong in the header where e.g. qemu will know to find them. Note: we added a new structure as opposed to extending existing one because someone might be relying on the size of the existing structure staying unchanged. Add a warning to avoid using sizeof. Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- include/uapi/linux/virtio_pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index f703afc7ad31..44f4dd2add18 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -166,6 +166,17 @@ struct virtio_pci_common_cfg { __le32 queue_used_hi; /* read-write */ }; +/* + * Warning: do not use sizeof on this: use offsetofend for + * specific fields you need. + */ +struct virtio_pci_modern_common_cfg { + struct virtio_pci_common_cfg cfg; + + __le16 queue_notify_data; /* read-write */ + __le16 queue_reset; /* read-write */ +}; + /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ struct virtio_pci_cfg_cap { struct virtio_pci_cap cap; -- cgit v1.2.3 From d3933152442b7f94419e9ea71835d71b620baf0e Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 3 Nov 2023 11:38:04 -0700 Subject: btrfs: make OWNER_REF_KEY type value smallest among inline refs BTRFS_EXTENT_OWNER_REF_KEY is the type of simple quotas extent owner refs. This special inline ref goes in front of all other inline refs. In general, inline refs have a required sorted order s.t. type never decreases (among other requirements). This was recently reified into a tree-checker and fsck rule, which broke simple quotas. To be fair, though, in a sense, the new owner ref item had also violated that not yet fully enforced requirement. This fix brings the owner ref item into compliance with the requirement that inline ref type never decrease. btrfs/301 exercises this behavior and should pass again with this fix. Fixes: d9a620f77e33 ("btrfs: new inline ref storing owning subvol of data extents") Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index c25fc9614594..d24e8e121507 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -219,6 +219,22 @@ */ #define BTRFS_METADATA_ITEM_KEY 169 +/* + * Special inline ref key which stores the id of the subvolume which originally + * created the extent. This subvolume owns the extent permanently from the + * perspective of simple quotas. Needed to know which subvolume to free quota + * usage from when the extent is deleted. + * + * Stored as an inline ref rather to avoid wasting space on a separate item on + * top of the existing extent item. However, unlike the other inline refs, + * there is one one owner ref per extent rather than one per extent. + * + * Because of this, it goes at the front of the list of inline refs, and thus + * must have a lower type value than any other inline ref type (to satisfy the + * disk format rule that inline refs have non-decreasing type). + */ +#define BTRFS_EXTENT_OWNER_REF_KEY 172 + #define BTRFS_TREE_BLOCK_REF_KEY 176 #define BTRFS_EXTENT_DATA_REF_KEY 178 @@ -233,14 +249,6 @@ #define BTRFS_SHARED_DATA_REF_KEY 184 -/* - * Special inline ref key which stores the id of the subvolume which originally - * created the extent. This subvolume owns the extent permanently from the - * perspective of simple quotas. Needed to know which subvolume to free quota - * usage from when the extent is deleted. - */ -#define BTRFS_EXTENT_OWNER_REF_KEY 188 - /* * block groups give us hints into the extent allocation trees. Which * blocks are free etc etc -- cgit v1.2.3 From 8a924db2d7b5eb69ba08b1a0af46e9f1359a9bdf Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Mon, 2 Oct 2023 08:57:33 -0400 Subject: fs: Pass AT_GETATTR_NOSEC flag to getattr interface function When vfs_getattr_nosec() calls a filesystem's getattr interface function then the 'nosec' should propagate into this function so that vfs_getattr_nosec() can again be called from the filesystem's gettattr rather than vfs_getattr(). The latter would add unnecessary security checks that the initial vfs_getattr_nosec() call wanted to avoid. Therefore, introduce the getattr flag GETATTR_NOSEC and allow to pass with the new getattr_flags parameter to the getattr interface function. In overlayfs and ecryptfs use this flag to determine which one of the two functions to call. In a recent code change introduced to IMA vfs_getattr_nosec() ended up calling vfs_getattr() in overlayfs, which in turn called security_inode_getattr() on an exiting process that did not have current->fs set anymore, which then caused a kernel NULL pointer dereference. With this change the call to security_inode_getattr() can be avoided, thus avoiding the NULL pointer dereference. Reported-by: Fixes: db1d1e8b9867 ("IMA: use vfs_getattr_nosec to get the i_version") Cc: Alexander Viro Cc: Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Tyler Hicks Cc: Mimi Zohar Suggested-by: Christian Brauner Co-developed-by: Amir Goldstein Signed-off-by: Stefan Berger Link: https://lore.kernel.org/r/20231002125733.1251467-1-stefanb@linux.vnet.ibm.com Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/ecryptfs/inode.c | 12 ++++++++++-- fs/overlayfs/inode.c | 10 +++++----- fs/overlayfs/overlayfs.h | 8 ++++++++ fs/stat.c | 6 +++++- include/uapi/linux/fcntl.h | 3 +++ 5 files changed, 31 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a25dd3d20008..b0e8774c435a 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -998,6 +998,14 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap, return rc; } +static int ecryptfs_do_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + if (flags & AT_GETATTR_NOSEC) + return vfs_getattr_nosec(path, stat, request_mask, flags); + return vfs_getattr(path, stat, request_mask, flags); +} + static int ecryptfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) @@ -1006,8 +1014,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap, struct kstat lower_stat; int rc; - rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat, - request_mask, flags); + rc = ecryptfs_do_getattr(ecryptfs_dentry_to_lower_path(dentry), + &lower_stat, request_mask, flags); if (!rc) { fsstack_copy_attr_all(d_inode(dentry), ecryptfs_inode_to_lower(d_inode(dentry))); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 345b8f161ca4..c63b31a460be 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -171,7 +171,7 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat, request_mask, flags); + err = ovl_do_getattr(&realpath, stat, request_mask, flags); if (err) goto out; @@ -196,8 +196,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, (!is_dir ? STATX_NLINK : 0); ovl_path_lower(dentry, &realpath); - err = vfs_getattr(&realpath, &lowerstat, - lowermask, flags); + err = ovl_do_getattr(&realpath, &lowerstat, lowermask, + flags); if (err) goto out; @@ -249,8 +249,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, ovl_path_lowerdata(dentry, &realpath); if (realpath.dentry) { - err = vfs_getattr(&realpath, &lowerdatastat, - lowermask, flags); + err = ovl_do_getattr(&realpath, &lowerdatastat, + lowermask, flags); if (err) goto out; } else { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ca88b2636a57..05c3dd597fa8 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -408,6 +408,14 @@ static inline bool ovl_open_flags_need_copy_up(int flags) return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } +static inline int ovl_do_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + if (flags & AT_GETATTR_NOSEC) + return vfs_getattr_nosec(path, stat, request_mask, flags); + return vfs_getattr(path, stat, request_mask, flags); +} + /* util.c */ int ovl_get_write_access(struct dentry *dentry); void ovl_put_write_access(struct dentry *dentry); diff --git a/fs/stat.c b/fs/stat.c index 24bb0209e459..f721d26ec3f7 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -133,7 +133,8 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) return inode->i_op->getattr(idmap, path, stat, - request_mask, query_flags); + request_mask, + query_flags | AT_GETATTR_NOSEC); generic_fillattr(idmap, request_mask, inode, stat); return 0; @@ -166,6 +167,9 @@ int vfs_getattr(const struct path *path, struct kstat *stat, { int retval; + if (WARN_ON_ONCE(query_flags & AT_GETATTR_NOSEC)) + return -EPERM; + retval = security_inode_getattr(path); if (retval) return retval; diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6c80f96049bd..282e90aeb163 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -116,5 +116,8 @@ #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to compare object identity and may not be usable to open_by_handle_at(2) */ +#if defined(__KERNEL__) +#define AT_GETATTR_NOSEC 0x80000000 +#endif #endif /* _UAPI_LINUX_FCNTL_H */ -- cgit v1.2.3 From 63ee44540205d993854f143a5ab1d7d9e63ffcf1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 23 Aug 2023 14:54:55 -0700 Subject: dma-buf/sync_file: Add SET_DEADLINE ioctl The initial purpose is for igt tests, but this would also be useful for compositors that wait until close to vblank deadline to make decisions about which frame to show. The igt tests can be found at: https://gitlab.freedesktop.org/robclark/igt-gpu-tools/-/commits/fence-deadline v2: Clarify the timebase, add link to igt tests v3: Use u64 value in ns to express deadline. v4: More doc Signed-off-by: Rob Clark Acked-by: Pekka Paalanen Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20230823215458.203366-3-robdclark@gmail.com --- drivers/dma-buf/dma-fence.c | 3 ++- drivers/dma-buf/sync_file.c | 19 +++++++++++++++++++ include/uapi/linux/sync_file.h | 22 ++++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 8aa8f8cb7071..e0fd99e61a2d 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -934,7 +934,8 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout); * the GPU's devfreq to reduce frequency, when in fact the opposite is what is * needed. * - * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline. + * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline + * (or indirectly via userspace facing ioctls like &sync_set_deadline). * The deadline hint provides a way for the waiting driver, or userspace, to * convey an appropriate sense of urgency to the signaling driver. * diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 2e9a316c596a..d9b1c1b2a72b 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -347,6 +347,22 @@ out: return ret; } +static int sync_file_ioctl_set_deadline(struct sync_file *sync_file, + unsigned long arg) +{ + struct sync_set_deadline ts; + + if (copy_from_user(&ts, (void __user *)arg, sizeof(ts))) + return -EFAULT; + + if (ts.pad) + return -EINVAL; + + dma_fence_set_deadline(sync_file->fence, ns_to_ktime(ts.deadline_ns)); + + return 0; +} + static long sync_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -359,6 +375,9 @@ static long sync_file_ioctl(struct file *file, unsigned int cmd, case SYNC_IOC_FILE_INFO: return sync_file_ioctl_fence_info(sync_file, arg); + case SYNC_IOC_SET_DEADLINE: + return sync_file_ioctl_set_deadline(sync_file, arg); + default: return -ENOTTY; } diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h index ff0a931833e2..ff1f38889dcf 100644 --- a/include/uapi/linux/sync_file.h +++ b/include/uapi/linux/sync_file.h @@ -76,6 +76,27 @@ struct sync_file_info { __u64 sync_fence_info; }; +/** + * struct sync_set_deadline - SYNC_IOC_SET_DEADLINE - set a deadline hint on a fence + * @deadline_ns: absolute time of the deadline + * @pad: must be zero + * + * Allows userspace to set a deadline on a fence, see &dma_fence_set_deadline + * + * The timebase for the deadline is CLOCK_MONOTONIC (same as vblank). For + * example + * + * clock_gettime(CLOCK_MONOTONIC, &t); + * deadline_ns = (t.tv_sec * 1000000000L) + t.tv_nsec + ns_until_deadline + */ +struct sync_set_deadline { + __u64 deadline_ns; + /* Not strictly needed for alignment but gives some possibility + * for future extension: + */ + __u64 pad; +}; + #define SYNC_IOC_MAGIC '>' /* @@ -87,5 +108,6 @@ struct sync_file_info { #define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data) #define SYNC_IOC_FILE_INFO _IOWR(SYNC_IOC_MAGIC, 4, struct sync_file_info) +#define SYNC_IOC_SET_DEADLINE _IOW(SYNC_IOC_MAGIC, 5, struct sync_set_deadline) #endif /* _UAPI_LINUX_SYNC_H */ -- cgit v1.2.3