diff options
author | Jens Axboe <axboe@kernel.dk> | 2022-05-09 15:35:28 +0300 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2022-05-09 15:35:28 +0300 |
commit | b5ba65df47cabcba6fe7a03f8f57513e9f78f72f (patch) | |
tree | dc417a35c3f0865d2151d10951f1e3aee6926e03 | |
parent | 1308689906ad35b017eec8e595a2beb6f2f972fb (diff) | |
parent | 033b87d24f7257c45506bd043ad85ed24a9925e2 (diff) | |
download | linux-b5ba65df47cabcba6fe7a03f8f57513e9f78f72f.tar.xz |
Merge branch 'for-5.19/io_uring-socket' into for-5.19/io_uring-passthrough
* for-5.19/io_uring-socket:
io_uring: use the text representation of ops in trace
io_uring: rename op -> opcode
io_uring: add io_uring_get_opcode
io_uring: add type to op enum
io_uring: add socket(2) support
net: add __sys_socket_file()
io_uring: fix trace for reduced sqe padding
io_uring: add fgetxattr and getxattr support
io_uring: add fsetxattr and setxattr support
fs: split off do_getxattr from getxattr
fs: split off setxattr_copy and do_setxattr function from setxattr
-rw-r--r-- | fs/internal.h | 29 | ||||
-rw-r--r-- | fs/io_uring.c | 471 | ||||
-rw-r--r-- | fs/xattr.c | 143 | ||||
-rw-r--r-- | include/linux/io_uring.h | 5 | ||||
-rw-r--r-- | include/linux/socket.h | 1 | ||||
-rw-r--r-- | include/trace/events/io_uring.h | 45 | ||||
-rw-r--r-- | include/uapi/linux/io_uring.h | 11 | ||||
-rw-r--r-- | net/socket.c | 52 |
8 files changed, 680 insertions, 77 deletions
diff --git a/fs/internal.h b/fs/internal.h index 08503dc68d2b..9a6c233ee7f1 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in, struct pipe_inode_info *opipe, loff_t *offset, size_t len, unsigned int flags); + +/* + * fs/xattr.c: + */ +struct xattr_name { + char name[XATTR_NAME_MAX + 1]; +}; + +struct xattr_ctx { + /* Value of attribute */ + union { + const void __user *cvalue; + void __user *value; + }; + void *kvalue; + size_t size; + /* Attribute name */ + struct xattr_name *kname; + unsigned int flags; +}; + + +ssize_t do_getxattr(struct user_namespace *mnt_userns, + struct dentry *d, + struct xattr_ctx *ctx); + +int setxattr_copy(const char __user *name, struct xattr_ctx *ctx); +int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct xattr_ctx *ctx); diff --git a/fs/io_uring.c b/fs/io_uring.c index 9f340f44827b..53e54fc05488 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -80,6 +80,7 @@ #include <linux/io_uring.h> #include <linux/audit.h> #include <linux/security.h> +#include <linux/xattr.h> #define CREATE_TRACE_POINTS #include <trace/events/io_uring.h> @@ -578,6 +579,16 @@ struct io_accept { unsigned long nofile; }; +struct io_socket { + struct file *file; + int domain; + int type; + int protocol; + int flags; + u32 file_slot; + unsigned long nofile; +}; + struct io_sync { struct file *file; loff_t len; @@ -782,6 +793,12 @@ struct io_async_rw { struct wait_page_queue wpq; }; +struct io_xattr { + struct file *file; + struct xattr_ctx ctx; + struct filename *filename; +}; + enum { REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, @@ -946,6 +963,8 @@ struct io_kiocb { struct io_symlink symlink; struct io_hardlink hardlink; struct io_msg msg; + struct io_xattr xattr; + struct io_socket sock; }; u8 opcode; @@ -1246,6 +1265,17 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, .iopoll = 1, }, + [IORING_OP_FSETXATTR] = { + .needs_file = 1 + }, + [IORING_OP_SETXATTR] = {}, + [IORING_OP_FGETXATTR] = { + .needs_file = 1 + }, + [IORING_OP_GETXATTR] = {}, + [IORING_OP_SOCKET] = { + .audit_skip = 1, + }, }; /* requests with any of those set should undergo io_disarm_next() */ @@ -1290,6 +1320,107 @@ static struct kmem_cache *req_cachep; static const struct file_operations io_uring_fops; +const char *io_uring_get_opcode(u8 opcode) +{ + switch ((enum io_uring_op)opcode) { + case IORING_OP_NOP: + return "NOP"; + case IORING_OP_READV: + return "READV"; + case IORING_OP_WRITEV: + return "WRITEV"; + case IORING_OP_FSYNC: + return "FSYNC"; + case IORING_OP_READ_FIXED: + return "READ_FIXED"; + case IORING_OP_WRITE_FIXED: + return "WRITE_FIXED"; + case IORING_OP_POLL_ADD: + return "POLL_ADD"; + case IORING_OP_POLL_REMOVE: + return "POLL_REMOVE"; + case IORING_OP_SYNC_FILE_RANGE: + return "SYNC_FILE_RANGE"; + case IORING_OP_SENDMSG: + return "SENDMSG"; + case IORING_OP_RECVMSG: + return "RECVMSG"; + case IORING_OP_TIMEOUT: + return "TIMEOUT"; + case IORING_OP_TIMEOUT_REMOVE: + return "TIMEOUT_REMOVE"; + case IORING_OP_ACCEPT: + return "ACCEPT"; + case IORING_OP_ASYNC_CANCEL: + return "ASYNC_CANCEL"; + case IORING_OP_LINK_TIMEOUT: + return "LINK_TIMEOUT"; + case IORING_OP_CONNECT: + return "CONNECT"; + case IORING_OP_FALLOCATE: + return "FALLOCATE"; + case IORING_OP_OPENAT: + return "OPENAT"; + case IORING_OP_CLOSE: + return "CLOSE"; + case IORING_OP_FILES_UPDATE: + return "FILES_UPDATE"; + case IORING_OP_STATX: + return "STATX"; + case IORING_OP_READ: + return "READ"; + case IORING_OP_WRITE: + return "WRITE"; + case IORING_OP_FADVISE: + return "FADVISE"; + case IORING_OP_MADVISE: + return "MADVISE"; + case IORING_OP_SEND: + return "SEND"; + case IORING_OP_RECV: + return "RECV"; + case IORING_OP_OPENAT2: + return "OPENAT2"; + case IORING_OP_EPOLL_CTL: + return "EPOLL_CTL"; + case IORING_OP_SPLICE: + return "SPLICE"; + case IORING_OP_PROVIDE_BUFFERS: + return "PROVIDE_BUFFERS"; + case IORING_OP_REMOVE_BUFFERS: + return "REMOVE_BUFFERS"; + case IORING_OP_TEE: + return "TEE"; + case IORING_OP_SHUTDOWN: + return "SHUTDOWN"; + case IORING_OP_RENAMEAT: + return "RENAMEAT"; + case IORING_OP_UNLINKAT: + return "UNLINKAT"; + case IORING_OP_MKDIRAT: + return "MKDIRAT"; + case IORING_OP_SYMLINKAT: + return "SYMLINKAT"; + case IORING_OP_LINKAT: + return "LINKAT"; + case IORING_OP_MSG_RING: + return "MSG_RING"; + case IORING_OP_FSETXATTR: + return "FSETXATTR"; + case IORING_OP_SETXATTR: + return "SETXATTR"; + case IORING_OP_FGETXATTR: + return "FGETXATTR"; + case IORING_OP_GETXATTR: + return "GETXATTR"; + case IORING_OP_SOCKET: + return "SOCKET"; + case IORING_OP_LAST: + return "INVALID"; + } + return "INVALID"; +} + struct sock *io_uring_get_socket(struct file *file) { #if defined(CONFIG_UNIX) @@ -4205,6 +4336,257 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags) return 0; } +static inline void __io_xattr_finish(struct io_kiocb *req) +{ + struct io_xattr *ix = &req->xattr; + + if (ix->filename) + putname(ix->filename); + + kfree(ix->ctx.kname); + kvfree(ix->ctx.kvalue); +} + +static void io_xattr_finish(struct io_kiocb *req, int ret) +{ + req->flags &= ~REQ_F_NEED_CLEANUP; + + __io_xattr_finish(req); + if (ret < 0) + req_set_fail(req); + + io_req_complete(req, ret); +} + +static int __io_getxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_xattr *ix = &req->xattr; + const char __user *name; + int ret; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (unlikely(sqe->ioprio)) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + ix->filename = NULL; + ix->ctx.kvalue = NULL; + name = u64_to_user_ptr(READ_ONCE(sqe->addr)); + ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + ix->ctx.size = READ_ONCE(sqe->len); + ix->ctx.flags = READ_ONCE(sqe->xattr_flags); + + if (ix->ctx.flags) + return -EINVAL; + + ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); + if (!ix->ctx.kname) + return -ENOMEM; + + ret = strncpy_from_user(ix->ctx.kname->name, name, + sizeof(ix->ctx.kname->name)); + if (!ret || ret == sizeof(ix->ctx.kname->name)) + ret = -ERANGE; + if (ret < 0) { + kfree(ix->ctx.kname); + return ret; + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_fgetxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + return __io_getxattr_prep(req, sqe); +} + +static int io_getxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_xattr *ix = &req->xattr; + const char __user *path; + int ret; + + ret = __io_getxattr_prep(req, sqe); + if (ret) + return ret; + + path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); + + ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); + if (IS_ERR(ix->filename)) { + ret = PTR_ERR(ix->filename); + ix->filename = NULL; + } + + return ret; +} + +static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_xattr *ix = &req->xattr; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt), + req->file->f_path.dentry, + &ix->ctx); + + io_xattr_finish(req, ret); + return 0; +} + +static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_xattr *ix = &req->xattr; + unsigned int lookup_flags = LOOKUP_FOLLOW; + struct path path; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + +retry: + ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); + if (!ret) { + ret = do_getxattr(mnt_user_ns(path.mnt), + path.dentry, + &ix->ctx); + + path_put(&path); + if (retry_estale(ret, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } + } + + io_xattr_finish(req, ret); + return 0; +} + +static int __io_setxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_xattr *ix = &req->xattr; + const char __user *name; + int ret; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (unlikely(sqe->ioprio)) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + ix->filename = NULL; + name = u64_to_user_ptr(READ_ONCE(sqe->addr)); + ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + ix->ctx.kvalue = NULL; + ix->ctx.size = READ_ONCE(sqe->len); + ix->ctx.flags = READ_ONCE(sqe->xattr_flags); + + ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); + if (!ix->ctx.kname) + return -ENOMEM; + + ret = setxattr_copy(name, &ix->ctx); + if (ret) { + kfree(ix->ctx.kname); + return ret; + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_setxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_xattr *ix = &req->xattr; + const char __user *path; + int ret; + + ret = __io_setxattr_prep(req, sqe); + if (ret) + return ret; + + path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); + + ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); + if (IS_ERR(ix->filename)) { + ret = PTR_ERR(ix->filename); + ix->filename = NULL; + } + + return ret; +} + +static int io_fsetxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + return __io_setxattr_prep(req, sqe); +} + +static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags, + struct path *path) +{ + struct io_xattr *ix = &req->xattr; + int ret; + + ret = mnt_want_write(path->mnt); + if (!ret) { + ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry, &ix->ctx); + mnt_drop_write(path->mnt); + } + + return ret; +} + +static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags) +{ + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = __io_setxattr(req, issue_flags, &req->file->f_path); + io_xattr_finish(req, ret); + + return 0; +} + +static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_xattr *ix = &req->xattr; + unsigned int lookup_flags = LOOKUP_FOLLOW; + struct path path; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + +retry: + ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); + if (!ret) { + ret = __io_setxattr(req, issue_flags, &path); + path_put(&path); + if (retry_estale(ret, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } + } + + io_xattr_finish(req, ret); + return 0; +} + static int io_unlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -5760,6 +6142,62 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags) return 0; } +static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_socket *sock = &req->sock; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index) + return -EINVAL; + + sock->domain = READ_ONCE(sqe->fd); + sock->type = READ_ONCE(sqe->off); + sock->protocol = READ_ONCE(sqe->len); + sock->file_slot = READ_ONCE(sqe->file_index); + sock->nofile = rlimit(RLIMIT_NOFILE); + + sock->flags = sock->type & ~SOCK_TYPE_MASK; + if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) + return -EINVAL; + if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + return 0; +} + +static int io_socket(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_socket *sock = &req->sock; + bool fixed = !!sock->file_slot; + struct file *file; + int ret, fd; + + if (!fixed) { + fd = __get_unused_fd_flags(sock->flags, sock->nofile); + if (unlikely(fd < 0)) + return fd; + } + file = __sys_socket_file(sock->domain, sock->type, sock->protocol); + if (IS_ERR(file)) { + if (!fixed) + put_unused_fd(fd); + ret = PTR_ERR(file); + if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) + return -EAGAIN; + if (ret == -ERESTARTSYS) + ret = -EINTR; + req_set_fail(req); + } else if (!fixed) { + fd_install(fd, file); + ret = fd; + } else { + ret = io_install_fixed_file(req, file, issue_flags, + sock->file_slot - 1); + } + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + static int io_connect_prep_async(struct io_kiocb *req) { struct io_async_connect *io = req->async_data; @@ -5845,6 +6283,7 @@ IO_NETOP_PREP_ASYNC(sendmsg); IO_NETOP_PREP_ASYNC(recvmsg); IO_NETOP_PREP_ASYNC(connect); IO_NETOP_PREP(accept); +IO_NETOP_PREP(socket); IO_NETOP_FN(send); IO_NETOP_FN(recv); #endif /* CONFIG_NET */ @@ -7147,6 +7586,16 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return io_linkat_prep(req, sqe); case IORING_OP_MSG_RING: return io_msg_ring_prep(req, sqe); + case IORING_OP_FSETXATTR: + return io_fsetxattr_prep(req, sqe); + case IORING_OP_SETXATTR: + return io_setxattr_prep(req, sqe); + case IORING_OP_FGETXATTR: + return io_fgetxattr_prep(req, sqe); + case IORING_OP_GETXATTR: + return io_getxattr_prep(req, sqe); + case IORING_OP_SOCKET: + return io_socket_prep(req, sqe); } printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", @@ -7296,6 +7745,12 @@ static void io_clean_op(struct io_kiocb *req) if (req->statx.filename) putname(req->statx.filename); break; + case IORING_OP_SETXATTR: + case IORING_OP_FSETXATTR: + case IORING_OP_GETXATTR: + case IORING_OP_FGETXATTR: + __io_xattr_finish(req); + break; } } if ((req->flags & REQ_F_POLLED) && req->apoll) { @@ -7452,6 +7907,21 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) case IORING_OP_MSG_RING: ret = io_msg_ring(req, issue_flags); break; + case IORING_OP_FSETXATTR: + ret = io_fsetxattr(req, issue_flags); + break; + case IORING_OP_SETXATTR: + ret = io_setxattr(req, issue_flags); + break; + case IORING_OP_FGETXATTR: + ret = io_fgetxattr(req, issue_flags); + break; + case IORING_OP_GETXATTR: + ret = io_getxattr(req, issue_flags); + break; + case IORING_OP_SOCKET: + ret = io_socket(req, issue_flags); + break; default: ret = -EINVAL; break; @@ -12025,6 +12495,7 @@ static int __init io_uring_init(void) BUILD_BUG_SQE_ELEM(42, __u16, personality); BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); BUILD_BUG_SQE_ELEM(44, __u32, file_index); + BUILD_BUG_SQE_ELEM(48, __u64, addr3); BUILD_BUG_ON(sizeof(struct io_uring_files_update) != sizeof(struct io_uring_rsrc_update)); diff --git a/fs/xattr.c b/fs/xattr.c index 998045165916..e8dd03e4561e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -25,6 +25,8 @@ #include <linux/uaccess.h> +#include "internal.h" + static const char * strcmp_prefix(const char *a, const char *a_prefix) { @@ -539,44 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); /* * Extended attribute SET operations */ -static long -setxattr(struct user_namespace *mnt_userns, struct dentry *d, - const char __user *name, const void __user *value, size_t size, - int flags) + +int setxattr_copy(const char __user *name, struct xattr_ctx *ctx) { int error; - void *kvalue = NULL; - char kname[XATTR_NAME_MAX + 1]; - if (flags & ~(XATTR_CREATE|XATTR_REPLACE)) + if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE)) return -EINVAL; - error = strncpy_from_user(kname, name, sizeof(kname)); - if (error == 0 || error == sizeof(kname)) - error = -ERANGE; + error = strncpy_from_user(ctx->kname->name, name, + sizeof(ctx->kname->name)); + if (error == 0 || error == sizeof(ctx->kname->name)) + return -ERANGE; if (error < 0) return error; - if (size) { - if (size > XATTR_SIZE_MAX) + error = 0; + if (ctx->size) { + if (ctx->size > XATTR_SIZE_MAX) return -E2BIG; - kvalue = kvmalloc(size, GFP_KERNEL); - if (!kvalue) - return -ENOMEM; - if (copy_from_user(kvalue, value, size)) { - error = -EFAULT; - goto out; + + ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size); + if (IS_ERR(ctx->kvalue)) { + error = PTR_ERR(ctx->kvalue); + ctx->kvalue = NULL; } - if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || - (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) - posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d), - kvalue, size); } - error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags); -out: - kvfree(kvalue); + return error; +} + +static void setxattr_convert(struct user_namespace *mnt_userns, + struct dentry *d, struct xattr_ctx *ctx) +{ + if (ctx->size && + ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || + (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))) + posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d), + ctx->kvalue, ctx->size); +} + +int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct xattr_ctx *ctx) +{ + setxattr_convert(mnt_userns, dentry, ctx); + return vfs_setxattr(mnt_userns, dentry, ctx->kname->name, + ctx->kvalue, ctx->size, ctx->flags); +} + +static long +setxattr(struct user_namespace *mnt_userns, struct dentry *d, + const char __user *name, const void __user *value, size_t size, + int flags) +{ + struct xattr_name kname; + struct xattr_ctx ctx = { + .cvalue = value, + .kvalue = NULL, + .size = size, + .kname = &kname, + .flags = flags, + }; + int error; + error = setxattr_copy(name, &ctx); + if (error) + return error; + + error = do_setxattr(mnt_userns, d, &ctx); + + kvfree(ctx.kvalue); return error; } @@ -642,44 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, /* * Extended attribute GET operations */ -static ssize_t -getxattr(struct user_namespace *mnt_userns, struct dentry *d, - const char __user *name, void __user *value, size_t size) +ssize_t +do_getxattr(struct user_namespace *mnt_userns, struct dentry *d, + struct xattr_ctx *ctx) { ssize_t error; - void *kvalue = NULL; - char kname[XATTR_NAME_MAX + 1]; - - error = strncpy_from_user(kname, name, sizeof(kname)); - if (error == 0 || error == sizeof(kname)) - error = -ERANGE; - if (error < 0) - return error; + char *kname = ctx->kname->name; - if (size) { - if (size > XATTR_SIZE_MAX) - size = XATTR_SIZE_MAX; - kvalue = kvzalloc(size, GFP_KERNEL); - if (!kvalue) + if (ctx->size) { + if (ctx->size > XATTR_SIZE_MAX) + ctx->size = XATTR_SIZE_MAX; + ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL); + if (!ctx->kvalue) return -ENOMEM; } - error = vfs_getxattr(mnt_userns, d, kname, kvalue, size); + error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size); if (error > 0) { if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d), - kvalue, error); - if (size && copy_to_user(value, kvalue, error)) + ctx->kvalue, error); + if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error)) error = -EFAULT; - } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { + } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) { /* The file system tried to returned a value bigger than XATTR_SIZE_MAX bytes. Not possible. */ error = -E2BIG; } - kvfree(kvalue); + return error; +} + +static ssize_t +getxattr(struct user_namespace *mnt_userns, struct dentry *d, + const char __user *name, void __user *value, size_t size) +{ + ssize_t error; + struct xattr_name kname; + struct xattr_ctx ctx = { + .value = value, + .kvalue = NULL, + .size = size, + .kname = &kname, + .flags = 0, + }; + + error = strncpy_from_user(kname.name, name, sizeof(kname.name)); + if (error == 0 || error == sizeof(kname.name)) + error = -ERANGE; + if (error < 0) + return error; + + error = do_getxattr(mnt_userns, d, &ctx); + kvfree(ctx.kvalue); return error; } diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 1814e698d861..24651c229ed2 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -10,6 +10,7 @@ struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); void io_uring_unreg_ringfd(void); +const char *io_uring_get_opcode(u8 opcode); static inline void io_uring_files_cancel(void) { @@ -42,6 +43,10 @@ static inline void io_uring_files_cancel(void) static inline void io_uring_free(struct task_struct *tsk) { } +static inline const char *io_uring_get_opcode(u8 opcode) +{ + return ""; +} #endif #endif diff --git a/include/linux/socket.h b/include/linux/socket.h index 6f85f5d957ef..a1882e1e71d2 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -434,6 +434,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags, extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); +extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 42534ec2ab9d..3f2961baebcc 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -7,6 +7,7 @@ #include <linux/tracepoint.h> #include <uapi/linux/io_uring.h> +#include <linux/io_uring.h> struct io_wq_work; @@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work, __entry->rw = rw; ), - TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p", - __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p", + __entry->ctx, __entry->req, __entry->user_data, + io_uring_get_opcode(__entry->opcode), __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) ); @@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer, __entry->opcode = opcode; ), - TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d", - __entry->ctx, __entry->req, __entry->data, __entry->opcode) + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s", + __entry->ctx, __entry->req, __entry->data, + io_uring_get_opcode(__entry->opcode)) ); /** @@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link, __entry->link = link; ), - TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p", - __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, - __entry->link) + TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p", + __entry->ctx, __entry->req, __entry->user_data, + io_uring_get_opcode(__entry->opcode), __entry->link) ); /** @@ -389,9 +392,9 @@ TRACE_EVENT(io_uring_submit_sqe, __entry->sq_thread = sq_thread; ), - TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, " + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, " "non block %d, sq_thread %d", __entry->ctx, __entry->req, - __entry->user_data, __entry->opcode, + __entry->user_data, io_uring_get_opcode(__entry->opcode), __entry->flags, __entry->force_nonblock, __entry->sq_thread) ); @@ -433,8 +436,9 @@ TRACE_EVENT(io_uring_poll_arm, __entry->events = events; ), - TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x", - __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x", + __entry->ctx, __entry->req, __entry->user_data, + io_uring_get_opcode(__entry->opcode), __entry->mask, __entry->events) ); @@ -470,8 +474,9 @@ TRACE_EVENT(io_uring_task_add, __entry->mask = mask; ), - TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x", - __entry->ctx, __entry->req, __entry->user_data, __entry->opcode, + TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x", + __entry->ctx, __entry->req, __entry->user_data, + io_uring_get_opcode(__entry->opcode), __entry->mask) ); @@ -506,7 +511,7 @@ TRACE_EVENT(io_uring_req_failed, __field( u16, personality ) __field( u32, file_index ) __field( u64, pad1 ) - __field( u64, pad2 ) + __field( u64, addr3 ) __field( int, error ) ), @@ -525,22 +530,24 @@ TRACE_EVENT(io_uring_req_failed, __entry->personality = sqe->personality; __entry->file_index = sqe->file_index; __entry->pad1 = sqe->__pad2[0]; - __entry->pad2 = sqe->__pad2[1]; + __entry->addr3 = sqe->addr3; __entry->error = error; ), TP_printk("ring %p, req %p, user_data 0x%llx, " - "op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, " + "opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, " "len=%u, rw_flags=0x%x, buf_index=%d, " - "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d", + "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, " + "error=%d", __entry->ctx, __entry->req, __entry->user_data, - __entry->opcode, __entry->flags, __entry->ioprio, + io_uring_get_opcode(__entry->opcode), + __entry->flags, __entry->ioprio, (unsigned long long)__entry->off, (unsigned long long) __entry->addr, __entry->len, __entry->op_flags, __entry->buf_index, __entry->personality, __entry->file_index, (unsigned long long) __entry->pad1, - (unsigned long long) __entry->pad2, __entry->error) + (unsigned long long) __entry->addr3, __entry->error) ); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 06621a278cb6..31e719f38615 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -45,6 +45,7 @@ struct io_uring_sqe { __u32 rename_flags; __u32 unlink_flags; __u32 hardlink_flags; + __u32 xattr_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -60,7 +61,8 @@ struct io_uring_sqe { __s32 splice_fd_in; __u32 file_index; }; - __u64 __pad2[2]; + __u64 addr3; + __u64 __pad2[1]; }; enum { @@ -117,7 +119,7 @@ enum { */ #define IORING_SETUP_TASKRUN_FLAG (1U << 9) -enum { +enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, IORING_OP_WRITEV, @@ -159,6 +161,11 @@ enum { IORING_OP_SYMLINKAT, IORING_OP_LINKAT, IORING_OP_MSG_RING, + IORING_OP_FSETXATTR, + IORING_OP_SETXATTR, + IORING_OP_FGETXATTR, + IORING_OP_GETXATTR, + IORING_OP_SOCKET, /* this goes last, obviously */ IORING_OP_LAST, diff --git a/net/socket.c b/net/socket.c index 6887840682bb..bb6a1a12fbde 100644 --- a/net/socket.c +++ b/net/socket.c @@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags) struct socket *sock_from_file(struct file *file) { if (file->f_op == &socket_file_ops) - return file->private_data; /* set in sock_map_fd */ + return file->private_data; /* set in sock_alloc_file */ return NULL; } @@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct } EXPORT_SYMBOL(sock_create_kern); -int __sys_socket(int family, int type, int protocol) +static struct socket *__sys_socket_create(int family, int type, int protocol) { - int retval; struct socket *sock; - int flags; + int retval; /* Check the SOCK_* constants for consistency. */ BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); @@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol) BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); - flags = type & ~SOCK_TYPE_MASK; - if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) - return -EINVAL; + if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return ERR_PTR(-EINVAL); type &= SOCK_TYPE_MASK; + retval = sock_create(family, type, protocol, &sock); + if (retval < 0) + return ERR_PTR(retval); + + return sock; +} + +struct file *__sys_socket_file(int family, int type, int protocol) +{ + struct socket *sock; + struct file *file; + int flags; + + sock = __sys_socket_create(family, type, protocol); + if (IS_ERR(sock)) + return ERR_CAST(sock); + + flags = type & ~SOCK_TYPE_MASK; if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - retval = sock_create(family, type, protocol, &sock); - if (retval < 0) - return retval; + file = sock_alloc_file(sock, flags, NULL); + if (IS_ERR(file)) + sock_release(sock); + + return file; +} + +int __sys_socket(int family, int type, int protocol) +{ + struct socket *sock; + int flags; + + sock = __sys_socket_create(family, type, protocol); + if (IS_ERR(sock)) + return PTR_ERR(sock); + + flags = type & ~SOCK_TYPE_MASK; + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); } |