summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jens Axboe <axboe@kernel.dk> 2022-05-09 15:35:28 +0300
committer: Jens Axboe <axboe@kernel.dk> 2022-05-09 15:35:28 +0300
commit: b5ba65df47cabcba6fe7a03f8f57513e9f78f72f (patch)
tree: dc417a35c3f0865d2151d10951f1e3aee6926e03
parent: 1308689906ad35b017eec8e595a2beb6f2f972fb (diff)
parent: 033b87d24f7257c45506bd043ad85ed24a9925e2 (diff)
download: linux-b5ba65df47cabcba6fe7a03f8f57513e9f78f72f.tar.xz
Merge branch 'for-5.19/io_uring-socket' into for-5.19/io_uring-passthrough
* for-5.19/io_uring-socket:
  io_uring: use the text representation of ops in trace
  io_uring: rename op -> opcode
  io_uring: add io_uring_get_opcode
  io_uring: add type to op enum
  io_uring: add socket(2) support
  net: add __sys_socket_file()
  io_uring: fix trace for reduced sqe padding
  io_uring: add fgetxattr and getxattr support
  io_uring: add fsetxattr and setxattr support
  fs: split off do_getxattr from getxattr
  fs: split off setxattr_copy and do_setxattr function from setxattr
-rw-r--r-- fs/internal.h | 29
-rw-r--r-- fs/io_uring.c | 471
-rw-r--r-- fs/xattr.c | 143
-rw-r--r-- include/linux/io_uring.h | 5
-rw-r--r-- include/linux/socket.h | 1
-rw-r--r-- include/trace/events/io_uring.h | 45
-rw-r--r-- include/uapi/linux/io_uring.h | 11
-rw-r--r-- net/socket.c | 52
8 files changed, 680 insertions, 77 deletions
diff --git a/fs/internal.h b/fs/internal.h
index 08503dc68d2b..9a6c233ee7f1 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe,
loff_t *offset,
size_t len, unsigned int flags);
+
+/*
+ * fs/xattr.c:
+ */
+struct xattr_name {
+ char name[XATTR_NAME_MAX + 1];
+};
+
+struct xattr_ctx {
+ /* Value of attribute */
+ union {
+ const void __user *cvalue;
+ void __user *value;
+ };
+ void *kvalue;
+ size_t size;
+ /* Attribute name */
+ struct xattr_name *kname;
+ unsigned int flags;
+};
+
+
+ssize_t do_getxattr(struct user_namespace *mnt_userns,
+ struct dentry *d,
+ struct xattr_ctx *ctx);
+
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct xattr_ctx *ctx);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9f340f44827b..53e54fc05488 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -80,6 +80,7 @@
#include <linux/io_uring.h>
#include <linux/audit.h>
#include <linux/security.h>
+#include <linux/xattr.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -578,6 +579,16 @@ struct io_accept {
unsigned long nofile;
};
+struct io_socket {
+ struct file *file;
+ int domain;
+ int type;
+ int protocol;
+ int flags;
+ u32 file_slot;
+ unsigned long nofile;
+};
+
struct io_sync {
struct file *file;
loff_t len;
@@ -782,6 +793,12 @@ struct io_async_rw {
struct wait_page_queue wpq;
};
+struct io_xattr {
+ struct file *file;
+ struct xattr_ctx ctx;
+ struct filename *filename;
+};
+
enum {
REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT,
@@ -946,6 +963,8 @@ struct io_kiocb {
struct io_symlink symlink;
struct io_hardlink hardlink;
struct io_msg msg;
+ struct io_xattr xattr;
+ struct io_socket sock;
};
u8 opcode;
@@ -1246,6 +1265,17 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.iopoll = 1,
},
+ [IORING_OP_FSETXATTR] = {
+ .needs_file = 1
+ },
+ [IORING_OP_SETXATTR] = {},
+ [IORING_OP_FGETXATTR] = {
+ .needs_file = 1
+ },
+ [IORING_OP_GETXATTR] = {},
+ [IORING_OP_SOCKET] = {
+ .audit_skip = 1,
+ },
};
/* requests with any of those set should undergo io_disarm_next() */
@@ -1290,6 +1320,107 @@ static struct kmem_cache *req_cachep;
static const struct file_operations io_uring_fops;
+const char *io_uring_get_opcode(u8 opcode)
+{
+ switch ((enum io_uring_op)opcode) {
+ case IORING_OP_NOP:
+ return "NOP";
+ case IORING_OP_READV:
+ return "READV";
+ case IORING_OP_WRITEV:
+ return "WRITEV";
+ case IORING_OP_FSYNC:
+ return "FSYNC";
+ case IORING_OP_READ_FIXED:
+ return "READ_FIXED";
+ case IORING_OP_WRITE_FIXED:
+ return "WRITE_FIXED";
+ case IORING_OP_POLL_ADD:
+ return "POLL_ADD";
+ case IORING_OP_POLL_REMOVE:
+ return "POLL_REMOVE";
+ case IORING_OP_SYNC_FILE_RANGE:
+ return "SYNC_FILE_RANGE";
+ case IORING_OP_SENDMSG:
+ return "SENDMSG";
+ case IORING_OP_RECVMSG:
+ return "RECVMSG";
+ case IORING_OP_TIMEOUT:
+ return "TIMEOUT";
+ case IORING_OP_TIMEOUT_REMOVE:
+ return "TIMEOUT_REMOVE";
+ case IORING_OP_ACCEPT:
+ return "ACCEPT";
+ case IORING_OP_ASYNC_CANCEL:
+ return "ASYNC_CANCEL";
+ case IORING_OP_LINK_TIMEOUT:
+ return "LINK_TIMEOUT";
+ case IORING_OP_CONNECT:
+ return "CONNECT";
+ case IORING_OP_FALLOCATE:
+ return "FALLOCATE";
+ case IORING_OP_OPENAT:
+ return "OPENAT";
+ case IORING_OP_CLOSE:
+ return "CLOSE";
+ case IORING_OP_FILES_UPDATE:
+ return "FILES_UPDATE";
+ case IORING_OP_STATX:
+ return "STATX";
+ case IORING_OP_READ:
+ return "READ";
+ case IORING_OP_WRITE:
+ return "WRITE";
+ case IORING_OP_FADVISE:
+ return "FADVISE";
+ case IORING_OP_MADVISE:
+ return "MADVISE";
+ case IORING_OP_SEND:
+ return "SEND";
+ case IORING_OP_RECV:
+ return "RECV";
+ case IORING_OP_OPENAT2:
+ return "OPENAT2";
+ case IORING_OP_EPOLL_CTL:
+ return "EPOLL_CTL";
+ case IORING_OP_SPLICE:
+ return "SPLICE";
+ case IORING_OP_PROVIDE_BUFFERS:
+ return "PROVIDE_BUFFERS";
+ case IORING_OP_REMOVE_BUFFERS:
+ return "REMOVE_BUFFERS";
+ case IORING_OP_TEE:
+ return "TEE";
+ case IORING_OP_SHUTDOWN:
+ return "SHUTDOWN";
+ case IORING_OP_RENAMEAT:
+ return "RENAMEAT";
+ case IORING_OP_UNLINKAT:
+ return "UNLINKAT";
+ case IORING_OP_MKDIRAT:
+ return "MKDIRAT";
+ case IORING_OP_SYMLINKAT:
+ return "SYMLINKAT";
+ case IORING_OP_LINKAT:
+ return "LINKAT";
+ case IORING_OP_MSG_RING:
+ return "MSG_RING";
+ case IORING_OP_FSETXATTR:
+ return "FSETXATTR";
+ case IORING_OP_SETXATTR:
+ return "SETXATTR";
+ case IORING_OP_FGETXATTR:
+ return "FGETXATTR";
+ case IORING_OP_GETXATTR:
+ return "GETXATTR";
+ case IORING_OP_SOCKET:
+ return "SOCKET";
+ case IORING_OP_LAST:
+ return "INVALID";
+ }
+ return "INVALID";
+}
+
struct sock *io_uring_get_socket(struct file *file)
{
#if defined(CONFIG_UNIX)
@@ -4205,6 +4336,257 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
+static inline void __io_xattr_finish(struct io_kiocb *req)
+{
+ struct io_xattr *ix = &req->xattr;
+
+ if (ix->filename)
+ putname(ix->filename);
+
+ kfree(ix->ctx.kname);
+ kvfree(ix->ctx.kvalue);
+}
+
+static void io_xattr_finish(struct io_kiocb *req, int ret)
+{
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+
+ __io_xattr_finish(req);
+ if (ret < 0)
+ req_set_fail(req);
+
+ io_req_complete(req, ret);
+}
+
+static int __io_getxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_xattr *ix = &req->xattr;
+ const char __user *name;
+ int ret;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (unlikely(sqe->ioprio))
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ ix->filename = NULL;
+ ix->ctx.kvalue = NULL;
+ name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ ix->ctx.size = READ_ONCE(sqe->len);
+ ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+ if (ix->ctx.flags)
+ return -EINVAL;
+
+ ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
+ if (!ix->ctx.kname)
+ return -ENOMEM;
+
+ ret = strncpy_from_user(ix->ctx.kname->name, name,
+ sizeof(ix->ctx.kname->name));
+ if (!ret || ret == sizeof(ix->ctx.kname->name))
+ ret = -ERANGE;
+ if (ret < 0) {
+ kfree(ix->ctx.kname);
+ return ret;
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+}
+
+static int io_fgetxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return __io_getxattr_prep(req, sqe);
+}
+
+static int io_getxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_xattr *ix = &req->xattr;
+ const char __user *path;
+ int ret;
+
+ ret = __io_getxattr_prep(req, sqe);
+ if (ret)
+ return ret;
+
+ path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+ ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+ if (IS_ERR(ix->filename)) {
+ ret = PTR_ERR(ix->filename);
+ ix->filename = NULL;
+ }
+
+ return ret;
+}
+
+static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_xattr *ix = &req->xattr;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt),
+ req->file->f_path.dentry,
+ &ix->ctx);
+
+ io_xattr_finish(req, ret);
+ return 0;
+}
+
+static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_xattr *ix = &req->xattr;
+ unsigned int lookup_flags = LOOKUP_FOLLOW;
+ struct path path;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+retry:
+ ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
+ if (!ret) {
+ ret = do_getxattr(mnt_user_ns(path.mnt),
+ path.dentry,
+ &ix->ctx);
+
+ path_put(&path);
+ if (retry_estale(ret, lookup_flags)) {
+ lookup_flags |= LOOKUP_REVAL;
+ goto retry;
+ }
+ }
+
+ io_xattr_finish(req, ret);
+ return 0;
+}
+
+static int __io_setxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_xattr *ix = &req->xattr;
+ const char __user *name;
+ int ret;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (unlikely(sqe->ioprio))
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ ix->filename = NULL;
+ name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ ix->ctx.kvalue = NULL;
+ ix->ctx.size = READ_ONCE(sqe->len);
+ ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+ ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
+ if (!ix->ctx.kname)
+ return -ENOMEM;
+
+ ret = setxattr_copy(name, &ix->ctx);
+ if (ret) {
+ kfree(ix->ctx.kname);
+ return ret;
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+}
+
+static int io_setxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_xattr *ix = &req->xattr;
+ const char __user *path;
+ int ret;
+
+ ret = __io_setxattr_prep(req, sqe);
+ if (ret)
+ return ret;
+
+ path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+ ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+ if (IS_ERR(ix->filename)) {
+ ret = PTR_ERR(ix->filename);
+ ix->filename = NULL;
+ }
+
+ return ret;
+}
+
+static int io_fsetxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return __io_setxattr_prep(req, sqe);
+}
+
+static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
+ struct path *path)
+{
+ struct io_xattr *ix = &req->xattr;
+ int ret;
+
+ ret = mnt_want_write(path->mnt);
+ if (!ret) {
+ ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry, &ix->ctx);
+ mnt_drop_write(path->mnt);
+ }
+
+ return ret;
+}
+
+static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = __io_setxattr(req, issue_flags, &req->file->f_path);
+ io_xattr_finish(req, ret);
+
+ return 0;
+}
+
+static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_xattr *ix = &req->xattr;
+ unsigned int lookup_flags = LOOKUP_FOLLOW;
+ struct path path;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+retry:
+ ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
+ if (!ret) {
+ ret = __io_setxattr(req, issue_flags, &path);
+ path_put(&path);
+ if (retry_estale(ret, lookup_flags)) {
+ lookup_flags |= LOOKUP_REVAL;
+ goto retry;
+ }
+ }
+
+ io_xattr_finish(req, ret);
+ return 0;
+}
+
static int io_unlinkat_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@@ -5760,6 +6142,62 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
+static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_socket *sock = &req->sock;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index)
+ return -EINVAL;
+
+ sock->domain = READ_ONCE(sqe->fd);
+ sock->type = READ_ONCE(sqe->off);
+ sock->protocol = READ_ONCE(sqe->len);
+ sock->file_slot = READ_ONCE(sqe->file_index);
+ sock->nofile = rlimit(RLIMIT_NOFILE);
+
+ sock->flags = sock->type & ~SOCK_TYPE_MASK;
+ if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
+ return -EINVAL;
+ if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return -EINVAL;
+ return 0;
+}
+
+static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_socket *sock = &req->sock;
+ bool fixed = !!sock->file_slot;
+ struct file *file;
+ int ret, fd;
+
+ if (!fixed) {
+ fd = __get_unused_fd_flags(sock->flags, sock->nofile);
+ if (unlikely(fd < 0))
+ return fd;
+ }
+ file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
+ if (IS_ERR(file)) {
+ if (!fixed)
+ put_unused_fd(fd);
+ ret = PTR_ERR(file);
+ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+ return -EAGAIN;
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ req_set_fail(req);
+ } else if (!fixed) {
+ fd_install(fd, file);
+ ret = fd;
+ } else {
+ ret = io_install_fixed_file(req, file, issue_flags,
+ sock->file_slot - 1);
+ }
+ __io_req_complete(req, issue_flags, ret, 0);
+ return 0;
+}
+
static int io_connect_prep_async(struct io_kiocb *req)
{
struct io_async_connect *io = req->async_data;
@@ -5845,6 +6283,7 @@ IO_NETOP_PREP_ASYNC(sendmsg);
IO_NETOP_PREP_ASYNC(recvmsg);
IO_NETOP_PREP_ASYNC(connect);
IO_NETOP_PREP(accept);
+IO_NETOP_PREP(socket);
IO_NETOP_FN(send);
IO_NETOP_FN(recv);
#endif /* CONFIG_NET */
@@ -7147,6 +7586,16 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_linkat_prep(req, sqe);
case IORING_OP_MSG_RING:
return io_msg_ring_prep(req, sqe);
+ case IORING_OP_FSETXATTR:
+ return io_fsetxattr_prep(req, sqe);
+ case IORING_OP_SETXATTR:
+ return io_setxattr_prep(req, sqe);
+ case IORING_OP_FGETXATTR:
+ return io_fgetxattr_prep(req, sqe);
+ case IORING_OP_GETXATTR:
+ return io_getxattr_prep(req, sqe);
+ case IORING_OP_SOCKET:
+ return io_socket_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7296,6 +7745,12 @@ static void io_clean_op(struct io_kiocb *req)
if (req->statx.filename)
putname(req->statx.filename);
break;
+ case IORING_OP_SETXATTR:
+ case IORING_OP_FSETXATTR:
+ case IORING_OP_GETXATTR:
+ case IORING_OP_FGETXATTR:
+ __io_xattr_finish(req);
+ break;
}
}
if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -7452,6 +7907,21 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
case IORING_OP_MSG_RING:
ret = io_msg_ring(req, issue_flags);
break;
+ case IORING_OP_FSETXATTR:
+ ret = io_fsetxattr(req, issue_flags);
+ break;
+ case IORING_OP_SETXATTR:
+ ret = io_setxattr(req, issue_flags);
+ break;
+ case IORING_OP_FGETXATTR:
+ ret = io_fgetxattr(req, issue_flags);
+ break;
+ case IORING_OP_GETXATTR:
+ ret = io_getxattr(req, issue_flags);
+ break;
+ case IORING_OP_SOCKET:
+ ret = io_socket(req, issue_flags);
+ break;
default:
ret = -EINVAL;
break;
@@ -12025,6 +12495,7 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
BUILD_BUG_SQE_ELEM(44, __u32, file_index);
+ BUILD_BUG_SQE_ELEM(48, __u64, addr3);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
sizeof(struct io_uring_rsrc_update));
diff --git a/fs/xattr.c b/fs/xattr.c
index 998045165916..e8dd03e4561e 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -25,6 +25,8 @@
#include <linux/uaccess.h>
+#include "internal.h"
+
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
@@ -539,44 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
/*
* Extended attribute SET operations
*/
-static long
-setxattr(struct user_namespace *mnt_userns, struct dentry *d,
- const char __user *name, const void __user *value, size_t size,
- int flags)
+
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
{
int error;
- void *kvalue = NULL;
- char kname[XATTR_NAME_MAX + 1];
- if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
+ if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
return -EINVAL;
- error = strncpy_from_user(kname, name, sizeof(kname));
- if (error == 0 || error == sizeof(kname))
- error = -ERANGE;
+ error = strncpy_from_user(ctx->kname->name, name,
+ sizeof(ctx->kname->name));
+ if (error == 0 || error == sizeof(ctx->kname->name))
+ return -ERANGE;
if (error < 0)
return error;
- if (size) {
- if (size > XATTR_SIZE_MAX)
+ error = 0;
+ if (ctx->size) {
+ if (ctx->size > XATTR_SIZE_MAX)
return -E2BIG;
- kvalue = kvmalloc(size, GFP_KERNEL);
- if (!kvalue)
- return -ENOMEM;
- if (copy_from_user(kvalue, value, size)) {
- error = -EFAULT;
- goto out;
+
+ ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
+ if (IS_ERR(ctx->kvalue)) {
+ error = PTR_ERR(ctx->kvalue);
+ ctx->kvalue = NULL;
}
- if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
- (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
- posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
- kvalue, size);
}
- error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
-out:
- kvfree(kvalue);
+ return error;
+}
+
+static void setxattr_convert(struct user_namespace *mnt_userns,
+ struct dentry *d, struct xattr_ctx *ctx)
+{
+ if (ctx->size &&
+ ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+ (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
+ posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
+ ctx->kvalue, ctx->size);
+}
+
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct xattr_ctx *ctx)
+{
+ setxattr_convert(mnt_userns, dentry, ctx);
+ return vfs_setxattr(mnt_userns, dentry, ctx->kname->name,
+ ctx->kvalue, ctx->size, ctx->flags);
+}
+
+static long
+setxattr(struct user_namespace *mnt_userns, struct dentry *d,
+ const char __user *name, const void __user *value, size_t size,
+ int flags)
+{
+ struct xattr_name kname;
+ struct xattr_ctx ctx = {
+ .cvalue = value,
+ .kvalue = NULL,
+ .size = size,
+ .kname = &kname,
+ .flags = flags,
+ };
+ int error;
+ error = setxattr_copy(name, &ctx);
+ if (error)
+ return error;
+
+ error = do_setxattr(mnt_userns, d, &ctx);
+
+ kvfree(ctx.kvalue);
return error;
}
@@ -642,44 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
/*
* Extended attribute GET operations
*/
-static ssize_t
-getxattr(struct user_namespace *mnt_userns, struct dentry *d,
- const char __user *name, void __user *value, size_t size)
+ssize_t
+do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+ struct xattr_ctx *ctx)
{
ssize_t error;
- void *kvalue = NULL;
- char kname[XATTR_NAME_MAX + 1];
-
- error = strncpy_from_user(kname, name, sizeof(kname));
- if (error == 0 || error == sizeof(kname))
- error = -ERANGE;
- if (error < 0)
- return error;
+ char *kname = ctx->kname->name;
- if (size) {
- if (size > XATTR_SIZE_MAX)
- size = XATTR_SIZE_MAX;
- kvalue = kvzalloc(size, GFP_KERNEL);
- if (!kvalue)
+ if (ctx->size) {
+ if (ctx->size > XATTR_SIZE_MAX)
+ ctx->size = XATTR_SIZE_MAX;
+ ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
+ if (!ctx->kvalue)
return -ENOMEM;
}
- error = vfs_getxattr(mnt_userns, d, kname, kvalue, size);
+ error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
- kvalue, error);
- if (size && copy_to_user(value, kvalue, error))
+ ctx->kvalue, error);
+ if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
error = -EFAULT;
- } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
+ } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
/* The file system tried to returned a value bigger
than XATTR_SIZE_MAX bytes. Not possible. */
error = -E2BIG;
}
- kvfree(kvalue);
+ return error;
+}
+
+static ssize_t
+getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+ const char __user *name, void __user *value, size_t size)
+{
+ ssize_t error;
+ struct xattr_name kname;
+ struct xattr_ctx ctx = {
+ .value = value,
+ .kvalue = NULL,
+ .size = size,
+ .kname = &kname,
+ .flags = 0,
+ };
+
+ error = strncpy_from_user(kname.name, name, sizeof(kname.name));
+ if (error == 0 || error == sizeof(kname.name))
+ error = -ERANGE;
+ if (error < 0)
+ return error;
+
+ error = do_getxattr(mnt_userns, d, &ctx);
+ kvfree(ctx.kvalue);
return error;
}
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 1814e698d861..24651c229ed2 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -10,6 +10,7 @@ struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void);
+const char *io_uring_get_opcode(u8 opcode);
static inline void io_uring_files_cancel(void)
{
@@ -42,6 +43,10 @@ static inline void io_uring_files_cancel(void)
static inline void io_uring_free(struct task_struct *tsk)
{
}
+static inline const char *io_uring_get_opcode(u8 opcode)
+{
+ return "";
+}
#endif
#endif
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 6f85f5d957ef..a1882e1e71d2 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -434,6 +434,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags,
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
+extern struct file *__sys_socket_file(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
int addrlen, int file_flags);
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 42534ec2ab9d..3f2961baebcc 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -7,6 +7,7 @@
#include <linux/tracepoint.h>
#include <uapi/linux/io_uring.h>
+#include <linux/io_uring.h>
struct io_wq_work;
@@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work,
__entry->rw = rw;
),
- TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
- __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
+ __entry->ctx, __entry->req, __entry->user_data,
+ io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
);
@@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer,
__entry->opcode = opcode;
),
- TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
- __entry->ctx, __entry->req, __entry->data, __entry->opcode)
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
+ __entry->ctx, __entry->req, __entry->data,
+ io_uring_get_opcode(__entry->opcode))
);
/**
@@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link,
__entry->link = link;
),
- TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
- __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
- __entry->link)
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
+ __entry->ctx, __entry->req, __entry->user_data,
+ io_uring_get_opcode(__entry->opcode), __entry->link)
);
/**
@@ -389,9 +392,9 @@ TRACE_EVENT(io_uring_submit_sqe,
__entry->sq_thread = sq_thread;
),
- TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
"non block %d, sq_thread %d", __entry->ctx, __entry->req,
- __entry->user_data, __entry->opcode,
+ __entry->user_data, io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->force_nonblock, __entry->sq_thread)
);
@@ -433,8 +436,9 @@ TRACE_EVENT(io_uring_poll_arm,
__entry->events = events;
),
- TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
- __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
+ __entry->ctx, __entry->req, __entry->user_data,
+ io_uring_get_opcode(__entry->opcode),
__entry->mask, __entry->events)
);
@@ -470,8 +474,9 @@ TRACE_EVENT(io_uring_task_add,
__entry->mask = mask;
),
- TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
- __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
+ __entry->ctx, __entry->req, __entry->user_data,
+ io_uring_get_opcode(__entry->opcode),
__entry->mask)
);
@@ -506,7 +511,7 @@ TRACE_EVENT(io_uring_req_failed,
__field( u16, personality )
__field( u32, file_index )
__field( u64, pad1 )
- __field( u64, pad2 )
+ __field( u64, addr3 )
__field( int, error )
),
@@ -525,22 +530,24 @@ TRACE_EVENT(io_uring_req_failed,
__entry->personality = sqe->personality;
__entry->file_index = sqe->file_index;
__entry->pad1 = sqe->__pad2[0];
- __entry->pad2 = sqe->__pad2[1];
+ __entry->addr3 = sqe->addr3;
__entry->error = error;
),
TP_printk("ring %p, req %p, user_data 0x%llx, "
- "op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
+ "opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
"len=%u, rw_flags=0x%x, buf_index=%d, "
- "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+ "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
+ "error=%d",
__entry->ctx, __entry->req, __entry->user_data,
- __entry->opcode, __entry->flags, __entry->ioprio,
+ io_uring_get_opcode(__entry->opcode),
+ __entry->flags, __entry->ioprio,
(unsigned long long)__entry->off,
(unsigned long long) __entry->addr, __entry->len,
__entry->op_flags,
__entry->buf_index, __entry->personality, __entry->file_index,
(unsigned long long) __entry->pad1,
- (unsigned long long) __entry->pad2, __entry->error)
+ (unsigned long long) __entry->addr3, __entry->error)
);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 06621a278cb6..31e719f38615 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -45,6 +45,7 @@ struct io_uring_sqe {
__u32 rename_flags;
__u32 unlink_flags;
__u32 hardlink_flags;
+ __u32 xattr_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
@@ -60,7 +61,8 @@ struct io_uring_sqe {
__s32 splice_fd_in;
__u32 file_index;
};
- __u64 __pad2[2];
+ __u64 addr3;
+ __u64 __pad2[1];
};
enum {
@@ -117,7 +119,7 @@ enum {
*/
#define IORING_SETUP_TASKRUN_FLAG (1U << 9)
-enum {
+enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
IORING_OP_WRITEV,
@@ -159,6 +161,11 @@ enum {
IORING_OP_SYMLINKAT,
IORING_OP_LINKAT,
IORING_OP_MSG_RING,
+ IORING_OP_FSETXATTR,
+ IORING_OP_SETXATTR,
+ IORING_OP_FGETXATTR,
+ IORING_OP_GETXATTR,
+ IORING_OP_SOCKET,
/* this goes last, obviously */
IORING_OP_LAST,
diff --git a/net/socket.c b/net/socket.c
index 6887840682bb..bb6a1a12fbde 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags)
struct socket *sock_from_file(struct file *file)
{
if (file->f_op == &socket_file_ops)
- return file->private_data; /* set in sock_map_fd */
+ return file->private_data; /* set in sock_alloc_file */
return NULL;
}
@@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
}
EXPORT_SYMBOL(sock_create_kern);
-int __sys_socket(int family, int type, int protocol)
+static struct socket *__sys_socket_create(int family, int type, int protocol)
{
- int retval;
struct socket *sock;
- int flags;
+ int retval;
/* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
@@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol)
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
- flags = type & ~SOCK_TYPE_MASK;
- if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
+ if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return ERR_PTR(-EINVAL);
type &= SOCK_TYPE_MASK;
+ retval = sock_create(family, type, protocol, &sock);
+ if (retval < 0)
+ return ERR_PTR(retval);
+
+ return sock;
+}
+
+struct file *__sys_socket_file(int family, int type, int protocol)
+{
+ struct socket *sock;
+ struct file *file;
+ int flags;
+
+ sock = __sys_socket_create(family, type, protocol);
+ if (IS_ERR(sock))
+ return ERR_CAST(sock);
+
+ flags = type & ~SOCK_TYPE_MASK;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
- retval = sock_create(family, type, protocol, &sock);
- if (retval < 0)
- return retval;
+ file = sock_alloc_file(sock, flags, NULL);
+ if (IS_ERR(file))
+ sock_release(sock);
+
+ return file;
+}
+
+int __sys_socket(int family, int type, int protocol)
+{
+ struct socket *sock;
+ int flags;
+
+ sock = __sys_socket_create(family, type, protocol);
+ if (IS_ERR(sock))
+ return PTR_ERR(sock);
+
+ flags = type & ~SOCK_TYPE_MASK;
+ if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+ flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}