/*
 * path: root/io_uring/msg_ring.c
 * blob: 976c4ba68ee7ec07052d0a5473a487ed8a1c9822 (plain)
 */
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"

/*
 * Per-request command data for IORING_OP_MSG_RING. The fields are filled
 * from the SQE in io_msg_ring_prep() and consumed by the issue handlers.
 */
struct io_msg {
	/* not referenced by the code in this file; handlers use req->file */
	struct file			*file;
	/* user_data for the CQE posted to the target ring (from sqe->off) */
	u64 user_data;
	/* value passed as the CQE result to the target ring (from sqe->len) */
	u32 len;
	/* IORING_MSG_* command selector (from sqe->addr) */
	u32 cmd;
	/* index into the sender's fixed file table (from sqe->addr3) */
	u32 src_fd;
	/* slot argument handed to __io_fixed_fd_install() (from sqe->file_index) */
	u32 dst_fd;
	/* message flags; only IORING_MSG_RING_CQE_SKIP is accepted by prep */
	u32 flags;
};

/*
 * IORING_MSG_DATA: post a CQE carrying msg->user_data and msg->len to the
 * ring that req->file refers to.
 *
 * Returns 0 if the CQE was posted, -EINVAL if any of the fd fields or flags
 * are set (they have no meaning for a data message), or -EOVERFLOW if
 * io_post_aux_cqe() reported failure.
 */
static int io_msg_ring_data(struct io_kiocb *req)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	/* Reject fields that only apply to the fd-passing command. */
	if (msg->src_fd)
		return -EINVAL;
	if (msg->dst_fd)
		return -EINVAL;
	if (msg->flags)
		return -EINVAL;

	return io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0, true) ?
		0 : -EOVERFLOW;
}

/*
 * Undo io_double_lock_ctx(). The target lock (@octx) is always released;
 * the source lock (@ctx) is released only when IO_URING_F_UNLOCKED is set,
 * which is the only case in which io_double_lock_ctx() acquired it.
 */
static void io_double_unlock_ctx(struct io_ring_ctx *ctx,
				 struct io_ring_ctx *octx,
				 unsigned int issue_flags)
{
	unsigned int took_src_lock = issue_flags & IO_URING_F_UNLOCKED;

	if (took_src_lock) {
		mutex_unlock(&ctx->uring_lock);
	}

	mutex_unlock(&octx->uring_lock);
}

/*
 * Acquire the uring_lock of both the source (@ctx) and target (@octx) ring.
 * The caller must guarantee ctx != octx.
 *
 * Returns 0 with the locks held, or -EAGAIN if the target lock could not be
 * taken without blocking.
 */
static int io_double_lock_ctx(struct io_ring_ctx *ctx,
			      struct io_ring_ctx *octx,
			      unsigned int issue_flags)
{
	struct io_ring_ctx *first, *second;

	/*
	 * Without IO_URING_F_UNLOCKED the source lock is treated as already
	 * held, so blocking on the target could invert the lock order
	 * against another task. Only a trylock is safe here; on failure the
	 * request is meant to be retried from io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED))
		return mutex_trylock(&octx->uring_lock) ? 0 : -EAGAIN;

	/* Neither lock is held: take them in ascending address order. */
	if (ctx < octx) {
		first = ctx;
		second = octx;
	} else {
		first = octx;
		second = ctx;
	}

	mutex_lock(&first->uring_lock);
	mutex_lock(&second->uring_lock);
	return 0;
}

/*
 * IORING_MSG_SEND_FD: install the file found in slot msg->src_fd of the
 * sender's fixed file table into the target ring (slot msg->dst_fd), and,
 * unless IORING_MSG_RING_CQE_SKIP is set, post a CQE to the target.
 *
 * Returns the non-negative result of __io_fixed_fd_install() on success, or:
 *   -EINVAL     source and target ring are the same
 *   -EAGAIN     target ring lock could not be taken without blocking
 *   -EBADF      src_fd is out of range or refers to an empty slot
 *   -EOVERFLOW  file was installed but the notification CQE was not posted
 * or any negative error from __io_fixed_fd_install().
 */
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long file_ptr;
	struct file *src_file;
	int ret;

	/* io_double_lock_ctx() requires two distinct rings. */
	if (target_ctx == ctx)
		return -EINVAL;

	ret = io_double_lock_ctx(ctx, target_ctx, issue_flags);
	if (unlikely(ret))
		return ret;

	ret = -EBADF;
	if (unlikely(msg->src_fd >= ctx->nr_user_files))
		goto out_unlock;

	msg->src_fd = array_index_nospec(msg->src_fd, ctx->nr_user_files);
	file_ptr = io_fixed_file_slot(&ctx->file_table, msg->src_fd)->file_ptr;
	/*
	 * An in-range slot may still be empty. Without this check src_file
	 * would be NULL and get_file() would dereference it.
	 */
	if (!file_ptr)
		goto out_unlock;

	src_file = (struct file *) (file_ptr & FFS_MASK);
	/* Extra reference for the target table; dropped below on failure. */
	get_file(src_file);

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0) {
		fput(src_file);
		goto out_unlock;
	}

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;

	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0, true))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(ctx, target_ctx, issue_flags);
	return ret;
}

/*
 * Prepare an IORING_OP_MSG_RING request: copy the message parameters out of
 * the SQE into the request's struct io_msg.
 *
 * Returns 0 on success, or -EINVAL if sqe->buf_index or sqe->personality is
 * set, or if any flag other than IORING_MSG_RING_CQE_SKIP is requested.
 */
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	/* These SQE fields are not used by this opcode and must be zero. */
	if (unlikely(sqe->buf_index || sqe->personality)) {
		return -EINVAL;
	}

	/* Snapshot each SQE field exactly once. */
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);

	/* Only IORING_MSG_RING_CQE_SKIP is a recognised flag. */
	return (msg->flags & ~IORING_MSG_RING_CQE_SKIP) ? -EINVAL : 0;
}

int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	/* put file to avoid an attempt to IOPOLL the req */
	io_put_file(req->file);
	req->file = NULL;
	return IOU_OK;
}