// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "alloc_cache.h"
#include "msg_ring.h"

/*
 * All flag bits accepted in sqe->msg_ring_flags for MSG_RING;
 * __io_msg_ring_prep() rejects any other bit with -EINVAL.
 */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					IORING_MSG_RING_FLAGS_PASS)

2136404b09SJens Axboe struct io_msg {
2236404b09SJens Axboe struct file *file;
2311373026SPavel Begunkov struct file *src_file;
246d043ee1SPavel Begunkov struct callback_head tw;
2536404b09SJens Axboe u64 user_data;
2636404b09SJens Axboe u32 len;
27e6130ebaSJens Axboe u32 cmd;
28e6130ebaSJens Axboe u32 src_fd;
29cbeb47a7SBreno Leitao union {
30e6130ebaSJens Axboe u32 dst_fd;
31cbeb47a7SBreno Leitao u32 cqe_flags;
32cbeb47a7SBreno Leitao };
33e6130ebaSJens Axboe u32 flags;
3436404b09SJens Axboe };
3536404b09SJens Axboe
io_double_unlock_ctx(struct io_ring_ctx * octx)36423d5081SJens Axboe static void io_double_unlock_ctx(struct io_ring_ctx *octx)
37423d5081SJens Axboe {
38423d5081SJens Axboe mutex_unlock(&octx->uring_lock);
39423d5081SJens Axboe }
40423d5081SJens Axboe
io_lock_external_ctx(struct io_ring_ctx * octx,unsigned int issue_flags)41b0e9570aSPavel Begunkov static int io_lock_external_ctx(struct io_ring_ctx *octx,
42423d5081SJens Axboe unsigned int issue_flags)
43423d5081SJens Axboe {
44423d5081SJens Axboe /*
45423d5081SJens Axboe * To ensure proper ordering between the two ctxs, we can only
46423d5081SJens Axboe * attempt a trylock on the target. If that fails and we already have
47423d5081SJens Axboe * the source ctx lock, punt to io-wq.
48423d5081SJens Axboe */
49423d5081SJens Axboe if (!(issue_flags & IO_URING_F_UNLOCKED)) {
50423d5081SJens Axboe if (!mutex_trylock(&octx->uring_lock))
51423d5081SJens Axboe return -EAGAIN;
52423d5081SJens Axboe return 0;
53423d5081SJens Axboe }
54423d5081SJens Axboe mutex_lock(&octx->uring_lock);
55423d5081SJens Axboe return 0;
56423d5081SJens Axboe }
57423d5081SJens Axboe
io_msg_ring_cleanup(struct io_kiocb * req)5811373026SPavel Begunkov void io_msg_ring_cleanup(struct io_kiocb *req)
5911373026SPavel Begunkov {
6011373026SPavel Begunkov struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
6111373026SPavel Begunkov
6211373026SPavel Begunkov if (WARN_ON_ONCE(!msg->src_file))
6311373026SPavel Begunkov return;
6411373026SPavel Begunkov
6511373026SPavel Begunkov fput(msg->src_file);
6611373026SPavel Begunkov msg->src_file = NULL;
6711373026SPavel Begunkov }
6811373026SPavel Begunkov
io_msg_need_remote(struct io_ring_ctx * target_ctx)6956d8e318SPavel Begunkov static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
7056d8e318SPavel Begunkov {
71d57afd8bSJens Axboe return target_ctx->task_complete;
7256d8e318SPavel Begunkov }
7356d8e318SPavel Begunkov
/*
 * Task-work handler installed by io_msg_remote_post(): post the CQE stored
 * in @req to req->ctx, then dispose of @req and drop the ctx reference
 * taken when the work was queued. @tw is unused here.
 */
static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw)
{
	struct io_ring_ctx *ctx = req->ctx;

	io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
	/*
	 * Recycle the request into the ring's msg_cache if the lock is free
	 * and the cache accepts it; otherwise fall back to freeing it.
	 */
	if (spin_trylock(&ctx->msg_lock)) {
		if (io_alloc_cache_put(&ctx->msg_cache, req))
			req = NULL;
		spin_unlock(&ctx->msg_lock);
	}
	if (req)
		kmem_cache_free(req_cachep, req);
	percpu_ref_put(&ctx->refs);
}

/*
 * Queue @req as task work on the target ring @ctx so that
 * io_msg_tw_complete() posts a CQE carrying @user_data, @res and @cflags.
 *
 * Consumes @req in all cases. Returns 0 once the work is queued, or
 * -EOWNERDEAD (after freeing @req) if @ctx has no submitter task.
 */
static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
			      int res, u32 cflags, u64 user_data)
{
	if (!READ_ONCE(ctx->submitter_task)) {
		kmem_cache_free(req_cachep, req);
		return -EOWNERDEAD;
	}
	req->opcode = IORING_OP_NOP;
	req->cqe.user_data = user_data;
	io_req_set_res(req, res, cflags);
	/* reference is dropped again in io_msg_tw_complete() */
	percpu_ref_get(&ctx->refs);
	req->ctx = ctx;
	req->tctx = NULL;
	req->io_task_work.func = io_msg_tw_complete;
	io_req_task_work_add_remote(req, IOU_F_TWQ_LAZY_WAKE);
	return 0;
}

io_msg_get_kiocb(struct io_ring_ctx * ctx)10750cf5f38SJens Axboe static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
10850cf5f38SJens Axboe {
10950cf5f38SJens Axboe struct io_kiocb *req = NULL;
11050cf5f38SJens Axboe
11150cf5f38SJens Axboe if (spin_trylock(&ctx->msg_lock)) {
11250cf5f38SJens Axboe req = io_alloc_cache_get(&ctx->msg_cache);
11350cf5f38SJens Axboe spin_unlock(&ctx->msg_lock);
11450cf5f38SJens Axboe if (req)
11550cf5f38SJens Axboe return req;
1160db4618eSJens Axboe }
1170db4618eSJens Axboe return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
11850cf5f38SJens Axboe }
11950cf5f38SJens Axboe
io_msg_data_remote(struct io_ring_ctx * target_ctx,struct io_msg * msg)12095d6c922SJens Axboe static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
12195d6c922SJens Axboe struct io_msg *msg)
1220617bb50SJens Axboe {
1230617bb50SJens Axboe struct io_kiocb *target;
1248572df94SJens Axboe u32 flags = 0;
1258572df94SJens Axboe
12695d6c922SJens Axboe target = io_msg_get_kiocb(target_ctx);
1270617bb50SJens Axboe if (unlikely(!target))
1280617bb50SJens Axboe return -ENOMEM;
1290617bb50SJens Axboe
1308572df94SJens Axboe if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
1318572df94SJens Axboe flags = msg->cqe_flags;
1328572df94SJens Axboe
133b0727b12SJens Axboe return io_msg_remote_post(target_ctx, target, msg->len, flags,
134b0727b12SJens Axboe msg->user_data);
1356d043ee1SPavel Begunkov }
1366d043ee1SPavel Begunkov
__io_msg_ring_data(struct io_ring_ctx * target_ctx,struct io_msg * msg,unsigned int issue_flags)13795d6c922SJens Axboe static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
13895d6c922SJens Axboe struct io_msg *msg, unsigned int issue_flags)
139e6130ebaSJens Axboe {
140cbeb47a7SBreno Leitao u32 flags = 0;
141e12d7a46SJens Axboe int ret;
142e6130ebaSJens Axboe
143cbeb47a7SBreno Leitao if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
144cbeb47a7SBreno Leitao return -EINVAL;
145cbeb47a7SBreno Leitao if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
146e6130ebaSJens Axboe return -EINVAL;
1478579538cSPavel Begunkov if (target_ctx->flags & IORING_SETUP_R_DISABLED)
1488579538cSPavel Begunkov return -EBADFD;
149e6130ebaSJens Axboe
15056d8e318SPavel Begunkov if (io_msg_need_remote(target_ctx))
15195d6c922SJens Axboe return io_msg_data_remote(target_ctx, msg);
1526d043ee1SPavel Begunkov
153cbeb47a7SBreno Leitao if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
154cbeb47a7SBreno Leitao flags = msg->cqe_flags;
155cbeb47a7SBreno Leitao
156e12d7a46SJens Axboe ret = -EOVERFLOW;
157e12d7a46SJens Axboe if (target_ctx->flags & IORING_SETUP_IOPOLL) {
158b0e9570aSPavel Begunkov if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
159e12d7a46SJens Axboe return -EAGAIN;
160e12d7a46SJens Axboe }
16159b28a6eSJens Axboe if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
16259b28a6eSJens Axboe ret = 0;
16359b28a6eSJens Axboe if (target_ctx->flags & IORING_SETUP_IOPOLL)
16459b28a6eSJens Axboe io_double_unlock_ctx(target_ctx);
165e12d7a46SJens Axboe return ret;
166e6130ebaSJens Axboe }
167e6130ebaSJens Axboe
io_msg_ring_data(struct io_kiocb * req,unsigned int issue_flags)16895d6c922SJens Axboe static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
16995d6c922SJens Axboe {
17095d6c922SJens Axboe struct io_ring_ctx *target_ctx = req->file->private_data;
17195d6c922SJens Axboe struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
17295d6c922SJens Axboe
17395d6c922SJens Axboe return __io_msg_ring_data(target_ctx, msg, issue_flags);
17495d6c922SJens Axboe }
17595d6c922SJens Axboe
io_msg_grab_file(struct io_kiocb * req,unsigned int issue_flags)176b54a1404SJens Axboe static int io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
17711373026SPavel Begunkov {
17811373026SPavel Begunkov struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
17911373026SPavel Begunkov struct io_ring_ctx *ctx = req->ctx;
180b54a1404SJens Axboe struct io_rsrc_node *node;
181b54a1404SJens Axboe int ret = -EBADF;
18211373026SPavel Begunkov
18311373026SPavel Begunkov io_ring_submit_lock(ctx, issue_flags);
184b54a1404SJens Axboe node = io_rsrc_node_lookup(&ctx->file_table.data, msg->src_fd);
185b54a1404SJens Axboe if (node) {
186b54a1404SJens Axboe msg->src_file = io_slot_file(node);
187b54a1404SJens Axboe if (msg->src_file)
188b54a1404SJens Axboe get_file(msg->src_file);
189b54a1404SJens Axboe req->flags |= REQ_F_NEED_CLEANUP;
190b54a1404SJens Axboe ret = 0;
19111373026SPavel Begunkov }
19211373026SPavel Begunkov io_ring_submit_unlock(ctx, issue_flags);
193b54a1404SJens Axboe return ret;
194e6130ebaSJens Axboe }
195e6130ebaSJens Axboe
io_msg_install_complete(struct io_kiocb * req,unsigned int issue_flags)19617211310SPavel Begunkov static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
197e6130ebaSJens Axboe {
198e6130ebaSJens Axboe struct io_ring_ctx *target_ctx = req->file->private_data;
199f2ccb5aeSStefan Metzmacher struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
20011373026SPavel Begunkov struct file *src_file = msg->src_file;
201e6130ebaSJens Axboe int ret;
202e6130ebaSJens Axboe
203b0e9570aSPavel Begunkov if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
20411373026SPavel Begunkov return -EAGAIN;
205e6130ebaSJens Axboe
206e6130ebaSJens Axboe ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
20711373026SPavel Begunkov if (ret < 0)
208e6130ebaSJens Axboe goto out_unlock;
20917211310SPavel Begunkov
21011373026SPavel Begunkov msg->src_file = NULL;
21111373026SPavel Begunkov req->flags &= ~REQ_F_NEED_CLEANUP;
212e6130ebaSJens Axboe
213e6130ebaSJens Axboe if (msg->flags & IORING_MSG_RING_CQE_SKIP)
214e6130ebaSJens Axboe goto out_unlock;
215e6130ebaSJens Axboe /*
216e6130ebaSJens Axboe * If this fails, the target still received the file descriptor but
217e6130ebaSJens Axboe * wasn't notified of the fact. This means that if this request
218e6130ebaSJens Axboe * completes with -EOVERFLOW, then the sender must ensure that a
219e6130ebaSJens Axboe * later IORING_OP_MSG_RING delivers the message.
220e6130ebaSJens Axboe */
2215da28eddSPavel Begunkov if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
222e6130ebaSJens Axboe ret = -EOVERFLOW;
223e6130ebaSJens Axboe out_unlock:
224423d5081SJens Axboe io_double_unlock_ctx(target_ctx);
225e6130ebaSJens Axboe return ret;
226e6130ebaSJens Axboe }
227e6130ebaSJens Axboe
io_msg_tw_fd_complete(struct callback_head * head)2286d043ee1SPavel Begunkov static void io_msg_tw_fd_complete(struct callback_head *head)
2296d043ee1SPavel Begunkov {
2306d043ee1SPavel Begunkov struct io_msg *msg = container_of(head, struct io_msg, tw);
2316d043ee1SPavel Begunkov struct io_kiocb *req = cmd_to_io_kiocb(msg);
2326d043ee1SPavel Begunkov int ret = -EOWNERDEAD;
2336d043ee1SPavel Begunkov
2346d043ee1SPavel Begunkov if (!(current->flags & PF_EXITING))
2356d043ee1SPavel Begunkov ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
2366d043ee1SPavel Begunkov if (ret < 0)
2376d043ee1SPavel Begunkov req_set_fail(req);
2386d043ee1SPavel Begunkov io_req_queue_tw_complete(req, ret);
2396d043ee1SPavel Begunkov }
2406d043ee1SPavel Begunkov
io_msg_fd_remote(struct io_kiocb * req)2410617bb50SJens Axboe static int io_msg_fd_remote(struct io_kiocb *req)
2420617bb50SJens Axboe {
2430617bb50SJens Axboe struct io_ring_ctx *ctx = req->file->private_data;
2440617bb50SJens Axboe struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
2450617bb50SJens Axboe struct task_struct *task = READ_ONCE(ctx->submitter_task);
2460617bb50SJens Axboe
2470617bb50SJens Axboe if (unlikely(!task))
2480617bb50SJens Axboe return -EOWNERDEAD;
2490617bb50SJens Axboe
2500617bb50SJens Axboe init_task_work(&msg->tw, io_msg_tw_fd_complete);
2510617bb50SJens Axboe if (task_work_add(task, &msg->tw, TWA_SIGNAL))
2520617bb50SJens Axboe return -EOWNERDEAD;
2530617bb50SJens Axboe
2540617bb50SJens Axboe return IOU_ISSUE_SKIP_COMPLETE;
2550617bb50SJens Axboe }
2560617bb50SJens Axboe
io_msg_send_fd(struct io_kiocb * req,unsigned int issue_flags)25717211310SPavel Begunkov static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
25817211310SPavel Begunkov {
25917211310SPavel Begunkov struct io_ring_ctx *target_ctx = req->file->private_data;
26017211310SPavel Begunkov struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
26117211310SPavel Begunkov struct io_ring_ctx *ctx = req->ctx;
26217211310SPavel Begunkov
2635da28eddSPavel Begunkov if (msg->len)
2645da28eddSPavel Begunkov return -EINVAL;
26517211310SPavel Begunkov if (target_ctx == ctx)
26617211310SPavel Begunkov return -EINVAL;
2678579538cSPavel Begunkov if (target_ctx->flags & IORING_SETUP_R_DISABLED)
2688579538cSPavel Begunkov return -EBADFD;
269b54a1404SJens Axboe if (!msg->src_file) {
270b54a1404SJens Axboe int ret = io_msg_grab_file(req, issue_flags);
271b54a1404SJens Axboe if (unlikely(ret))
272b54a1404SJens Axboe return ret;
27317211310SPavel Begunkov }
2746d043ee1SPavel Begunkov
27556d8e318SPavel Begunkov if (io_msg_need_remote(target_ctx))
2760617bb50SJens Axboe return io_msg_fd_remote(req);
27717211310SPavel Begunkov return io_msg_install_complete(req, issue_flags);
27817211310SPavel Begunkov }
27917211310SPavel Begunkov
__io_msg_ring_prep(struct io_msg * msg,const struct io_uring_sqe * sqe)28095d6c922SJens Axboe static int __io_msg_ring_prep(struct io_msg *msg, const struct io_uring_sqe *sqe)
28136404b09SJens Axboe {
282e6130ebaSJens Axboe if (unlikely(sqe->buf_index || sqe->personality))
28336404b09SJens Axboe return -EINVAL;
28436404b09SJens Axboe
28511373026SPavel Begunkov msg->src_file = NULL;
28636404b09SJens Axboe msg->user_data = READ_ONCE(sqe->off);
28736404b09SJens Axboe msg->len = READ_ONCE(sqe->len);
288e6130ebaSJens Axboe msg->cmd = READ_ONCE(sqe->addr);
289e6130ebaSJens Axboe msg->src_fd = READ_ONCE(sqe->addr3);
290e6130ebaSJens Axboe msg->dst_fd = READ_ONCE(sqe->file_index);
291e6130ebaSJens Axboe msg->flags = READ_ONCE(sqe->msg_ring_flags);
292cbeb47a7SBreno Leitao if (msg->flags & ~IORING_MSG_RING_MASK)
293e6130ebaSJens Axboe return -EINVAL;
294e6130ebaSJens Axboe
29536404b09SJens Axboe return 0;
29636404b09SJens Axboe }
29736404b09SJens Axboe
io_msg_ring_prep(struct io_kiocb * req,const struct io_uring_sqe * sqe)29895d6c922SJens Axboe int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
29995d6c922SJens Axboe {
30095d6c922SJens Axboe return __io_msg_ring_prep(io_kiocb_to_cmd(req, struct io_msg), sqe);
30195d6c922SJens Axboe }
30295d6c922SJens Axboe
io_msg_ring(struct io_kiocb * req,unsigned int issue_flags)30336404b09SJens Axboe int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
30436404b09SJens Axboe {
305f2ccb5aeSStefan Metzmacher struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
30636404b09SJens Axboe int ret;
30736404b09SJens Axboe
30836404b09SJens Axboe ret = -EBADFD;
30936404b09SJens Axboe if (!io_is_uring_fops(req->file))
31036404b09SJens Axboe goto done;
31136404b09SJens Axboe
312e6130ebaSJens Axboe switch (msg->cmd) {
313e6130ebaSJens Axboe case IORING_MSG_DATA:
314e12d7a46SJens Axboe ret = io_msg_ring_data(req, issue_flags);
315e6130ebaSJens Axboe break;
316e6130ebaSJens Axboe case IORING_MSG_SEND_FD:
317e6130ebaSJens Axboe ret = io_msg_send_fd(req, issue_flags);
318e6130ebaSJens Axboe break;
319e6130ebaSJens Axboe default:
320e6130ebaSJens Axboe ret = -EINVAL;
321e6130ebaSJens Axboe break;
322e6130ebaSJens Axboe }
32336404b09SJens Axboe
32436404b09SJens Axboe done:
3256d043ee1SPavel Begunkov if (ret < 0) {
3266d043ee1SPavel Begunkov if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
3276d043ee1SPavel Begunkov return ret;
32836404b09SJens Axboe req_set_fail(req);
3296d043ee1SPavel Begunkov }
33036404b09SJens Axboe io_req_set_res(req, ret, 0);
33136404b09SJens Axboe return IOU_OK;
33236404b09SJens Axboe }
33350cf5f38SJens Axboe
io_uring_sync_msg_ring(struct io_uring_sqe * sqe)334a3771321SJens Axboe int io_uring_sync_msg_ring(struct io_uring_sqe *sqe)
335a3771321SJens Axboe {
336a3771321SJens Axboe struct io_msg io_msg = { };
337a3771321SJens Axboe int ret;
338a3771321SJens Axboe
339a3771321SJens Axboe ret = __io_msg_ring_prep(&io_msg, sqe);
340a3771321SJens Axboe if (unlikely(ret))
341a3771321SJens Axboe return ret;
342a3771321SJens Axboe
343a3771321SJens Axboe /*
344a3771321SJens Axboe * Only data sending supported, not IORING_MSG_SEND_FD as that one
345a3771321SJens Axboe * doesn't make sense without a source ring to send files from.
346a3771321SJens Axboe */
347a3771321SJens Axboe if (io_msg.cmd != IORING_MSG_DATA)
348a3771321SJens Axboe return -EINVAL;
349a3771321SJens Axboe
35056cec28dSAl Viro CLASS(fd, f)(sqe->fd);
35156cec28dSAl Viro if (fd_empty(f))
35256cec28dSAl Viro return -EBADF;
35356cec28dSAl Viro if (!io_is_uring_fops(fd_file(f)))
35456cec28dSAl Viro return -EBADFD;
35556cec28dSAl Viro return __io_msg_ring_data(fd_file(f)->private_data,
356a3771321SJens Axboe &io_msg, IO_URING_F_UNLOCKED);
357a3771321SJens Axboe }
358