// SPDX-License-Identifier: GPL-2.0
/*
 * FUSE: Filesystem in Userspace
 * Copyright (c) 2023-2024 DataDirect Networks.
 */

#include "fuse_i.h"
#include "dev_uring_i.h"
#include "fuse_dev_i.h"

#include <linux/fs.h>
#include <linux/io_uring/cmd.h>

static bool __read_mostly enable_uring;
module_param(enable_uring, bool, 0644);
MODULE_PARM_DESC(enable_uring,
		 "Enable userspace communication through io-uring");

#define FUSE_URING_IOV_SEGS 2 /* header and payload */


bool fuse_uring_enabled(void)
{
	return enable_uring;
}

struct fuse_uring_pdu {
	struct fuse_ring_ent *ent;
};

static const struct fuse_iqueue_ops fuse_io_uring_ops;

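/*
 * Each io_uring command carries a small per-command private data area (PDU).
 * The two helpers below use it to stash the fuse_ring_ent behind a command,
 * so that the entry can be recovered again from the command - in particular
 * from the IO_URING_F_CANCEL path (see fuse_uring_cancel()).
 */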
static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
				   struct fuse_ring_ent *ring_ent)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	pdu->ent = ring_ent;
}

static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	return pdu->ent;
}

static void fuse_uring_flush_bg(struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_held(&queue->lock);
	lockdep_assert_held(&fc->bg_lock);

	/*
	 * Allow one bg request per queue, ignoring global fc limits.
	 * This prevents a single queue from consuming all resources and
	 * eliminates the need for remote queue wake-ups when global
	 * limits are met but this queue has no more waiting requests.
	 */
	while ((fc->active_background < fc->max_background ||
		!queue->active_background) &&
	       (!list_empty(&queue->fuse_req_bg_queue))) {
		struct fuse_req *req;

		req = list_first_entry(&queue->fuse_req_bg_queue,
				       struct fuse_req, list);
		fc->active_background++;
		queue->active_background++;

		list_move_tail(&req->list, &queue->fuse_req_queue);
	}
}

static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
			       int error)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_not_held(&queue->lock);
	spin_lock(&queue->lock);
	ent->fuse_req = NULL;
	if (test_bit(FR_BACKGROUND, &req->flags)) {
		queue->active_background--;
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
	}

	spin_unlock(&queue->lock);

	if (error)
		req->out.h.error = error;

	clear_bit(FR_SENT, &req->flags);
	fuse_request_end(req);
}

/* Abort all list-queued requests on the given ring queue */
static void fuse_uring_abort_end_queue_requests(struct fuse_ring_queue *queue)
{
	struct fuse_req *req;
	LIST_HEAD(req_list);

	spin_lock(&queue->lock);
	list_for_each_entry(req, &queue->fuse_req_queue, list)
		clear_bit(FR_PENDING, &req->flags);
	list_splice_init(&queue->fuse_req_queue, &req_list);
	spin_unlock(&queue->lock);

	/* must not hold queue lock to avoid order issues with fi->lock */
	fuse_dev_end_requests(&req_list);
}

void fuse_uring_abort_end_requests(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_queue *queue;
	struct fuse_conn *fc = ring->fc;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		queue = READ_ONCE(ring->queues[qid]);
		if (!queue)
			continue;

		queue->stopped = true;

		WARN_ON_ONCE(ring->fc->max_background != UINT_MAX);
		spin_lock(&queue->lock);
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
		spin_unlock(&queue->lock);
		fuse_uring_abort_end_queue_requests(queue);
	}
}

bool fuse_uring_request_expired(struct fuse_conn *fc)
{
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	int qid;

	if (!ring)
		return false;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		queue = READ_ONCE(ring->queues[qid]);
		if (!queue)
			continue;

		spin_lock(&queue->lock);
		if (fuse_request_expired(fc, &queue->fuse_req_queue) ||
		    fuse_request_expired(fc, &queue->fuse_req_bg_queue) ||
		    fuse_fpq_processing_expired(fc, queue->fpq.processing)) {
			spin_unlock(&queue->lock);
			return true;
		}
		spin_unlock(&queue->lock);
	}

	return false;
}

void fuse_uring_destruct(struct fuse_conn *fc)
{
	struct fuse_ring *ring = fc->ring;
	int qid;

	if (!ring)
		return;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];
		struct fuse_ring_ent *ent, *next;

		if (!queue)
			continue;

		WARN_ON(!list_empty(&queue->ent_avail_queue));
		WARN_ON(!list_empty(&queue->ent_w_req_queue));
		WARN_ON(!list_empty(&queue->ent_commit_queue));
		WARN_ON(!list_empty(&queue->ent_in_userspace));

		list_for_each_entry_safe(ent, next, &queue->ent_released,
					 list) {
			list_del_init(&ent->list);
			kfree(ent);
		}

		kfree(queue->fpq.processing);
		kfree(queue);
		ring->queues[qid] = NULL;
	}

	kfree(ring->queues);
	kfree(ring);
	fc->ring = NULL;
}

/*
 * Basic ring setup for this connection based on the provided configuration
 */
static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
{
	struct fuse_ring *ring;
	size_t nr_queues = num_possible_cpus();
	struct fuse_ring *res = NULL;
	size_t max_payload_size;

	ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
	if (!ring)
		return NULL;

	ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
			       GFP_KERNEL_ACCOUNT);
	if (!ring->queues)
		goto out_err;

	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);

	spin_lock(&fc->lock);
	if (fc->ring) {
		/* race, another thread created the ring in the meantime */
		spin_unlock(&fc->lock);
		res = fc->ring;
		goto out_err;
	}

	init_waitqueue_head(&ring->stop_waitq);

	ring->nr_queues = nr_queues;
	ring->fc = fc;
	ring->max_payload_sz = max_payload_size;
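	/* Pairs with smp_load_acquire() of fc->ring in fuse_uring_register() */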
	smp_store_release(&fc->ring, ring);

	spin_unlock(&fc->lock);
	return ring;

out_err:
	kfree(ring->queues);
	kfree(ring);
	return res;
}

static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
						       int qid)
{
	struct fuse_conn *fc = ring->fc;
	struct fuse_ring_queue *queue;
	struct list_head *pq;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
	if (!queue)
		return NULL;
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(queue);
		return NULL;
	}

	queue->qid = qid;
	queue->ring = ring;
	spin_lock_init(&queue->lock);

	INIT_LIST_HEAD(&queue->ent_avail_queue);
	INIT_LIST_HEAD(&queue->ent_commit_queue);
	INIT_LIST_HEAD(&queue->ent_w_req_queue);
	INIT_LIST_HEAD(&queue->ent_in_userspace);
	INIT_LIST_HEAD(&queue->fuse_req_queue);
	INIT_LIST_HEAD(&queue->fuse_req_bg_queue);
	INIT_LIST_HEAD(&queue->ent_released);

	queue->fpq.processing = pq;
	fuse_pqueue_init(&queue->fpq);

	spin_lock(&fc->lock);
	if (ring->queues[qid]) {
		spin_unlock(&fc->lock);
		kfree(queue->fpq.processing);
		kfree(queue);
		return ring->queues[qid];
	}

	/*
	 * write_once and lock as the caller mostly doesn't take the lock at all
	 */
	WRITE_ONCE(ring->queues[qid], queue);
	spin_unlock(&fc->lock);

	return queue;
}

static void fuse_uring_stop_fuse_req_end(struct fuse_req *req)
{
	clear_bit(FR_SENT, &req->flags);
	req->out.h.error = -ECONNABORTED;
	fuse_request_end(req);
}

/*
 * Release a request/entry on connection tear down
 */
static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
{
	struct fuse_req *req;
	struct io_uring_cmd *cmd;

	struct fuse_ring_queue *queue = ent->queue;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	req = ent->fuse_req;
	ent->fuse_req = NULL;
	if (req) {
		/* remove entry from queue->fpq->processing */
		list_del_init(&req->list);
	}

	/*
	 * The entry must not be freed immediately, because the
	 * IO_URING_F_CANCEL path accesses entries through direct pointers -
	 * there is a risk of a race with daemon termination (which triggers
	 * IO_URING_F_CANCEL and accesses entries without checking the list
	 * state first).
	 */
	list_move(&ent->list, &queue->ent_released);
	ent->state = FRRS_RELEASED;
	spin_unlock(&queue->lock);

	if (cmd)
		io_uring_cmd_done(cmd, -ENOTCONN, 0, IO_URING_F_UNLOCKED);

	if (req)
		fuse_uring_stop_fuse_req_end(req);
}

static void fuse_uring_stop_list_entries(struct list_head *head,
					 struct fuse_ring_queue *queue,
					 enum fuse_ring_req_state exp_state)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent, *next;
	ssize_t queue_refs = SSIZE_MAX;
	LIST_HEAD(to_teardown);

	spin_lock(&queue->lock);
	list_for_each_entry_safe(ent, next, head, list) {
		if (ent->state != exp_state) {
			pr_warn("entry teardown qid=%d state=%d expected=%d",
				queue->qid, ent->state, exp_state);
			continue;
		}

		ent->state = FRRS_TEARDOWN;
		list_move(&ent->list, &to_teardown);
	}
	spin_unlock(&queue->lock);

	/* no queue lock to avoid lock order issues */
	list_for_each_entry_safe(ent, next, &to_teardown, list) {
		fuse_uring_entry_teardown(ent);
		queue_refs = atomic_dec_return(&ring->queue_refs);
		WARN_ON_ONCE(queue_refs < 0);
	}
}

static void fuse_uring_teardown_entries(struct fuse_ring_queue *queue)
{
	fuse_uring_stop_list_entries(&queue->ent_in_userspace, queue,
				     FRRS_USERSPACE);
	fuse_uring_stop_list_entries(&queue->ent_avail_queue, queue,
				     FRRS_AVAILABLE);
}

/*
 * Log state debug info
 */
static void fuse_uring_log_ent_state(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_ent *ent;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];

		if (!queue)
			continue;

		spin_lock(&queue->lock);
		/*
		 * Log entries from the intermediate queues, the other queues
		 * should be empty
		 */
		list_for_each_entry(ent, &queue->ent_w_req_queue, list) {
			pr_info(" ent-req-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		list_for_each_entry(ent, &queue->ent_commit_queue, list) {
			pr_info(" ent-commit-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		spin_unlock(&queue->lock);
	}
	ring->stop_debug_log = 1;
}

static void fuse_uring_async_stop_queues(struct work_struct *work)
{
	int qid;
	struct fuse_ring *ring =
		container_of(work, struct fuse_ring, async_teardown_work.work);

	/* XXX code dup */
	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	/*
	 * Some ring entries might be in the middle of IO operations, i.e.
	 * about to get handled by file_operations::uring_cmd or on the way
	 * to userspace - we could handle that with conditions in run-time
	 * code, but it is easier/cleaner to have an async tear-down handler
	 * that reschedules itself as long as queue references are left.
	 */
	if (atomic_read(&ring->queue_refs) > 0) {
		if (time_after(jiffies,
			       ring->teardown_time + FUSE_URING_TEARDOWN_TIMEOUT))
			fuse_uring_log_ent_state(ring);

		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Stop the ring queues
 */
void fuse_uring_stop_queues(struct fuse_ring *ring)
{
	int qid;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	if (atomic_read(&ring->queue_refs) > 0) {
		ring->teardown_time = jiffies;
		INIT_DELAYED_WORK(&ring->async_teardown_work,
				  fuse_uring_async_stop_queues);
		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Handle IO_URING_F_CANCEL, which typically comes on daemon termination.
 *
 * Releasing the last entry should trigger fuse_dev_release() if
 * the daemon was terminated.
 */
static void fuse_uring_cancel(struct io_uring_cmd *cmd,
			      unsigned int issue_flags)
{
	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
	struct fuse_ring_queue *queue;
	bool need_cmd_done = false;

	/*
	 * direct access on ent - it must not be destructed as long as
	 * IO_URING_F_CANCEL might come up
	 */
	queue = ent->queue;
	spin_lock(&queue->lock);
	if (ent->state == FRRS_AVAILABLE) {
		ent->state = FRRS_USERSPACE;
		list_move(&ent->list, &queue->ent_in_userspace);
		need_cmd_done = true;
		ent->cmd = NULL;
	}
	spin_unlock(&queue->lock);

	if (need_cmd_done) {
		/* no queue lock to avoid lock order issues */
		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
	}
}

static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
				      struct fuse_ring_ent *ring_ent)
{
	uring_cmd_set_ring_ent(cmd, ring_ent);
	io_uring_cmd_mark_cancelable(cmd, issue_flags);
}

/*
 * Check for errors and store them in the request
 */
static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
					 struct fuse_req *req,
					 struct fuse_conn *fc)
{
	int err;

	err = -EINVAL;
	if (oh->unique == 0) {
		/* Not supported through io-uring yet */
		pr_warn_once("notify through fuse-io-uring not supported\n");
		goto err;
	}

	if (oh->error <= -ERESTARTSYS || oh->error > 0)
		goto err;

	if (oh->error) {
		err = oh->error;
		goto err;
	}

	err = -ENOENT;
	if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
		pr_warn_ratelimited("unique mismatch, expected: %llu got %llu\n",
				    req->in.h.unique,
				    oh->unique & ~FUSE_INT_REQ_BIT);
		goto err;
	}

	/*
	 * Is it an interrupt reply ID?
	 * XXX: Not supported through fuse-io-uring yet, it should not even
	 *      find the request - should not happen.
	 */
	WARN_ON_ONCE(oh->unique & FUSE_INT_REQ_BIT);

	err = 0;
err:
	return err;
}

static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
				     struct fuse_req *req,
				     struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct iov_iter iter;
	int err;
	struct fuse_uring_ent_in_out ring_in_out;

	err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out,
			     sizeof(ring_in_out));
	if (err)
		return -EFAULT;

	err = import_ubuf(ITER_SOURCE, ent->payload, ring->max_payload_sz,
			  &iter);
	if (err)
		return err;

	fuse_copy_init(&cs, 0, &iter);
	cs.is_uring = 1;
	cs.req = req;

	return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
}

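/*
 * Layout of the header part of a ring entry buffer (struct
 * fuse_uring_req_header, shared with userspace), as used in this file:
 * ->in_out carries the fuse in/out header, ->op_in the per-opcode input
 * header and ->ring_ent_in_out the ring metadata such as payload_sz and
 * commit_id.
 */
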
/*
 * Copy data from the req to the ring buffer
 */
static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
				   struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct fuse_in_arg *in_args = args->in_args;
	int num_args = args->in_numargs;
	int err;
	struct iov_iter iter;
	struct fuse_uring_ent_in_out ent_in_out = {
		.flags = 0,
		.commit_id = req->in.h.unique,
	};

	err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter);
	if (err) {
		pr_info_ratelimited("fuse: Import of user buffer failed\n");
		return err;
	}

	fuse_copy_init(&cs, 1, &iter);
	cs.is_uring = 1;
	cs.req = req;

	if (num_args > 0) {
		/*
		 * Expectation is that the first argument is the per-op header.
		 * Some op codes have that as zero size.
		 */
		if (args->in_args[0].size > 0) {
			err = copy_to_user(&ent->headers->op_in, in_args->value,
					   in_args->size);
			if (err) {
				pr_info_ratelimited(
					"Copying the header failed.\n");
				return -EFAULT;
			}
		}
		in_args++;
		num_args--;
	}

	/* copy the payload */
	err = fuse_copy_args(&cs, num_args, args->in_pages,
			     (struct fuse_arg *)in_args, 0);
	if (err) {
		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
		return err;
	}

	ent_in_out.payload_sz = cs.ring.copied_sz;
	err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out,
			   sizeof(ent_in_out));
	return err ? -EFAULT : 0;
}

static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	int err;

	err = -EIO;
	if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
		pr_err("qid=%d ring-req=%p invalid state %d on send\n",
		       queue->qid, ent, ent->state);
		return err;
	}

	err = -EINVAL;
	if (WARN_ON(req->in.h.unique == 0))
		return err;

	/* copy the request */
	err = fuse_uring_args_to_ring(ring, req, ent);
	if (unlikely(err)) {
		pr_info_ratelimited("Copy to ring failed: %d\n", err);
		return err;
	}

	/* copy fuse_in_header */
	err = copy_to_user(&ent->headers->in_out, &req->in.h,
			   sizeof(req->in.h));
	if (err) {
		err = -EFAULT;
		return err;
	}

	return 0;
}

static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	int err;

	err = fuse_uring_copy_to_ring(ent, req);
	if (!err)
		set_bit(FR_SENT, &req->flags);
	else
		fuse_uring_req_end(ent, req, err);

	return err;
}

/*
 * Write data to the ring buffer and send the request to userspace;
 * userspace will read it.
 * This is comparable to a classical read(/dev/fuse).
 */
static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
					struct fuse_req *req,
					unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;
	int err;
	struct io_uring_cmd *cmd;

	err = fuse_uring_prepare_send(ent, req);
	if (err)
		return err;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	spin_unlock(&queue->lock);

	io_uring_cmd_done(cmd, 0, 0, issue_flags);
	return 0;
}

/*
 * Make a ring entry available for fuse_req assignment
 */
static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
				 struct fuse_ring_queue *queue)
{
	WARN_ON_ONCE(!ent->cmd);
	list_move(&ent->list, &queue->ent_avail_queue);
	ent->state = FRRS_AVAILABLE;
}

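/*
 * Typical entry life cycle, as can be followed through this file:
 * FRRS_AVAILABLE (registered, waiting for a fuse request)
 *   -> FRRS_FUSE_REQ (a request got assigned)
 *   -> FRRS_USERSPACE (request data sent, the daemon processes it)
 *   -> FRRS_COMMIT (result committed, about to become available again),
 * plus FRRS_TEARDOWN/FRRS_RELEASED on connection shutdown.
 */
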
/* Used to find the request on SQE commit */
static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent,
				 struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_pqueue *fpq = &queue->fpq;
	unsigned int hash;

	req->ring_entry = ent;
	hash = fuse_req_hash(req->in.h.unique);
	list_move_tail(&req->list, &fpq->processing[hash]);
}

/*
 * Assign a fuse request to the given ring entry
 */
static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
					   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_AVAILABLE &&
			 ent->state != FRRS_COMMIT)) {
		pr_warn("%s qid=%d state=%d\n", __func__, ent->queue->qid,
			ent->state);
	}

	clear_bit(FR_PENDING, &req->flags);
	ent->fuse_req = req;
	ent->state = FRRS_FUSE_REQ;
	list_move(&ent->list, &queue->ent_w_req_queue);
	fuse_uring_add_to_pq(ent, req);
}

/* Fetch the next fuse request if available */
static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent)
	__must_hold(&queue->lock)
{
	struct fuse_req *req;
	struct fuse_ring_queue *queue = ent->queue;
	struct list_head *req_queue = &queue->fuse_req_queue;

	lockdep_assert_held(&queue->lock);

	/* get and assign the next entry while it is still holding the lock */
	req = list_first_entry_or_null(req_queue, struct fuse_req, list);
	if (req)
		fuse_uring_add_req_to_ring_ent(ent, req);

	return req;
}

/*
 * Read data from the ring buffer, which user space has written to.
 * This is comparable to the handling of a classical write(/dev/fuse).
 * Also make the ring entry available again for new fuse requests.
 */
static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req,
			      unsigned int issue_flags)
{
	struct fuse_ring *ring = ent->queue->ring;
	struct fuse_conn *fc = ring->fc;
	ssize_t err = 0;

	err = copy_from_user(&req->out.h, &ent->headers->in_out,
			     sizeof(req->out.h));
	if (err) {
		req->out.h.error = -EFAULT;
		goto out;
	}

	err = fuse_uring_out_header_has_err(&req->out.h, req, fc);
	if (err) {
		/* req->out.h.error already set */
		goto out;
	}

	err = fuse_uring_copy_from_ring(ring, req, ent);
out:
	fuse_uring_req_end(ent, req, err);
}

/*
 * Get the next fuse req and send it
 */
static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent,
				     struct fuse_ring_queue *queue,
				     unsigned int issue_flags)
{
	int err;
	struct fuse_req *req;

retry:
	spin_lock(&queue->lock);
	fuse_uring_ent_avail(ent, queue);
	req = fuse_uring_ent_assign_req(ent);
	spin_unlock(&queue->lock);

	if (req) {
		err = fuse_uring_send_next_to_ring(ent, req, issue_flags);
		if (err)
			goto retry;
	}
}

static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent)
{
	struct fuse_ring_queue *queue = ent->queue;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE))
		return -EIO;

	ent->state = FRRS_COMMIT;
	list_move(&ent->list, &queue->ent_commit_queue);

	return 0;
}

/* FUSE_URING_CMD_COMMIT_AND_FETCH handler */
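/*
 * Note that the commit_id taken from the SQE below is the value the kernel
 * previously sent to userspace in ent_in_out.commit_id, i.e. the request's
 * unique ID (see fuse_uring_args_to_ring()); that is what allows finding
 * the fuse_req again in the processing queue.
 */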
static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
				   struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring_ent *ent;
	int err;
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	uint64_t commit_id = READ_ONCE(cmd_req->commit_id);
	unsigned int qid = READ_ONCE(cmd_req->qid);
	struct fuse_pqueue *fpq;
	struct fuse_req *req;

	err = -ENOTCONN;
	if (!ring)
		return err;

	if (qid >= ring->nr_queues)
		return -EINVAL;

	queue = ring->queues[qid];
	if (!queue)
		return err;
	fpq = &queue->fpq;

	if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped))
		return err;

	spin_lock(&queue->lock);
	/* Find a request based on the unique ID of the fuse request
	 * This should get revised, as it needs a hash calculation and list
	 * search. And full struct fuse_pqueue is needed (memory overhead).
	 * As well as the link from req to ring_ent.
	 */
	req = fuse_request_find(fpq, commit_id);
	err = -ENOENT;
	if (!req) {
		pr_info("qid=%d commit_id %llu not found\n", queue->qid,
			commit_id);
		spin_unlock(&queue->lock);
		return err;
	}
	list_del_init(&req->list);
	ent = req->ring_entry;
	req->ring_entry = NULL;

	err = fuse_ring_ent_set_commit(ent);
	if (err != 0) {
		pr_info_ratelimited("qid=%d commit_id %llu state %d",
				    queue->qid, commit_id, ent->state);
		spin_unlock(&queue->lock);
		req->out.h.error = err;
		clear_bit(FR_SENT, &req->flags);
		fuse_request_end(req);
		return err;
	}

	ent->cmd = cmd;
	spin_unlock(&queue->lock);

	/* without the queue lock, as other locks are taken */
	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
	fuse_uring_commit(ent, req, issue_flags);

	/*
	 * Fetching the next request is absolutely required as queued
	 * fuse requests would otherwise not get processed - committing
	 * and fetching is done in one step vs legacy fuse, which has separated
	 * read (fetch request) and write (commit result).
	 */
	fuse_uring_next_fuse_req(ent, queue, issue_flags);
	return 0;
}

static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
{
	int qid;
	struct fuse_ring_queue *queue;
	bool ready = true;

	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
		if (current_qid == qid)
			continue;

		queue = ring->queues[qid];
		if (!queue) {
			ready = false;
			break;
		}

		spin_lock(&queue->lock);
		if (list_empty(&queue->ent_avail_queue))
			ready = false;
		spin_unlock(&queue->lock);
	}

	return ready;
}
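
/*
 * Note: the check above scans all queues under their locks, but it only
 * runs as long as ring->ready has not been set yet (see
 * fuse_uring_do_register() below); once the ring is ready the check is
 * skipped entirely.
 */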
956c2c9af9aSBernd Schubert 
95724fe962cSBernd Schubert /*
95824fe962cSBernd Schubert  * fuse_uring_req_fetch command handling
95924fe962cSBernd Schubert  */
fuse_uring_do_register(struct fuse_ring_ent * ent,struct io_uring_cmd * cmd,unsigned int issue_flags)96024fe962cSBernd Schubert static void fuse_uring_do_register(struct fuse_ring_ent *ent,
96124fe962cSBernd Schubert 				   struct io_uring_cmd *cmd,
96224fe962cSBernd Schubert 				   unsigned int issue_flags)
96324fe962cSBernd Schubert {
96424fe962cSBernd Schubert 	struct fuse_ring_queue *queue = ent->queue;
965c2c9af9aSBernd Schubert 	struct fuse_ring *ring = queue->ring;
966c2c9af9aSBernd Schubert 	struct fuse_conn *fc = ring->fc;
967c2c9af9aSBernd Schubert 	struct fuse_iqueue *fiq = &fc->iq;
96824fe962cSBernd Schubert 
969b6236c84SBernd Schubert 	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
970b6236c84SBernd Schubert 
97124fe962cSBernd Schubert 	spin_lock(&queue->lock);
97224fe962cSBernd Schubert 	ent->cmd = cmd;
97324fe962cSBernd Schubert 	fuse_uring_ent_avail(ent, queue);
97424fe962cSBernd Schubert 	spin_unlock(&queue->lock);
975c2c9af9aSBernd Schubert 
976c2c9af9aSBernd Schubert 	if (!ring->ready) {
977c2c9af9aSBernd Schubert 		bool ready = is_ring_ready(ring, queue->qid);
978c2c9af9aSBernd Schubert 
979c2c9af9aSBernd Schubert 		if (ready) {
980c2c9af9aSBernd Schubert 			WRITE_ONCE(fiq->ops, &fuse_io_uring_ops);
981c2c9af9aSBernd Schubert 			WRITE_ONCE(ring->ready, true);
9823393ff96SBernd Schubert 			wake_up_all(&fc->blocked_waitq);
983c2c9af9aSBernd Schubert 		}
984c2c9af9aSBernd Schubert 	}
98524fe962cSBernd Schubert }
98624fe962cSBernd Schubert 
98724fe962cSBernd Schubert /*
98824fe962cSBernd Schubert  * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
98924fe962cSBernd Schubert  * the payload
99024fe962cSBernd Schubert  */
fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe * sqe,struct iovec iov[FUSE_URING_IOV_SEGS])99124fe962cSBernd Schubert static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
99224fe962cSBernd Schubert 					 struct iovec iov[FUSE_URING_IOV_SEGS])
99324fe962cSBernd Schubert {
99424fe962cSBernd Schubert 	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
99524fe962cSBernd Schubert 	struct iov_iter iter;
99624fe962cSBernd Schubert 	ssize_t ret;
99724fe962cSBernd Schubert 
99824fe962cSBernd Schubert 	if (sqe->len != FUSE_URING_IOV_SEGS)
99924fe962cSBernd Schubert 		return -EINVAL;
100024fe962cSBernd Schubert 
100124fe962cSBernd Schubert 	/*
100224fe962cSBernd Schubert 	 * Direction for buffer access will actually be READ and WRITE,
100324fe962cSBernd Schubert 	 * using write for the import should include READ access as well.
100424fe962cSBernd Schubert 	 */
100524fe962cSBernd Schubert 	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
100624fe962cSBernd Schubert 			   FUSE_URING_IOV_SEGS, &iov, &iter);
100724fe962cSBernd Schubert 	if (ret < 0)
100824fe962cSBernd Schubert 		return ret;
100924fe962cSBernd Schubert 
101024fe962cSBernd Schubert 	return 0;
101124fe962cSBernd Schubert }
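
/*
 * For illustration only: a matching registration submission from the
 * daemon side is expected to look roughly like the sketch below (liburing
 * style pseudo-code; the real implementation lives in libfuse, and the
 * variable names here are made up):
 *
 *	struct iovec iov[FUSE_URING_IOV_SEGS] = {
 *		{ .iov_base = hdr, .iov_len = sizeof(struct fuse_uring_req_header) },
 *		{ .iov_base = payload, .iov_len = max_payload_sz },
 *	};
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&uring); // 128B SQEs
 *	struct fuse_uring_cmd_req *req = (void *)sqe->cmd;
 *
 *	sqe->opcode = IORING_OP_URING_CMD;
 *	sqe->fd = fuse_dev_fd;
 *	sqe->cmd_op = FUSE_IO_URING_CMD_REGISTER;
 *	sqe->addr = (uint64_t)(uintptr_t)iov;
 *	sqe->len = FUSE_URING_IOV_SEGS;
 *	req->qid = qid;
 */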

static struct fuse_ring_ent *
fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
			   struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent;
	size_t payload_size;
	struct iovec iov[FUSE_URING_IOV_SEGS];
	int err;

	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
	if (err) {
		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
				    err);
		return ERR_PTR(err);
	}

	err = -EINVAL;
	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
		return ERR_PTR(err);
	}

	payload_size = iov[1].iov_len;
	if (payload_size < ring->max_payload_sz) {
		pr_info_ratelimited("Invalid req payload len %zu\n",
				    payload_size);
		return ERR_PTR(err);
	}

	err = -ENOMEM;
	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
	if (!ent)
		return ERR_PTR(err);

	INIT_LIST_HEAD(&ent->list);

	ent->queue = queue;
	ent->headers = iov[0].iov_base;
	ent->payload = iov[1].iov_base;

	atomic_inc(&ring->queue_refs);
	return ent;
}

/*
 * Register header and payload buffer with the kernel and put the
 * entry as "ready to get fuse requests" on the queue
 */
static int fuse_uring_register(struct io_uring_cmd *cmd,
			       unsigned int issue_flags, struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring *ring = smp_load_acquire(&fc->ring);
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent;
	int err;
	unsigned int qid = READ_ONCE(cmd_req->qid);

	err = -ENOMEM;
	if (!ring) {
		ring = fuse_uring_create(fc);
		if (!ring)
			return err;
	}

	if (qid >= ring->nr_queues) {
		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
		return -EINVAL;
	}

	queue = ring->queues[qid];
	if (!queue) {
		queue = fuse_uring_create_queue(ring, qid);
		if (!queue)
			return err;
	}

	/*
	 * The queue created above does not need to be destroyed on entry
	 * errors below; that will be done at ring destruction time.
	 */

	ent = fuse_uring_create_ring_ent(cmd, queue);
	if (IS_ERR(ent))
		return PTR_ERR(ent);

	fuse_uring_do_register(ent, cmd, issue_flags);

	return 0;
}
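
/*
 * A hedged sketch of how a server might issue the register command
 * handled above; 'fuse_dev_fd' and the liburing usage are illustrative
 * assumptions, not a fixed API. The io_uring instance must be created
 * with IORING_SETUP_SQE128, since struct fuse_uring_cmd_req lives in
 * the big-SQE command area that io_uring_sqe_cmd() reads.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&io_ring);
 *	struct fuse_uring_cmd_req *req = (struct fuse_uring_cmd_req *)sqe->cmd;
 *
 *	sqe->opcode = IORING_OP_URING_CMD;
 *	sqe->fd     = fuse_dev_fd;	// the fuse device fd
 *	sqe->cmd_op = FUSE_IO_URING_CMD_REGISTER;
 *	req->qid    = qid;		// typically one entry set per CPU
 *
 * The command does not complete here (see -EIOCBQUEUED below); its CQE
 * is held back until a fuse request gets assigned to the entry.
 */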

/*
 * Entry function from io_uring to handle the given passthrough command
 * (op code IORING_OP_URING_CMD)
 */
int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	struct fuse_dev *fud;
	struct fuse_conn *fc;
	u32 cmd_op = cmd->cmd_op;
	int err;

	if (unlikely(issue_flags & IO_URING_F_CANCEL)) {
		fuse_uring_cancel(cmd, issue_flags);
		return 0;
	}

	/* This extra SQE size holds struct fuse_uring_cmd_req */
	if (!(issue_flags & IO_URING_F_SQE128))
		return -EINVAL;

	fud = fuse_get_dev(cmd->file);
	if (!fud) {
		pr_info_ratelimited("No fuse device found\n");
		return -ENOTCONN;
	}
	fc = fud->fc;

	/* Once a connection has io-uring enabled on it, it can't be disabled */
	if (!enable_uring && !fc->io_uring) {
		pr_info_ratelimited("fuse-io-uring is disabled\n");
		return -EOPNOTSUPP;
	}

	if (fc->aborted)
		return -ECONNABORTED;
	if (!fc->connected)
		return -ENOTCONN;

	/*
	 * fuse_uring_register() needs the ring to be initialized, as we
	 * need to know the max payload size.
	 */
	if (!fc->initialized)
		return -EAGAIN;

	switch (cmd_op) {
	case FUSE_IO_URING_CMD_REGISTER:
		err = fuse_uring_register(cmd, issue_flags, fc);
		if (err) {
			pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
				     err);
			fc->io_uring = 0;
			wake_up_all(&fc->blocked_waitq);
			return err;
		}
		break;
	case FUSE_IO_URING_CMD_COMMIT_AND_FETCH:
		err = fuse_uring_commit_fetch(cmd, issue_flags, fc);
		if (err) {
			pr_info_once("FUSE_IO_URING_CMD_COMMIT_AND_FETCH failed err=%d\n",
				     err);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	return -EIOCBQUEUED;
}
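
/*
 * Putting the two commands together, a server's per-entry lifecycle is
 * roughly the following (hedged pseudo-code, helper names made up):
 *
 *	submit(REGISTER);		// entry becomes available to the kernel
 *	for (;;) {
 *		wait_cqe();		// kernel assigned a fuse request
 *		serve(hdr, payload);	// decode header, produce the reply
 *		submit(COMMIT_AND_FETCH); // commit reply, re-arm the entry
 *	}
 *
 * The commit id for COMMIT_AND_FETCH is taken from the header the
 * kernel filled in for the previous request.
 */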

static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
			    ssize_t ret, unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;

	spin_lock(&queue->lock);
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	ent->cmd = NULL;
	spin_unlock(&queue->lock);

	io_uring_cmd_done(cmd, ret, 0, issue_flags);
}

/*
 * This prepares and sends the ring request in fuse-uring task context.
 * User buffers are not mapped yet - the application does not have
 * permission to write to them - so this has to be executed in ring task
 * context.
 */
static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
				    unsigned int issue_flags)
{
	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
	struct fuse_ring_queue *queue = ent->queue;
	int err;

	if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
		err = fuse_uring_prepare_send(ent, ent->fuse_req);
		if (err) {
			fuse_uring_next_fuse_req(ent, queue, issue_flags);
			return;
		}
	} else {
		err = -ECANCELED;
	}

	fuse_uring_send(ent, cmd, err, issue_flags);
}

static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
{
	unsigned int qid;
	struct fuse_ring_queue *queue;

	qid = task_cpu(current);

	if (WARN_ONCE(qid >= ring->nr_queues,
		      "Core number (%u) exceeds nr queues (%zu)\n", qid,
		      ring->nr_queues))
		qid = 0;

	queue = ring->queues[qid];
	WARN_ONCE(!queue, "Missing queue for qid %u\n", qid);

	return queue;
}
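
/*
 * Example: a request submitted by a task running on CPU 3 is routed to
 * ring->queues[3]; should the CPU number ever exceed nr_queues, the
 * function above falls back to queue 0 instead of failing.
 */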

static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent)
{
	struct io_uring_cmd *cmd = ent->cmd;

	uring_cmd_set_ring_ent(cmd, ent);
	io_uring_cmd_complete_in_task(cmd, fuse_uring_send_in_task);
}

/* queue a fuse request and send it if a ring entry is available */
void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	struct fuse_conn *fc = req->fm->fc;
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent = NULL;
	int err;

	err = -EINVAL;
	queue = fuse_uring_task_to_queue(ring);
	if (!queue)
		goto err;

	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
		req->in.h.unique = fuse_get_unique(fiq);

	spin_lock(&queue->lock);
	err = -ENOTCONN;
	if (unlikely(queue->stopped))
		goto err_unlock;

	set_bit(FR_URING, &req->flags);
	req->ring_queue = queue;
	ent = list_first_entry_or_null(&queue->ent_avail_queue,
				       struct fuse_ring_ent, list);
	if (ent)
		fuse_uring_add_req_to_ring_ent(ent, req);
	else
		list_add_tail(&req->list, &queue->fuse_req_queue);
	spin_unlock(&queue->lock);

	if (ent)
		fuse_uring_dispatch_ent(ent);

	return;

err_unlock:
	spin_unlock(&queue->lock);
err:
	req->out.h.error = err;
	clear_bit(FR_PENDING, &req->flags);
	fuse_request_end(req);
}

bool fuse_uring_queue_bq_req(struct fuse_req *req)
{
	struct fuse_conn *fc = req->fm->fc;
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent = NULL;

	queue = fuse_uring_task_to_queue(ring);
	if (!queue)
		return false;

	spin_lock(&queue->lock);
	if (unlikely(queue->stopped)) {
		spin_unlock(&queue->lock);
		return false;
	}

	set_bit(FR_URING, &req->flags);
	req->ring_queue = queue;
	list_add_tail(&req->list, &queue->fuse_req_bg_queue);

	ent = list_first_entry_or_null(&queue->ent_avail_queue,
				       struct fuse_ring_ent, list);
	spin_lock(&fc->bg_lock);
	fc->num_background++;
	if (fc->num_background == fc->max_background)
		fc->blocked = 1;
	fuse_uring_flush_bg(queue);
	spin_unlock(&fc->bg_lock);

	/*
	 * Due to bg_queue flush limits there might be other bg requests
	 * in the queue that need to be handled first, or no further
	 * request might be available. For example, fuse_uring_flush_bg()
	 * may have moved an older bg request onto fuse_req_queue, in which
	 * case that one, not the request just added, is assigned to the
	 * free entry below.
	 */
	req = list_first_entry_or_null(&queue->fuse_req_queue, struct fuse_req,
				       list);
	if (ent && req) {
		fuse_uring_add_req_to_ring_ent(ent, req);
		spin_unlock(&queue->lock);

		fuse_uring_dispatch_ent(ent);
	} else {
		spin_unlock(&queue->lock);
	}

	return true;
}

bool fuse_uring_remove_pending_req(struct fuse_req *req)
{
	struct fuse_ring_queue *queue = req->ring_queue;

	return fuse_remove_pending_req(req, &queue->lock);
}

static const struct fuse_iqueue_ops fuse_io_uring_ops = {
	/* should be sent over io-uring as an enhancement */
	.send_forget = fuse_dev_queue_forget,

	/*
	 * could be sent over io-uring, but interrupts should be rare;
	 * no need to make the code complex
	 */
	.send_interrupt = fuse_dev_queue_interrupt,
	.send_req = fuse_uring_queue_fuse_req,
};
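
/*
 * Note (hedged): after successful registration the connection's input
 * queue dispatches through these ops, i.e. fuse_send_one() reaches
 * fuse_uring_queue_fuse_req() via fiq->ops->send_req() instead of the
 * /dev/fuse read/write path; forgets and interrupts still go through
 * the device queue handlers referenced above.
 */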