/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_MQ_H
#define BLK_MQ_H

#include <linux/blkdev.h>
#include <linux/sbitmap.h>
#include <linux/srcu.h>

struct blk_mq_tags;
struct blk_flush_queue;

struct blk_mq_hw_ctx {
	struct {
		spinlock_t		lock;
		struct list_head	dispatch;
		unsigned long		state;		/* BLK_MQ_S_* flags */
	} ____cacheline_aligned_in_smp;

	struct delayed_work	run_work;
	cpumask_var_t		cpumask;
	int			next_cpu;
	int			next_cpu_batch;

	unsigned long		flags;		/* BLK_MQ_F_* flags */

	void			*sched_data;
	struct request_queue	*queue;
	struct blk_flush_queue	*fq;

	void			*driver_data;

	struct sbitmap		ctx_map;

	struct blk_mq_ctx	*dispatch_from;

	struct blk_mq_ctx	**ctxs;
	unsigned int		nr_ctx;

	wait_queue_entry_t	dispatch_wait;
	atomic_t		wait_index;

	struct blk_mq_tags	*tags;
	struct blk_mq_tags	*sched_tags;

	unsigned long		queued;
	unsigned long		run;
#define BLK_MQ_MAX_DISPATCH_ORDER	7
	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];

	unsigned int		numa_node;
	unsigned int		queue_num;

	atomic_t		nr_active;

	struct hlist_node	cpuhp_dead;
	struct kobject		kobj;

	unsigned long		poll_considered;
	unsigned long		poll_invoked;
	unsigned long		poll_success;

#ifdef CONFIG_BLK_DEBUG_FS
	struct dentry		*debugfs_dir;
	struct dentry		*sched_debugfs_dir;
#endif

	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
	struct srcu_struct	queue_rq_srcu[0];
};
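
/*
 * Illustrative sketch (not part of this header): per-hctx driver state is
 * usually hung off ->driver_data from the .init_hctx callback, where the
 * data argument is the tag set's ->driver_data.  "my_*" names below are
 * hypothetical.
 *
 *	static int my_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 *				unsigned int hctx_idx)
 *	{
 *		struct my_device *dev = data;
 *
 *		hctx->driver_data = &dev->hw_queues[hctx_idx];
 *		return 0;
 *	}
 */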

struct blk_mq_tag_set {
	unsigned int		*mq_map;
	const struct blk_mq_ops	*ops;
	unsigned int		nr_hw_queues;
	unsigned int		queue_depth;	/* max hw supported */
	unsigned int		reserved_tags;
	unsigned int		cmd_size;	/* per-request extra data */
	int			numa_node;
	unsigned int		timeout;
	unsigned int		flags;		/* BLK_MQ_F_* */
	void			*driver_data;

	struct blk_mq_tags	**tags;

	struct mutex		tag_list_lock;
	struct list_head	tag_list;
};
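
/*
 * Illustrative sketch (not part of this header): a driver typically fills
 * in a tag set like this and hands it to blk_mq_alloc_tag_set(), then
 * creates the request queue with blk_mq_init_queue() (both declared below).
 * "my_*" names are hypothetical.
 *
 *	static struct blk_mq_tag_set my_tag_set = {
 *		.ops		= &my_mq_ops,
 *		.nr_hw_queues	= 1,
 *		.queue_depth	= 64,
 *		.numa_node	= NUMA_NO_NODE,
 *		.cmd_size	= sizeof(struct my_cmd),
 *		.flags		= BLK_MQ_F_SHOULD_MERGE,
 *	};
 *
 *	static int my_probe(struct my_device *dev)
 *	{
 *		struct request_queue *q;
 *		int ret;
 *
 *		ret = blk_mq_alloc_tag_set(&my_tag_set);
 *		if (ret)
 *			return ret;
 *
 *		q = blk_mq_init_queue(&my_tag_set);
 *		if (IS_ERR(q)) {
 *			blk_mq_free_tag_set(&my_tag_set);
 *			return PTR_ERR(q);
 *		}
 *		dev->queue = q;
 *		return 0;
 *	}
 */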

struct blk_mq_queue_data {
	struct request *rq;
	bool last;
};

typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
		const struct blk_mq_queue_data *);
typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *,
		unsigned int, unsigned int);
typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *,
		unsigned int);

typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
		bool);
typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int);
typedef int (map_queues_fn)(struct blk_mq_tag_set *set);


struct blk_mq_ops {
	/*
	 * Queue request
	 */
	queue_rq_fn		*queue_rq;

	/*
	 * Reserve budget before queuing a request. Once .queue_rq is
	 * run, it is the driver's responsibility to release the
	 * reserved budget. The failure case of .get_budget must also
	 * be handled to avoid I/O deadlocks.
	 */
	get_budget_fn		*get_budget;
	put_budget_fn		*put_budget;

	/*
	 * Called on request timeout
	 */
	timeout_fn		*timeout;

	/*
	 * Called to poll for completion of a specific tag.
	 */
	poll_fn			*poll;

	softirq_done_fn		*complete;

	/*
	 * Called when the block layer side of a hardware queue has been
	 * set up, allowing the driver to allocate/init matching structures.
	 * Ditto for exit/teardown.
	 */
	init_hctx_fn		*init_hctx;
	exit_hctx_fn		*exit_hctx;

	/*
	 * Called for every command allocated by the block layer to allow
	 * the driver to set up driver-specific data.
	 *
	 * A tag greater than or equal to queue_depth is used for setting
	 * up the flush request.
	 *
	 * Ditto for exit/teardown.
	 */
	init_request_fn		*init_request;
	exit_request_fn		*exit_request;
	/* Called from inside blk_get_request() */
	void (*initialize_rq_fn)(struct request *rq);

	map_queues_fn		*map_queues;

#ifdef CONFIG_BLK_DEBUG_FS
	/*
	 * Used by the debugfs implementation to show driver-specific
	 * information about a request.
	 */
	void (*show_rq)(struct seq_file *m, struct request *rq);
#endif
};
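
/*
 * Illustrative sketch (not part of this header): a minimal blk_mq_ops
 * implementation.  "my_*" names are hypothetical, and completing the
 * request directly in .queue_rq (null_blk style) stands in for submitting
 * it to real hardware and completing it from an interrupt handler.
 *
 *	static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					const struct blk_mq_queue_data *bd)
 *	{
 *		struct request *rq = bd->rq;
 *
 *		blk_mq_start_request(rq);
 *		blk_mq_end_request(rq, BLK_STS_OK);
 *		return BLK_STS_OK;
 *	}
 *
 *	static const struct blk_mq_ops my_mq_ops = {
 *		.queue_rq	= my_queue_rq,
 *	};
 *
 * Real drivers usually also provide .complete, .init_request and .timeout
 * so that completions, per-request setup and error handling go through the
 * callbacks documented above.
 */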

enum {
	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
	BLK_MQ_F_TAG_SHARED	= 1 << 1,
	BLK_MQ_F_SG_MERGE	= 1 << 2,
	BLK_MQ_F_BLOCKING	= 1 << 5,
	BLK_MQ_F_NO_SCHED	= 1 << 6,
	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
	BLK_MQ_F_ALLOC_POLICY_BITS = 1,

	BLK_MQ_S_STOPPED	= 0,
	BLK_MQ_S_TAG_ACTIVE	= 1,
	BLK_MQ_S_SCHED_RESTART	= 2,
	BLK_MQ_S_START_ON_RUN	= 3,

	BLK_MQ_MAX_DEPTH	= 10240,

	BLK_MQ_CPU_WORK_BATCH	= 8,
};
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
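
/*
 * Illustrative use of the helpers above (sketch, not part of this header):
 * a driver encodes a tag allocation policy from blkdev.h, e.g.
 * BLK_TAG_ALLOC_RR, into the tag set flags; the core later recovers it
 * with BLK_MQ_FLAG_TO_ALLOC_POLICY().
 *
 *	set->flags = BLK_MQ_F_SHOULD_MERGE |
 *		     BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_RR);
 */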

struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
						  struct request_queue *q);
int blk_mq_register_dev(struct device *, struct request_queue *);
void blk_mq_unregister_dev(struct device *, struct request_queue *);

int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
void blk_mq_free_tag_set(struct blk_mq_tag_set *set);

void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);

void blk_mq_free_request(struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);

enum {
	/* return when out of requests */
	BLK_MQ_REQ_NOWAIT	= (__force blk_mq_req_flags_t)(1 << 0),
	/* allocate from reserved pool */
	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
	/* allocate internal/sched tag */
	BLK_MQ_REQ_INTERNAL	= (__force blk_mq_req_flags_t)(1 << 2),
	/* set RQF_PREEMPT */
	BLK_MQ_REQ_PREEMPT	= (__force blk_mq_req_flags_t)(1 << 3),
};

struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
		blk_mq_req_flags_t flags);
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
		unsigned int op, blk_mq_req_flags_t flags,
		unsigned int hctx_idx);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
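
/*
 * Illustrative sketch (not part of this header): allocating a passthrough
 * request without sleeping and releasing it when done.  REQ_OP_DRV_IN
 * comes from blk_types.h; on failure an ERR_PTR() is returned rather than
 * NULL.
 *
 *	struct request *rq;
 *
 *	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	blk_mq_free_request(rq);
 */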

enum {
	BLK_MQ_UNIQUE_TAG_BITS = 16,
	BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1,
};

u32 blk_mq_unique_tag(struct request *rq);

static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag)
{
	return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS;
}

static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
{
	return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}
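
/*
 * Illustrative sketch (not part of this header): a driver that funnels all
 * completions through one interrupt can carry a single 32-bit value per
 * command and split it back into hardware queue number and tag:
 *
 *	u32 unique = blk_mq_unique_tag(rq);
 *	u16 hwq = blk_mq_unique_tag_to_hwq(unique);
 *	u16 tag = blk_mq_unique_tag_to_tag(unique);
 */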


int blk_mq_request_started(struct request *rq);
void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);

void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
				bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
void blk_mq_complete_request(struct request *rq);
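
/*
 * Illustrative completion path (sketch, not part of this header): from its
 * interrupt handler a driver looks the request up by tag and hands it to
 * blk_mq_complete_request(), which typically ends up in ops->complete.
 * "my_hw_tag" is a hypothetical value read back from the device.
 *
 *	struct request *rq = blk_mq_tag_to_rq(hctx->tags, my_hw_tag);
 *
 *	if (rq)
 *		blk_mq_complete_request(rq);
 */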

bool blk_mq_queue_stopped(struct request_queue *q);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q);
void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv);
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_unfreeze_queue(struct request_queue *q);
void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
				     unsigned long timeout);
int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
		int (reinit_request)(void *, struct request *));

int blk_mq_map_queues(struct blk_mq_tag_set *set);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);

void blk_mq_quiesce_queue_nowait(struct request_queue *q);
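
/*
 * Illustrative sketch (not part of this header): freezing drains all
 * in-flight requests and blocks new ones, while quiescing only prevents
 * further dispatch to .queue_rq.  A driver reconfiguring its device
 * (my_reconfigure_device() is hypothetical) would typically do:
 *
 *	blk_mq_freeze_queue(q);
 *	my_reconfigure_device(dev);
 *	blk_mq_unfreeze_queue(q);
 */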

/*
 * Driver command data is immediately after the request, so subtract the
 * request size to get back to the original request, and add the request
 * size to get the PDU.
 */
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
	return pdu - sizeof(struct request);
}
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
	return rq + 1;
}
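
/*
 * Illustrative sketch (not part of this header): with cmd_size set to
 * sizeof(struct my_cmd) in the tag set, the per-request driver data sits
 * directly behind the request, and the helpers above convert both ways:
 *
 *	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);
 *	struct request *same_rq = blk_mq_rq_from_pdu(cmd);
 */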

#define queue_for_each_hw_ctx(q, hctx, i)				\
	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
	     ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)

#define hctx_for_each_ctx(hctx, ctx, i)					\
	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
	     ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
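
/*
 * Illustrative sketch (not part of this header): walking every hardware
 * queue of a request queue, e.g. from debug or teardown code:
 *
 *	struct blk_mq_hw_ctx *hctx;
 *	int i;
 *
 *	queue_for_each_hw_ctx(q, hctx, i)
 *		pr_info("hctx %u maps %u software queues\n",
 *			hctx->queue_num, hctx->nr_ctx);
 */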

#endif