1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef BLK_MQ_H 3 #define BLK_MQ_H 4 5 #include <linux/blkdev.h> 6 #include <linux/sbitmap.h> 7 #include <linux/srcu.h> 8 9 struct blk_mq_tags; 10 struct blk_flush_queue; 11 12 /** 13 * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device 14 */ 15 struct blk_mq_hw_ctx { 16 struct { 17 spinlock_t lock; 18 struct list_head dispatch; 19 unsigned long state; /* BLK_MQ_S_* flags */ 20 } ____cacheline_aligned_in_smp; 21 22 struct delayed_work run_work; 23 cpumask_var_t cpumask; 24 int next_cpu; 25 int next_cpu_batch; 26 27 unsigned long flags; /* BLK_MQ_F_* flags */ 28 29 void *sched_data; 30 struct request_queue *queue; 31 struct blk_flush_queue *fq; 32 33 void *driver_data; 34 35 struct sbitmap ctx_map; 36 37 struct blk_mq_ctx *dispatch_from; 38 unsigned int dispatch_busy; 39 40 unsigned int nr_ctx; 41 struct blk_mq_ctx **ctxs; 42 43 spinlock_t dispatch_wait_lock; 44 wait_queue_entry_t dispatch_wait; 45 atomic_t wait_index; 46 47 struct blk_mq_tags *tags; 48 struct blk_mq_tags *sched_tags; 49 50 unsigned long queued; 51 unsigned long run; 52 #define BLK_MQ_MAX_DISPATCH_ORDER 7 53 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; 54 55 unsigned int numa_node; 56 unsigned int queue_num; 57 58 atomic_t nr_active; 59 unsigned int nr_expired; 60 61 struct hlist_node cpuhp_dead; 62 struct kobject kobj; 63 64 unsigned long poll_considered; 65 unsigned long poll_invoked; 66 unsigned long poll_success; 67 68 #ifdef CONFIG_BLK_DEBUG_FS 69 struct dentry *debugfs_dir; 70 struct dentry *sched_debugfs_dir; 71 #endif 72 73 /* Must be the last member - see also blk_mq_hw_ctx_size(). */ 74 struct srcu_struct srcu[0]; 75 }; 76 77 struct blk_mq_tag_set { 78 unsigned int *mq_map; 79 const struct blk_mq_ops *ops; 80 unsigned int nr_hw_queues; 81 unsigned int queue_depth; /* max hw supported */ 82 unsigned int reserved_tags; 83 unsigned int cmd_size; /* per-request extra data */ 84 int numa_node; 85 unsigned int timeout; 86 unsigned int flags; /* BLK_MQ_F_* */ 87 void *driver_data; 88 89 struct blk_mq_tags **tags; 90 91 struct mutex tag_list_lock; 92 struct list_head tag_list; 93 }; 94 95 struct blk_mq_queue_data { 96 struct request *rq; 97 bool last; 98 }; 99 100 typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, 101 const struct blk_mq_queue_data *); 102 typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); 103 typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); 104 typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); 105 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); 106 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); 107 typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, 108 unsigned int, unsigned int); 109 typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, 110 unsigned int); 111 112 typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, 113 bool); 114 typedef void (busy_tag_iter_fn)(struct request *, void *, bool); 115 typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); 116 typedef int (map_queues_fn)(struct blk_mq_tag_set *set); 117 118 119 struct blk_mq_ops { 120 /* 121 * Queue request 122 */ 123 queue_rq_fn *queue_rq; 124 125 /* 126 * Reserve budget before queue request, once .queue_rq is 127 * run, it is driver's responsibility to release the 128 * reserved budget. Also we have to handle failure case 129 * of .get_budget for avoiding I/O deadlock. 130 */ 131 get_budget_fn *get_budget; 132 put_budget_fn *put_budget; 133 134 /* 135 * Called on request timeout 136 */ 137 timeout_fn *timeout; 138 139 /* 140 * Called to poll for completion of a specific tag. 141 */ 142 poll_fn *poll; 143 144 softirq_done_fn *complete; 145 146 /* 147 * Called when the block layer side of a hardware queue has been 148 * set up, allowing the driver to allocate/init matching structures. 149 * Ditto for exit/teardown. 150 */ 151 init_hctx_fn *init_hctx; 152 exit_hctx_fn *exit_hctx; 153 154 /* 155 * Called for every command allocated by the block layer to allow 156 * the driver to set up driver specific data. 157 * 158 * Tag greater than or equal to queue_depth is for setting up 159 * flush request. 160 * 161 * Ditto for exit/teardown. 162 */ 163 init_request_fn *init_request; 164 exit_request_fn *exit_request; 165 /* Called from inside blk_get_request() */ 166 void (*initialize_rq_fn)(struct request *rq); 167 168 map_queues_fn *map_queues; 169 170 #ifdef CONFIG_BLK_DEBUG_FS 171 /* 172 * Used by the debugfs implementation to show driver-specific 173 * information about a request. 174 */ 175 void (*show_rq)(struct seq_file *m, struct request *rq); 176 #endif 177 }; 178 179 enum { 180 BLK_MQ_F_SHOULD_MERGE = 1 << 0, 181 BLK_MQ_F_TAG_SHARED = 1 << 1, 182 BLK_MQ_F_SG_MERGE = 1 << 2, 183 BLK_MQ_F_BLOCKING = 1 << 5, 184 BLK_MQ_F_NO_SCHED = 1 << 6, 185 BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, 186 BLK_MQ_F_ALLOC_POLICY_BITS = 1, 187 188 BLK_MQ_S_STOPPED = 0, 189 BLK_MQ_S_TAG_ACTIVE = 1, 190 BLK_MQ_S_SCHED_RESTART = 2, 191 192 BLK_MQ_MAX_DEPTH = 10240, 193 194 BLK_MQ_CPU_WORK_BATCH = 8, 195 }; 196 #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ 197 ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ 198 ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) 199 #define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \ 200 ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ 201 << BLK_MQ_F_ALLOC_POLICY_START_BIT) 202 203 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); 204 struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, 205 struct request_queue *q); 206 int blk_mq_register_dev(struct device *, struct request_queue *); 207 void blk_mq_unregister_dev(struct device *, struct request_queue *); 208 209 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); 210 void blk_mq_free_tag_set(struct blk_mq_tag_set *set); 211 212 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); 213 214 void blk_mq_free_request(struct request *rq); 215 bool blk_mq_can_queue(struct blk_mq_hw_ctx *); 216 217 enum { 218 /* return when out of requests */ 219 BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), 220 /* allocate from reserved pool */ 221 BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), 222 /* allocate internal/sched tag */ 223 BLK_MQ_REQ_INTERNAL = (__force blk_mq_req_flags_t)(1 << 2), 224 /* set RQF_PREEMPT */ 225 BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), 226 }; 227 228 struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, 229 blk_mq_req_flags_t flags); 230 struct request *blk_mq_alloc_request_hctx(struct request_queue *q, 231 unsigned int op, blk_mq_req_flags_t flags, 232 unsigned int hctx_idx); 233 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); 234 235 enum { 236 BLK_MQ_UNIQUE_TAG_BITS = 16, 237 BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1, 238 }; 239 240 u32 blk_mq_unique_tag(struct request *rq); 241 242 static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag) 243 { 244 return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS; 245 } 246 247 static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) 248 { 249 return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; 250 } 251 252 253 int blk_mq_request_started(struct request *rq); 254 void blk_mq_start_request(struct request *rq); 255 void blk_mq_end_request(struct request *rq, blk_status_t error); 256 void __blk_mq_end_request(struct request *rq, blk_status_t error); 257 258 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); 259 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, 260 bool kick_requeue_list); 261 void blk_mq_kick_requeue_list(struct request_queue *q); 262 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); 263 void blk_mq_complete_request(struct request *rq); 264 bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, 265 struct bio *bio); 266 bool blk_mq_queue_stopped(struct request_queue *q); 267 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); 268 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); 269 void blk_mq_stop_hw_queues(struct request_queue *q); 270 void blk_mq_start_hw_queues(struct request_queue *q); 271 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 272 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); 273 void blk_mq_quiesce_queue(struct request_queue *q); 274 void blk_mq_unquiesce_queue(struct request_queue *q); 275 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); 276 bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 277 void blk_mq_run_hw_queues(struct request_queue *q, bool async); 278 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, 279 busy_tag_iter_fn *fn, void *priv); 280 void blk_mq_freeze_queue(struct request_queue *q); 281 void blk_mq_unfreeze_queue(struct request_queue *q); 282 void blk_freeze_queue_start(struct request_queue *q); 283 void blk_mq_freeze_queue_wait(struct request_queue *q); 284 int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, 285 unsigned long timeout); 286 287 int blk_mq_map_queues(struct blk_mq_tag_set *set); 288 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); 289 290 void blk_mq_quiesce_queue_nowait(struct request_queue *q); 291 292 /** 293 * blk_mq_mark_complete() - Set request state to complete 294 * @rq: request to set to complete state 295 * 296 * Returns true if request state was successfully set to complete. If 297 * successful, the caller is responsibile for seeing this request is ended, as 298 * blk_mq_complete_request will not work again. 299 */ 300 static inline bool blk_mq_mark_complete(struct request *rq) 301 { 302 return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) == 303 MQ_RQ_IN_FLIGHT; 304 } 305 306 /* 307 * Driver command data is immediately after the request. So subtract request 308 * size to get back to the original request, add request size to get the PDU. 309 */ 310 static inline struct request *blk_mq_rq_from_pdu(void *pdu) 311 { 312 return pdu - sizeof(struct request); 313 } 314 static inline void *blk_mq_rq_to_pdu(struct request *rq) 315 { 316 return rq + 1; 317 } 318 319 #define queue_for_each_hw_ctx(q, hctx, i) \ 320 for ((i) = 0; (i) < (q)->nr_hw_queues && \ 321 ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) 322 323 #define hctx_for_each_ctx(hctx, ctx, i) \ 324 for ((i) = 0; (i) < (hctx)->nr_ctx && \ 325 ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) 326 327 #endif 328