/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <[email protected]>
 *
 * Copyright (C) 2008 Fabio Checconi <[email protected]>
 *		      Paolo Valente <[email protected]>
 *
 * Copyright (C) 2009 Vivek Goyal <[email protected]>
 *		      Nauman Rafique <[email protected]>
 */

#include <linux/cgroup.h>
#include <linux/percpu_counter.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP

enum blkg_rwstat_type {
	BLKG_RWSTAT_READ,
	BLKG_RWSTAT_WRITE,
	BLKG_RWSTAT_SYNC,
	BLKG_RWSTAT_ASYNC,
	BLKG_RWSTAT_DISCARD,

	BLKG_RWSTAT_NR,
	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
};

struct blkcg_gq;

struct blkcg {
	struct cgroup_subsys_state	css;
	spinlock_t			lock;

	struct radix_tree_root		blkg_tree;
	struct blkcg_gq	__rcu		*blkg_hint;
	struct hlist_head		blkg_list;

	struct blkcg_policy_data	*cpd[BLKCG_MAX_POLS];

	struct list_head		all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head		cgwb_list;
#endif
};

/*
 * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
 * recursive stats.  It is used to carry the stats of dead children and,
 * for blkg_rwstat, to carry result values from read and sum operations.
 */
struct blkg_stat {
	struct percpu_counter		cpu_cnt;
	atomic64_t			aux_cnt;
};

struct blkg_rwstat {
	struct percpu_counter		cpu_cnt[BLKG_RWSTAT_NR];
	atomic64_t			aux_cnt[BLKG_RWSTAT_NR];
};

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate a private
 * data area by allocating a larger data structure which embeds
 * blkg_policy_data at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq			*blkg;
	int				plid;
	bool				offline;
};

/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated to it should implement the cpd_alloc/free_fn()
 * methods.  A policy can allocate a private data area by allocating a
 * larger data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg			*blkcg;
	int				plid;
};
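
/*
 * Example (illustrative sketch, kept under #if 0 so it is never compiled):
 * a hypothetical policy "foo" gets private per-blkg and per-blkcg areas by
 * embedding blkg_policy_data / blkcg_policy_data as the first member of its
 * own structures, so the pointers stored in blkg->pd[] and blkcg->cpd[] can
 * be converted back with container_of().  All "foo" names are made up.
 */
#if 0
struct foo_blkg_data {
	struct blkg_policy_data		pd;	/* must be first */
	u64				ios;	/* policy-private fields */
};

struct foo_blkcg_data {
	struct blkcg_policy_data	cpd;	/* must be first */
	unsigned int			weight;
};

static inline struct foo_blkg_data *pd_to_foo(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct foo_blkg_data, pd) : NULL;
}
#endif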

/* association between a blk cgroup and a request queue */
struct blkcg_gq {
	/* Pointer to the associated request_queue */
	struct request_queue		*q;
	struct list_head		q_node;
	struct hlist_node		blkcg_node;
	struct blkcg			*blkcg;

	/*
	 * Each blkg gets congested separately and the congestion state is
	 * propagated to the matching bdi_writeback_congested.
	 */
	struct bdi_writeback_congested	*wb_congested;

	/* all non-root blkcg_gq's are guaranteed to have access to parent */
	struct blkcg_gq			*parent;

	/* request allocation list for this blkcg-q pair */
	struct request_list		rl;

	/* reference count */
	atomic_t			refcnt;

	/* is this blkg online? protected by both blkcg and q locks */
	bool				online;

	struct blkg_rwstat		stat_bytes;
	struct blkg_rwstat		stat_ios;

	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];

	struct rcu_head			rcu_head;

	atomic_t			use_delay;
	atomic64_t			delay_nsec;
	atomic64_t			delay_start;
	u64				last_delay;
	int				last_use;
};

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
				      size_t size);

struct blkcg_policy {
	int				plid;
	/* cgroup files for the policy */
	struct cftype			*dfl_cftypes;
	struct cftype			*legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
	blkcg_pol_init_cpd_fn		*cpd_init_fn;
	blkcg_pol_free_cpd_fn		*cpd_free_fn;
	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;

	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
	blkcg_pol_init_pd_fn		*pd_init_fn;
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
	blkcg_pol_free_pd_fn		*pd_free_fn;
	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn		*pd_stat_fn;
};

extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol);

const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat);
u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
		       int off);
int blkg_print_stat_bytes(struct seq_file *sf, void *v);
int blkg_print_stat_ios(struct seq_file *sf, void *v);
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);

u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
			    struct blkcg_policy *pol, int off);
struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
					     struct blkcg_policy *pol, int off);

struct blkg_conf_ctx {
	struct gendisk			*disk;
	struct blkcg_gq			*blkg;
	char				*body;
};

int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
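
/*
 * Example (illustrative sketch, kept under #if 0 so it is never compiled):
 * how a hypothetical policy would tie into the registration and activation
 * hooks declared above.  blkcg_policy_foo, foo_files and the foo_*
 * callbacks are made-up names; ->plid is assigned by
 * blkcg_policy_register(), not by the policy itself.
 */
#if 0
static struct blkcg_policy blkcg_policy_foo = {
	.dfl_cftypes	= foo_files,
	.legacy_cftypes	= foo_legacy_files,
	.pd_alloc_fn	= foo_pd_alloc,
	.pd_init_fn	= foo_pd_init,
	.pd_free_fn	= foo_pd_free,
};

static int __init foo_init(void)
{
	return blkcg_policy_register(&blkcg_policy_foo);
}

/* typically called from the policy's per-queue init path */
static int foo_init_queue(struct request_queue *q)
{
	return blkcg_activate_policy(q, &blkcg_policy_foo);
}
#endif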

static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct blkcg, css) : NULL;
}

static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	struct cgroup_subsys_state *css;

	if (bio && bio->bi_css)
		return css_to_blkcg(bio->bi_css);
	css = kthread_blkcg();
	if (css)
		return css_to_blkcg(css);
	return css_to_blkcg(task_css(current, io_cgrp_id));
}

static inline bool blk_cgroup_congested(void)
{
	struct cgroup_subsys_state *css;
	bool ret = false;

	rcu_read_lock();
	css = kthread_blkcg();
	if (!css)
		css = task_css(current, io_cgrp_id);
	while (css) {
		if (atomic_read(&css->cgroup->congestion_count)) {
			ret = true;
			break;
		}
		css = css->parent;
	}
	rcu_read_unlock();
	return ret;
}

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @return: true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as if
 * it were attached to the root blkg, and then backcharge to the actual owning
 * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
 * bio and attach the appropriate blkg to the bio.  Then we call this helper
 * and if it is true run with the root blkg for that queue and then do any
 * backcharging to the originating cgroup once the io is complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}
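
/*
 * Example (illustrative sketch, kept under #if 0 so it is never compiled):
 * the issue-as-root flow described above.  foo_submit() and the
 * backcharging bookkeeping are hypothetical; bio_blkcg(),
 * bio_associate_blkcg() and bio_issue_as_root_blkg() are the real helpers.
 */
#if 0
static void foo_submit(struct bio *bio)
{
	struct blkcg *blkcg = bio_blkcg(bio);

	/* remember the real owner so completion can backcharge it */
	bio_associate_blkcg(bio, &blkcg->css);

	if (bio_issue_as_root_blkg(bio)) {
		/*
		 * Issue under the queue's root blkg to avoid priority
		 * inversion and backcharge @blkcg once the IO completes.
		 */
	}
}
#endif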

/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg.  Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
	return css_to_blkcg(blkcg->css.parent);
}

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and the lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
 * - see blk_queue_bypass_start() for details.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (unlikely(blk_queue_bypass(q)))
		return NULL;
	return __blkg_lookup(blkcg, q, false);
}

/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level.  See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
	return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data.  Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	atomic_inc(&blkg->refcnt);
}
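
/*
 * Example (illustrative sketch, kept under #if 0 so it is never compiled):
 * the common lookup pattern for a policy's per-blkg data.  blkcg_policy_foo
 * and pd_to_foo() are hypothetical (see the embedding sketch near the top
 * of this file); the result is only stable while the blkg stays pinned,
 * e.g. by @q->queue_lock or by an explicit reference.
 */
#if 0
static struct foo_blkg_data *foo_lookup(struct blkcg *blkcg,
					struct request_queue *q)
{
	struct blkcg_gq *blkg;
	struct foo_blkg_data *foo = NULL;

	rcu_read_lock();
	blkg = blkg_lookup(blkcg, q);	/* NULL while @q is bypassing */
	if (blkg)
		foo = pd_to_foo(blkg_to_pd(blkg, &blkcg_policy_foo));
	rcu_read_unlock();

	return foo;
}
#endif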

/**
 * blkg_try_get - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg.  We may be in the
 * midst of freeing this blkg, so we can only use it if the refcnt is not
 * zero.
 */
static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
{
	if (atomic_inc_not_zero(&blkg->refcnt))
		return blkg;
	return NULL;
}

void __blkg_release_rcu(struct rcu_head *rcu);

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	if (atomic_dec_and_test(&blkg->refcnt))
		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
}

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip a subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))
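
/*
 * Example (illustrative sketch, kept under #if 0 so it is never compiled):
 * walking every blkg at or below @p_blkg with the iterator above; the loop
 * body is a placeholder and foo_reset_subtree() is a made-up name.
 */
#if 0
static void foo_reset_subtree(struct blkcg_gq *p_blkg)
{
	struct cgroup_subsys_state *pos_css;
	struct blkcg_gq *d_blkg;

	rcu_read_lock();
	blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) {
		/* ... operate on each blkg, e.g. reset its stats ... */
	}
	rcu_read_unlock();
}
#endif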

/**
 * blk_get_rl - get request_list to use
 * @q: request_queue of interest
 * @bio: bio which will be attached to the allocated request (may be %NULL)
 *
 * The caller wants to allocate a request from @q to use for @bio.  Find
 * the request_list to use and obtain a reference on it.  Should be called
 * under queue_lock.  This function is guaranteed to return a non-%NULL
 * request_list.
 */
static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;

	rcu_read_lock();

	blkcg = bio_blkcg(bio);

	/* bypass blkg lookup and use @q->root_rl directly for root */
	if (blkcg == &blkcg_root)
		goto root_rl;

	/*
	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
	 * or if either the blkcg or queue is going away.  Fall back to
	 * root_rl in such cases.
	 */
	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg))
		goto root_rl;

	blkg_get(blkg);
	rcu_read_unlock();
	return &blkg->rl;
root_rl:
	rcu_read_unlock();
	return &q->root_rl;
}

/**
 * blk_put_rl - put request_list
 * @rl: request_list to put
 *
 * Put the reference acquired by blk_get_rl().  Should be called under
 * queue_lock.
 */
static inline void blk_put_rl(struct request_list *rl)
{
	if (rl->blkg->blkcg != &blkcg_root)
		blkg_put(rl->blkg);
}

/**
 * blk_rq_set_rl - associate a request with a request_list
 * @rq: request of interest
 * @rl: target request_list
 *
 * Associate @rq with @rl so that accounting and freeing can know the
 * request_list @rq came from.
 */
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
{
	rq->rl = rl;
}

/**
 * blk_rq_rl - return the request_list a request came from
 * @rq: request of interest
 *
 * Return the request_list @rq is allocated from.
 */
static inline struct request_list *blk_rq_rl(struct request *rq)
{
	return rq->rl;
}

struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q);
/**
 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
 *
 * Should be used under queue_lock.
 */
#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))

static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;

	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

static inline void blkg_stat_exit(struct blkg_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}

/**
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @val: value to add
 *
 * Add @val to @stat.  The caller must ensure that IRQs on the same CPU
 * don't re-enter this function for the same counter.
 */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
 */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
	return percpu_counter_sum_positive(&stat->cpu_cnt);
}

/**
 * blkg_stat_reset - reset a blkg_stat
 * @stat: blkg_stat to reset
 */
static inline void blkg_stat_reset(struct blkg_stat *stat)
{
	percpu_counter_set(&stat->cpu_cnt, 0);
	atomic64_set(&stat->aux_cnt, 0);
}
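
/*
 * Example (illustrative sketch, kept under #if 0 so it is never compiled):
 * typical lifetime of a blkg_stat counter owned by a policy's private data.
 * foo_stat_demo() is a made-up name.
 */
#if 0
static int foo_stat_demo(void)
{
	struct blkg_stat st;

	if (blkg_stat_init(&st, GFP_KERNEL))
		return -ENOMEM;

	blkg_stat_add(&st, 1);		/* per-cpu, batched update */
	pr_debug("count=%llu\n",
		 (unsigned long long)blkg_stat_read(&st));	/* sums all CPUs */

	blkg_stat_exit(&st);
	return 0;
}
#endif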

/**
 * blkg_stat_add_aux - add a blkg_stat into another's aux count
 * @to: the destination blkg_stat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_stat_add_aux(struct blkg_stat *to,
				     struct blkg_stat *from)
{
	atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
}

static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
	int i, ret;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
		if (ret) {
			while (--i >= 0)
				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
			return ret;
		}
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
	return 0;
}

static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
}

/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @op: REQ_OP and flags
 * @val: value to add
 *
 * Add @val to @rwstat.  The counters are chosen according to @op.  The
 * caller is responsible for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   unsigned int op, uint64_t val)
{
	struct percpu_counter *cnt;

	if (op_is_discard(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
	else if (op_is_write(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);

	if (op_is_sync(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Read the current snapshot of @rwstat and return it in the aux counts.
 */
static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat result;
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_set(&result.aux_cnt[i],
			     percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
	return result;
}

/**
 * blkg_rwstat_total - read the total count of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Return the total count of @rwstat regardless of the IO direction.  This
 * function can be called without synchronization and takes care of u64
 * atomicity.
 */
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);

	return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
		atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
}

/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		percpu_counter_set(&rwstat->cpu_cnt[i], 0);
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
}
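
/*
 * Example (illustrative sketch, kept under #if 0 so it is never compiled):
 * accounting a bio in a blkg_rwstat and reading back the READ+WRITE total.
 * foo_rwstat_demo() is a made-up name.
 */
#if 0
static int foo_rwstat_demo(struct bio *bio)
{
	struct blkg_rwstat rws;

	if (blkg_rwstat_init(&rws, GFP_KERNEL))
		return -ENOMEM;

	/* the direction/sync/discard buckets are picked from the op flags */
	blkg_rwstat_add(&rws, bio->bi_opf, bio->bi_iter.bi_size);
	pr_debug("bytes=%llu\n",
		 (unsigned long long)blkg_rwstat_total(&rws));

	blkg_rwstat_exit(&rws);
	return 0;
}
#endif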

/**
 * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
 * @to: the destination blkg_rwstat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
				       struct blkg_rwstat *from)
{
	u64 sum[BLKG_RWSTAT_NR];
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
			     &to->aux_cnt[i]);
}

#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
			   struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
				  struct bio *bio) { return false; }
#endif

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;
	bool throtl = false;

	rcu_read_lock();
	blkcg = bio_blkcg(bio);

	/* associate blkcg if bio hasn't attached one */
	bio_associate_blkcg(bio, &blkcg->css);

	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg)) {
		spin_lock_irq(q->queue_lock);
		blkg = blkg_lookup_create(blkcg, q);
		if (IS_ERR(blkg))
			blkg = NULL;
		spin_unlock_irq(q->queue_lock);
	}

	throtl = blk_throtl_bio(q, blkg, bio);

	if (!throtl) {
		blkg = blkg ?: q->root_blkg;
		/*
		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
		 * is a split bio and we would have already accounted for the
		 * size of the bio.
		 */
		if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
			blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
					bio->bi_iter.bi_size);
		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
	}

	rcu_read_unlock();
	return !throtl;
}

static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay.  If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble.  We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
		if (cur == old)
			break;
		old = cur;
	}

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}
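
/*
 * Example (illustrative sketch, kept under #if 0 so it is never compiled):
 * a policy that wants issuers of this blkg throttled marks it delayed,
 * which feeds the cgroup congestion_count consulted by
 * blk_cgroup_congested(), and drops the mark once the debt is repaid.
 */
#if 0
	blkcg_use_delay(blkg);		/* 0 -> 1 bumps congestion_count */
	/* ... issuing tasks get delayed, e.g. via blkcg_add_delay() ... */
	if (blkcg_unuse_delay(blkg)) {
		/* we dropped one delay marker; the last one undoes the bump */
	}
#endif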

static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (!old)
		return;
	/*
	 * We only want 1 person clearing the congestion count for this
	 * blkg.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
		if (cur == old) {
			atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
			break;
		}
		old = cur;
	}
}

void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);

#else	/* CONFIG_BLK_CGROUP */

struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

#define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }

#ifdef CONFIG_BLOCK

static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }

static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio) { return &q->root_rl; }
static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q) \
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLOCK */
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */