/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <[email protected]>
 *
 * Copyright (C) 2008 Fabio Checconi <[email protected]>
 *		      Paolo Valente <[email protected]>
 *
 * Copyright (C) 2009 Vivek Goyal <[email protected]>
 *		      Nauman Rafique <[email protected]>
 */

#include <linux/cgroup.h>
#include <linux/percpu_counter.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP

enum blkg_rwstat_type {
	BLKG_RWSTAT_READ,
	BLKG_RWSTAT_WRITE,
	BLKG_RWSTAT_SYNC,
	BLKG_RWSTAT_ASYNC,
	BLKG_RWSTAT_DISCARD,

	BLKG_RWSTAT_NR,
	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
};

struct blkcg_gq;

struct blkcg {
	struct cgroup_subsys_state	css;
	spinlock_t			lock;

	struct radix_tree_root		blkg_tree;
	struct blkcg_gq	__rcu		*blkg_hint;
	struct hlist_head		blkg_list;

	struct blkcg_policy_data	*cpd[BLKCG_MAX_POLS];

	struct list_head		all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head		cgwb_list;
	refcount_t			cgwb_refcnt;
#endif
};

/*
 * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
 * recursive.  Used to carry stats of dead children, and, for blkg_rwstat,
 * to carry result values from read and sum operations.
 */
struct blkg_stat {
	struct percpu_counter		cpu_cnt;
	atomic64_t			aux_cnt;
};

struct blkg_rwstat {
	struct percpu_counter		cpu_cnt[BLKG_RWSTAT_NR];
	atomic64_t			aux_cnt[BLKG_RWSTAT_NR];
};

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate a private data
 * area by allocating a larger data structure which embeds blkg_policy_data
 * at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq			*blkg;
	int				plid;
};

/*
 * Policies that need to keep per-blkcg data which is independent of any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods.  A policy can allocate a private data area by allocating a larger
 * data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg			*blkcg;
	int				plid;
};
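/*
 * Illustrative sketch (not part of this header): a policy that needs both
 * per-blkg and per-blkcg state would typically wrap the two structures
 * above.  The "foo" names below are hypothetical and exist only to show the
 * "embed at the beginning" convention.
 *
 *	struct foo_blkg_data {
 *		struct blkg_policy_data pd;	// must be the first member
 *		u64 bytes_dispatched;
 *	};
 *
 *	struct foo_blkcg_data {
 *		struct blkcg_policy_data cpd;	// must be the first member
 *		unsigned int weight;
 *	};
 *
 *	static inline struct foo_blkg_data *pd_to_foo(struct blkg_policy_data *pd)
 *	{
 *		return pd ? container_of(pd, struct foo_blkg_data, pd) : NULL;
 *	}
 */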
/* association between a blk cgroup and a request queue */
struct blkcg_gq {
	/* Pointer to the associated request_queue */
	struct request_queue		*q;
	struct list_head		q_node;
	struct hlist_node		blkcg_node;
	struct blkcg			*blkcg;

	/*
	 * Each blkg gets congested separately and the congestion state is
	 * propagated to the matching bdi_writeback_congested.
	 */
	struct bdi_writeback_congested	*wb_congested;

	/* all non-root blkcg_gq's are guaranteed to have access to parent */
	struct blkcg_gq			*parent;

	/* request allocation list for this blkcg-q pair */
	struct request_list		rl;

	/* reference count */
	atomic_t			refcnt;

	/* is this blkg online? protected by both blkcg and q locks */
	bool				online;

	struct blkg_rwstat		stat_bytes;
	struct blkg_rwstat		stat_ios;

	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];

	struct rcu_head			rcu_head;

	atomic_t			use_delay;
	atomic64_t			delay_nsec;
	atomic64_t			delay_start;
	u64				last_delay;
	int				last_use;
};

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
				      size_t size);

struct blkcg_policy {
	int				plid;
	/* cgroup files for the policy */
	struct cftype			*dfl_cftypes;
	struct cftype			*legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
	blkcg_pol_init_cpd_fn		*cpd_init_fn;
	blkcg_pol_free_cpd_fn		*cpd_free_fn;
	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;

	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
	blkcg_pol_init_pd_fn		*pd_init_fn;
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
	blkcg_pol_free_pd_fn		*pd_free_fn;
	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn		*pd_stat_fn;
};

extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol);
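/*
 * Illustrative sketch (not part of this header): the rough shape of a policy
 * definition and its registration.  "foo" and its pd_alloc/pd_free helpers
 * are hypothetical; a real policy wires up whichever callbacks it needs and
 * typically also calls blkcg_activate_policy() on each queue it manages.
 *
 *	static struct blkcg_policy blkcg_policy_foo = {
 *		.pd_alloc_fn	= foo_pd_alloc,
 *		.pd_init_fn	= foo_pd_init,
 *		.pd_free_fn	= foo_pd_free,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return blkcg_policy_register(&blkcg_policy_foo);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		blkcg_policy_unregister(&blkcg_policy_foo);
 *	}
 */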
const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat);
u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
		       int off);
int blkg_print_stat_bytes(struct seq_file *sf, void *v);
int blkg_print_stat_ios(struct seq_file *sf, void *v);
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);

u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
			    struct blkcg_policy *pol, int off);
struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
					     struct blkcg_policy *pol, int off);

struct blkg_conf_ctx {
	struct gendisk			*disk;
	struct blkcg_gq			*blkg;
	char				*body;
};

int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);


static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct blkcg, css) : NULL;
}

static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	struct cgroup_subsys_state *css;

	if (bio && bio->bi_css)
		return css_to_blkcg(bio->bi_css);
	css = kthread_blkcg();
	if (css)
		return css_to_blkcg(css);
	return css_to_blkcg(task_css(current, io_cgrp_id));
}

static inline bool blk_cgroup_congested(void)
{
	struct cgroup_subsys_state *css;
	bool ret = false;

	rcu_read_lock();
	css = kthread_blkcg();
	if (!css)
		css = task_css(current, io_cgrp_id);
	while (css) {
		if (atomic_read(&css->cgroup->congestion_count)) {
			ret = true;
			break;
		}
		css = css->parent;
	}
	rcu_read_unlock();
	return ret;
}

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @bio: the target bio
 *
 * Return: true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as if
 * it were attached to the root blkg, and then backcharge to the actual owning
 * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
 * bio and attach the appropriate blkg to the bio.  Then we call this helper
 * and if it is true run with the root blkg for that queue and then do any
 * backcharging to the originating cgroup once the io is complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}

/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg.  Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
	return css_to_blkcg(blkcg->css.parent);
}
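/*
 * Illustrative sketch (not part of this header): blkcg_parent() returns NULL
 * for the root blkcg, so walking towards the root is a simple loop.  The
 * foo_blkcg_matches() predicate below is hypothetical.
 *
 *	static bool foo_any_ancestor_matches(struct blkcg *blkcg)
 *	{
 *		while (blkcg) {
 *			if (foo_blkcg_matches(blkcg))
 *				return true;
 *			blkcg = blkcg_parent(blkcg);
 *		}
 *		return false;
 *	}
 */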
/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up the blkg for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and the lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
 * - see blk_queue_bypass_start() for details.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (unlikely(blk_queue_bypass(q)))
		return NULL;
	return __blkg_lookup(blkcg, q, false);
}

/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level.  See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
	return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data.  Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}
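/*
 * Illustrative sketch (not part of this header): a typical hot-path lookup of
 * a policy's per-blkg data for a bio.  "blkcg_policy_foo" and
 * "foo_blkg_data" are hypothetical names from the sketches above.
 *
 *	struct blkg_policy_data *pd;
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(bio_blkcg(bio), q);
 *	pd = blkg ? blkg_to_pd(blkg, &blkcg_policy_foo) : NULL;
 *	if (pd) {
 *		// e.g. container_of(pd, struct foo_blkg_data, pd)
 *	}
 *	rcu_read_unlock();
 */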
extern void blkcg_destroy_blkgs(struct blkcg *blkcg);

#ifdef CONFIG_CGROUP_WRITEBACK

/**
 * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 */
static inline void blkcg_cgwb_get(struct blkcg *blkcg)
{
	refcount_inc(&blkcg->cgwb_refcnt);
}

/**
 * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 * When this count goes to zero, all active wbs have finished so the
 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
 * This work may occur in cgwb_release_workfn() on the cgwb_release
 * workqueue.
 */
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
		blkcg_destroy_blkgs(blkcg);
}

#else

static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }

static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
	/* wb isn't being accounted, so trigger destruction right away */
	blkcg_destroy_blkgs(blkcg);
}

#endif

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	atomic_inc(&blkg->refcnt);
}

/**
 * blkg_try_get - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg.  We may be in the
 * midst of freeing this blkg, so we can only use it if the refcnt is not
 * zero.
 */
static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
{
	if (atomic_inc_not_zero(&blkg->refcnt))
		return blkg;
	return NULL;
}


void __blkg_release_rcu(struct rcu_head *rcu);

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
	if (atomic_dec_and_test(&blkg->refcnt))
		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
}

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip a subtree.
 * @p_blkg is included in the iteration and is the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))
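/*
 * Illustrative sketch (not part of this header): visiting a blkg and all of
 * its descendants under the RCU read lock.  foo_update_one() is a
 * hypothetical per-blkg operation.
 *
 *	struct cgroup_subsys_state *pos_css;
 *	struct blkcg_gq *d_blkg;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(d_blkg, pos_css, blkg)
 *		foo_update_one(d_blkg);
 *	rcu_read_unlock();
 */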
/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and is the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

/**
 * blk_get_rl - get request_list to use
 * @q: request_queue of interest
 * @bio: bio which will be attached to the allocated request (may be %NULL)
 *
 * The caller wants to allocate a request from @q to use for @bio.  Find
 * the request_list to use and obtain a reference on it.  Should be called
 * under queue_lock.  This function is guaranteed to return a non-%NULL
 * request_list.
 */
static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;

	rcu_read_lock();

	blkcg = bio_blkcg(bio);

	/* bypass blkg lookup and use @q->root_rl directly for root */
	if (blkcg == &blkcg_root)
		goto root_rl;

	/*
	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
	 * or if either the blkcg or queue is going away.  Fall back to
	 * root_rl in such cases.
	 */
	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg))
		goto root_rl;

	blkg_get(blkg);
	rcu_read_unlock();
	return &blkg->rl;
root_rl:
	rcu_read_unlock();
	return &q->root_rl;
}

/**
 * blk_put_rl - put request_list
 * @rl: request_list to put
 *
 * Put the reference acquired by blk_get_rl().  Should be called under
 * queue_lock.
 */
static inline void blk_put_rl(struct request_list *rl)
{
	if (rl->blkg->blkcg != &blkcg_root)
		blkg_put(rl->blkg);
}

/**
 * blk_rq_set_rl - associate a request with a request_list
 * @rq: request of interest
 * @rl: target request_list
 *
 * Associate @rq with @rl so that accounting and freeing can know the
 * request_list @rq came from.
 */
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
{
	rq->rl = rl;
}

/**
 * blk_rq_rl - return the request_list a request came from
 * @rq: request of interest
 *
 * Return the request_list @rq is allocated from.
 */
static inline struct request_list *blk_rq_rl(struct request *rq)
{
	return rq->rl;
}

struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q);
/**
 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
 * @rl: request_list cursor
 * @q: request_queue of interest
 *
 * Should be used under queue_lock.
 */
#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
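/*
 * Illustrative sketch (not part of this header): the request_list helpers are
 * used in pairs by the legacy request allocation path, roughly as follows
 * (error handling omitted).
 *
 *	struct request_list *rl;
 *
 *	// while holding q->queue_lock
 *	rl = blk_get_rl(q, bio);
 *	blk_rq_set_rl(rq, rl);
 *	...
 *	// when the request is freed
 *	blk_put_rl(blk_rq_rl(rq));
 */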
static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;

	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

static inline void blkg_stat_exit(struct blkg_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}

/**
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @val: value to add
 *
 * Add @val to @stat.  The caller must ensure that IRQs on the same CPU
 * don't re-enter this function for the same counter.
 */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
 */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
	return percpu_counter_sum_positive(&stat->cpu_cnt);
}

/**
 * blkg_stat_reset - reset a blkg_stat
 * @stat: blkg_stat to reset
 */
static inline void blkg_stat_reset(struct blkg_stat *stat)
{
	percpu_counter_set(&stat->cpu_cnt, 0);
	atomic64_set(&stat->aux_cnt, 0);
}

/**
 * blkg_stat_add_aux - add a blkg_stat into another's aux count
 * @to: the destination blkg_stat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_stat_add_aux(struct blkg_stat *to,
				     struct blkg_stat *from)
{
	atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
}

static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
	int i, ret;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
		if (ret) {
			while (--i >= 0)
				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
			return ret;
		}
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
	return 0;
}

static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
}

/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @op: REQ_OP and flags
 * @val: value to add
 *
 * Add @val to @rwstat.  The counters are chosen according to @op.  The
 * caller is responsible for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   unsigned int op, uint64_t val)
{
	struct percpu_counter *cnt;

	if (op_is_discard(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
	else if (op_is_write(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);

	if (op_is_sync(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Read the current snapshot of @rwstat and return it in the aux counts.
 */
static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat result;
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_set(&result.aux_cnt[i],
			     percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
	return result;
}
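/*
 * Illustrative sketch (not part of this header): typical lifecycle of a
 * blkg_rwstat embedded in a policy's per-blkg data (the surrounding policy
 * code is hypothetical).
 *
 *	struct blkg_rwstat rws;
 *
 *	if (blkg_rwstat_init(&rws, GFP_KERNEL))
 *		return -ENOMEM;
 *
 *	// per-bio accounting, e.g. from an issue hook:
 *	blkg_rwstat_add(&rws, bio->bi_opf, bio->bi_iter.bi_size);
 *
 *	// reporting:
 *	u64 bytes = blkg_rwstat_total(&rws);
 *
 *	blkg_rwstat_exit(&rws);
 */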
/**
 * blkg_rwstat_total - read the total count of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Return the total count of @rwstat regardless of the IO direction.  This
 * function can be called without synchronization and takes care of u64
 * atomicity.
 */
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);

	return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
		atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
}

/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		percpu_counter_set(&rwstat->cpu_cnt[i], 0);
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
}

/**
 * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
 * @to: the destination blkg_rwstat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
				       struct blkg_rwstat *from)
{
	u64 sum[BLKG_RWSTAT_NR];
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
			     &to->aux_cnt[i]);
}

#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
			   struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
				  struct bio *bio) { return false; }
#endif

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;
	bool throtl = false;

	rcu_read_lock();
	blkcg = bio_blkcg(bio);

	/* associate blkcg if bio hasn't attached one */
	bio_associate_blkcg(bio, &blkcg->css);

	blkg = blkg_lookup(blkcg, q);
	if (unlikely(!blkg)) {
		spin_lock_irq(q->queue_lock);
		blkg = blkg_lookup_create(blkcg, q);
		if (IS_ERR(blkg))
			blkg = NULL;
		spin_unlock_irq(q->queue_lock);
	}

	throtl = blk_throtl_bio(q, blkg, bio);

	if (!throtl) {
		blkg = blkg ?: q->root_blkg;
		/*
		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
		 * is a split bio and we would have already accounted for the
		 * size of the bio.
		 */
		if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
			blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
					bio->bi_iter.bi_size);
		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
	}

	rcu_read_unlock();
	return !throtl;
}

static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}
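/*
 * Illustrative sketch (not part of this header): a policy that wants tasks in
 * a cgroup to see it as congested (via blk_cgroup_congested()) brackets the
 * congested period with the delay helpers; the trigger condition below is
 * hypothetical.
 *
 *	if (foo_blkg_is_over_limit(blkg))
 *		blkcg_use_delay(blkg);
 *	else
 *		blkcg_unuse_delay(blkg);
 */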
static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay.  If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble.  We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
		if (cur == old)
			break;
		old = cur;
	}

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}

static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);
	if (!old)
		return;
	/* We only want 1 person clearing the congestion count for this blkg. */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
		if (cur == old) {
			atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
			break;
		}
		old = cur;
	}
}

void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
#else	/* CONFIG_BLK_CGROUP */

struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

#define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }

#ifdef CONFIG_BLOCK

static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }

static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline struct request_list *blk_get_rl(struct request_queue *q,
					      struct bio *bio) { return &q->root_rl; }
static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLOCK */
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */