1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 /* memcontrol.h - Memory Controller 3 * 4 * Copyright IBM Corporation, 2007 5 * Author Balbir Singh <[email protected]> 6 * 7 * Copyright 2007 OpenVZ SWsoft Inc 8 * Author: Pavel Emelianov <[email protected]> 9 */ 10 11 #ifndef _LINUX_MEMCONTROL_H 12 #define _LINUX_MEMCONTROL_H 13 #include <linux/cgroup.h> 14 #include <linux/vm_event_item.h> 15 #include <linux/hardirq.h> 16 #include <linux/jump_label.h> 17 #include <linux/page_counter.h> 18 #include <linux/vmpressure.h> 19 #include <linux/eventfd.h> 20 #include <linux/mm.h> 21 #include <linux/vmstat.h> 22 #include <linux/writeback.h> 23 #include <linux/page-flags.h> 24 25 struct mem_cgroup; 26 struct obj_cgroup; 27 struct page; 28 struct mm_struct; 29 struct kmem_cache; 30 31 /* Cgroup-specific page state, on top of universal node page state */ 32 enum memcg_stat_item { 33 MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, 34 MEMCG_SOCK, 35 MEMCG_PERCPU_B, 36 MEMCG_NR_STAT, 37 }; 38 39 enum memcg_memory_event { 40 MEMCG_LOW, 41 MEMCG_HIGH, 42 MEMCG_MAX, 43 MEMCG_OOM, 44 MEMCG_OOM_KILL, 45 MEMCG_SWAP_HIGH, 46 MEMCG_SWAP_MAX, 47 MEMCG_SWAP_FAIL, 48 MEMCG_NR_MEMORY_EVENTS, 49 }; 50 51 struct mem_cgroup_reclaim_cookie { 52 pg_data_t *pgdat; 53 unsigned int generation; 54 }; 55 56 #ifdef CONFIG_MEMCG 57 58 #define MEM_CGROUP_ID_SHIFT 16 59 #define MEM_CGROUP_ID_MAX USHRT_MAX 60 61 struct mem_cgroup_id { 62 int id; 63 refcount_t ref; 64 }; 65 66 /* 67 * Per memcg event counter is incremented at every pagein/pageout. With THP, 68 * it will be incremented by the number of pages. This counter is used 69 * to trigger some periodic events. This is straightforward and better 70 * than using jiffies etc. to handle periodic memcg event. 71 */ 72 enum mem_cgroup_events_target { 73 MEM_CGROUP_TARGET_THRESH, 74 MEM_CGROUP_TARGET_SOFTLIMIT, 75 MEM_CGROUP_NTARGETS, 76 }; 77 78 struct memcg_vmstats_percpu { 79 /* Local (CPU and cgroup) page state & events */ 80 long state[MEMCG_NR_STAT]; 81 unsigned long events[NR_VM_EVENT_ITEMS]; 82 83 /* Delta calculation for lockless upward propagation */ 84 long state_prev[MEMCG_NR_STAT]; 85 unsigned long events_prev[NR_VM_EVENT_ITEMS]; 86 87 /* Cgroup1: threshold notifications & softlimit tree updates */ 88 unsigned long nr_page_events; 89 unsigned long targets[MEM_CGROUP_NTARGETS]; 90 }; 91 92 struct memcg_vmstats { 93 /* Aggregated (CPU and subtree) page state & events */ 94 long state[MEMCG_NR_STAT]; 95 unsigned long events[NR_VM_EVENT_ITEMS]; 96 97 /* Pending child counts during tree propagation */ 98 long state_pending[MEMCG_NR_STAT]; 99 unsigned long events_pending[NR_VM_EVENT_ITEMS]; 100 }; 101 102 struct mem_cgroup_reclaim_iter { 103 struct mem_cgroup *position; 104 /* scan generation, increased every round-trip */ 105 unsigned int generation; 106 }; 107 108 struct lruvec_stat { 109 long count[NR_VM_NODE_STAT_ITEMS]; 110 }; 111 112 struct batched_lruvec_stat { 113 s32 count[NR_VM_NODE_STAT_ITEMS]; 114 }; 115 116 /* 117 * Bitmap and deferred work of shrinker::id corresponding to memcg-aware 118 * shrinkers, which have elements charged to this memcg. 119 */ 120 struct shrinker_info { 121 struct rcu_head rcu; 122 atomic_long_t *nr_deferred; 123 unsigned long *map; 124 }; 125 126 /* 127 * per-node information in memory controller. 128 */ 129 struct mem_cgroup_per_node { 130 struct lruvec lruvec; 131 132 /* 133 * Legacy local VM stats. This should be struct lruvec_stat and 134 * cannot be optimized to struct batched_lruvec_stat. 
The 135 * threshold of the lruvec_stat_cpu can be as big as 136 * MEMCG_CHARGE_BATCH * PAGE_SIZE, so its counters fit into an s32, 137 * but this field has no upper limit. 138 */ 139 struct lruvec_stat __percpu *lruvec_stat_local; 140 141 /* Subtree VM stats (batched updates) */ 142 struct batched_lruvec_stat __percpu *lruvec_stat_cpu; 143 atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; 144 145 unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; 146 147 struct mem_cgroup_reclaim_iter iter; 148 149 struct shrinker_info __rcu *shrinker_info; 150 151 struct rb_node tree_node; /* RB tree node */ 152 unsigned long usage_in_excess;/* Set to the value by which */ 153 /* the soft limit is exceeded*/ 154 bool on_tree; 155 struct mem_cgroup *memcg; /* Back pointer, we cannot */ 156 /* use container_of */ 157 }; 158 159 struct mem_cgroup_threshold { 160 struct eventfd_ctx *eventfd; 161 unsigned long threshold; 162 }; 163 164 /* For threshold */ 165 struct mem_cgroup_threshold_ary { 166 /* An array index points to threshold just below or equal to usage. */ 167 int current_threshold; 168 /* Size of entries[] */ 169 unsigned int size; 170 /* Array of thresholds */ 171 struct mem_cgroup_threshold entries[]; 172 }; 173 174 struct mem_cgroup_thresholds { 175 /* Primary thresholds array */ 176 struct mem_cgroup_threshold_ary *primary; 177 /* 178 * Spare threshold array. 179 * This is needed to make mem_cgroup_unregister_event() "never fail". 180 * It must be able to store at least primary->size - 1 entries. 181 */ 182 struct mem_cgroup_threshold_ary *spare; 183 }; 184 185 enum memcg_kmem_state { 186 KMEM_NONE, 187 KMEM_ALLOCATED, 188 KMEM_ONLINE, 189 }; 190 191 #if defined(CONFIG_SMP) 192 struct memcg_padding { 193 char x[0]; 194 } ____cacheline_internodealigned_in_smp; 195 #define MEMCG_PADDING(name) struct memcg_padding name; 196 #else 197 #define MEMCG_PADDING(name) 198 #endif 199 200 /* 201 * Remember the four most recent foreign writebacks with dirty pages in this 202 * cgroup. Inode sharing is expected to be uncommon and, even if we miss 203 * one in a given round, we're likely to catch it later if it keeps 204 * foreign-dirtying, so a fairly low count should be enough. 205 * 206 * See mem_cgroup_track_foreign_dirty_slowpath() for details. 207 */ 208 #define MEMCG_CGWB_FRN_CNT 4 209 210 struct memcg_cgwb_frn { 211 u64 bdi_id; /* bdi->id of the foreign inode */ 212 int memcg_id; /* memcg->css.id of foreign inode */ 213 u64 at; /* jiffies_64 at the time of dirtying */ 214 struct wb_completion done; /* tracks in-flight foreign writebacks */ 215 }; 216 217 /* 218 * Bucket for arbitrarily byte-sized objects charged to a memory 219 * cgroup. The bucket can be reparented in one piece when the cgroup 220 * is destroyed, without having to round up the individual references 221 * of all live memory objects in the wild. 222 */ 223 struct obj_cgroup { 224 struct percpu_ref refcnt; 225 struct mem_cgroup *memcg; 226 atomic_t nr_charged_bytes; 227 union { 228 struct list_head list; 229 struct rcu_head rcu; 230 }; 231 }; 232 233 /* 234 * The memory controller data structure. The memory controller controls both 235 * page cache and RSS per cgroup. We would eventually like to provide 236 * statistics based on the statistics developed by Rik Van Riel for clock-pro, 237 * to help the administrator determine what knobs to tune. 238 */ 239 struct mem_cgroup { 240 struct cgroup_subsys_state css; 241 242 /* Private memcg ID.
Used to ID objects that outlive the cgroup */ 243 struct mem_cgroup_id id; 244 245 /* Accounted resources */ 246 struct page_counter memory; /* Both v1 & v2 */ 247 248 union { 249 struct page_counter swap; /* v2 only */ 250 struct page_counter memsw; /* v1 only */ 251 }; 252 253 /* Legacy consumer-oriented counters */ 254 struct page_counter kmem; /* v1 only */ 255 struct page_counter tcpmem; /* v1 only */ 256 257 /* Range enforcement for interrupt charges */ 258 struct work_struct high_work; 259 260 unsigned long soft_limit; 261 262 /* vmpressure notifications */ 263 struct vmpressure vmpressure; 264 265 /* 266 * Should the OOM killer kill all tasks belonging to this cgroup, if it has to kill one? 267 */ 268 bool oom_group; 269 270 /* protected by memcg_oom_lock */ 271 bool oom_lock; 272 int under_oom; 273 274 int swappiness; 275 /* OOM-Killer disable */ 276 int oom_kill_disable; 277 278 /* memory.events and memory.events.local */ 279 struct cgroup_file events_file; 280 struct cgroup_file events_local_file; 281 282 /* handle for "memory.swap.events" */ 283 struct cgroup_file swap_events_file; 284 285 /* protect arrays of thresholds */ 286 struct mutex thresholds_lock; 287 288 /* thresholds for memory usage. RCU-protected */ 289 struct mem_cgroup_thresholds thresholds; 290 291 /* thresholds for mem+swap usage. RCU-protected */ 292 struct mem_cgroup_thresholds memsw_thresholds; 293 294 /* For oom notifier event fd */ 295 struct list_head oom_notify; 296 297 /* 298 * Should we move charges of a task when a task is moved into this 299 * mem_cgroup? And what type of charges should we move? 300 */ 301 unsigned long move_charge_at_immigrate; 302 /* taken only while moving_account > 0 */ 303 spinlock_t move_lock; 304 unsigned long move_lock_flags; 305 306 MEMCG_PADDING(_pad1_); 307 308 /* memory.stat */ 309 struct memcg_vmstats vmstats; 310 311 /* memory.events */ 312 atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; 313 atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS]; 314 315 unsigned long socket_pressure; 316 317 /* Legacy tcp memory accounting */ 318 bool tcpmem_active; 319 int tcpmem_pressure; 320 321 #ifdef CONFIG_MEMCG_KMEM 322 int kmemcg_id; 323 enum memcg_kmem_state kmem_state; 324 struct obj_cgroup __rcu *objcg; 325 struct list_head objcg_list; /* list of inherited objcgs */ 326 #endif 327 328 MEMCG_PADDING(_pad2_); 329 330 /* 331 * Set > 0 if pages under this cgroup are moving to another cgroup. 332 */ 333 atomic_t moving_account; 334 struct task_struct *move_lock_task; 335 336 struct memcg_vmstats_percpu __percpu *vmstats_percpu; 337 338 #ifdef CONFIG_CGROUP_WRITEBACK 339 struct list_head cgwb_list; 340 struct wb_domain cgwb_domain; 341 struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT]; 342 #endif 343 344 /* List of events which userspace wants to receive */ 345 struct list_head event_list; 346 spinlock_t event_list_lock; 347 348 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 349 struct deferred_split deferred_split_queue; 350 #endif 351 352 struct mem_cgroup_per_node *nodeinfo[0]; 353 /* WARNING: nodeinfo must be the last member here */ 354 }; 355 356 /* 357 * Size of the first charge trial. "32" comes from vmscan.c's magic value. 358 * TODO: it may be necessary to use bigger numbers on big-iron machines.
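 * (Note: per the batched_lruvec_stat comment above, this batch also bounds
 *  the error of the per-CPU stat caches to at most
 *  MEMCG_CHARGE_BATCH * PAGE_SIZE bytes per CPU.)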
359 */ 360 #define MEMCG_CHARGE_BATCH 32U 361 362 extern struct mem_cgroup *root_mem_cgroup; 363 364 enum page_memcg_data_flags { 365 /* page->memcg_data is a pointer to an objcgs vector */ 366 MEMCG_DATA_OBJCGS = (1UL << 0), 367 /* page has been accounted as a non-slab kernel page */ 368 MEMCG_DATA_KMEM = (1UL << 1), 369 /* the next bit after the last actual flag */ 370 __NR_MEMCG_DATA_FLAGS = (1UL << 2), 371 }; 372 373 #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) 374 375 static inline bool PageMemcgKmem(struct page *page); 376 377 /* 378 * After initialization, objcg->memcg always points at 379 * a valid memcg, but can be atomically swapped to the parent memcg. 380 * 381 * The caller must ensure that the returned memcg won't be released: 382 * e.g. acquire the rcu_read_lock or css_set_lock. 383 */ 384 static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) 385 { 386 return READ_ONCE(objcg->memcg); 387 } 388 389 /* 390 * __page_memcg - get the memory cgroup associated with a non-kmem page 391 * @page: a pointer to the page struct 392 * 393 * Returns a pointer to the memory cgroup associated with the page, 394 * or NULL. This function assumes that the page is known to have a 395 * proper memory cgroup pointer. It's not safe to call this function 396 * against some types of pages, e.g. slab pages or ex-slab pages or 397 * kmem pages. 398 */ 399 static inline struct mem_cgroup *__page_memcg(struct page *page) 400 { 401 unsigned long memcg_data = page->memcg_data; 402 403 VM_BUG_ON_PAGE(PageSlab(page), page); 404 VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); 405 VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); 406 407 return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); 408 } 409 410 /* 411 * __page_objcg - get the object cgroup associated with a kmem page 412 * @page: a pointer to the page struct 413 * 414 * Returns a pointer to the object cgroup associated with the page, 415 * or NULL. This function assumes that the page is known to have a 416 * proper object cgroup pointer. It's not safe to call this function 417 * against some types of pages, e.g. slab pages or ex-slab pages or 418 * LRU pages. 419 */ 420 static inline struct obj_cgroup *__page_objcg(struct page *page) 421 { 422 unsigned long memcg_data = page->memcg_data; 423 424 VM_BUG_ON_PAGE(PageSlab(page), page); 425 VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); 426 VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page); 427 428 return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); 429 } 430 431 /* 432 * page_memcg - get the memory cgroup associated with a page 433 * @page: a pointer to the page struct 434 * 435 * Returns a pointer to the memory cgroup associated with the page, 436 * or NULL. This function assumes that the page is known to have a 437 * proper memory cgroup pointer. It's not safe to call this function 438 * against some types of pages, e.g. slab pages or ex-slab pages. 439 * 440 * For a non-kmem page any of the following ensures page and memcg binding 441 * stability: 442 * 443 * - the page lock 444 * - LRU isolation 445 * - lock_page_memcg() 446 * - exclusive reference 447 * 448 * For a kmem page, the caller should hold an RCU read lock to protect the memcg 449 * associated with the kmem page from being released.
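 *
 * A minimal usage sketch (assuming the caller is able to take the page
 * lock, one of the stabilizing conditions listed above):
 *
 *	struct mem_cgroup *memcg;
 *
 *	lock_page(page);
 *	memcg = page_memcg(page);
 *	... memcg cannot change under us here ...
 *	unlock_page(page);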
450 */ 451 static inline struct mem_cgroup *page_memcg(struct page *page) 452 { 453 if (PageMemcgKmem(page)) 454 return obj_cgroup_memcg(__page_objcg(page)); 455 else 456 return __page_memcg(page); 457 } 458 459 /* 460 * page_memcg_rcu - locklessly get the memory cgroup associated with a page 461 * @page: a pointer to the page struct 462 * 463 * Returns a pointer to the memory cgroup associated with the page, 464 * or NULL. This function assumes that the page is known to have a 465 * proper memory cgroup pointer. It's not safe to call this function 466 * against some types of pages, e.g. slab pages or ex-slab pages. 467 */ 468 static inline struct mem_cgroup *page_memcg_rcu(struct page *page) 469 { 470 unsigned long memcg_data = READ_ONCE(page->memcg_data); 471 472 VM_BUG_ON_PAGE(PageSlab(page), page); 473 WARN_ON_ONCE(!rcu_read_lock_held()); 474 475 if (memcg_data & MEMCG_DATA_KMEM) { 476 struct obj_cgroup *objcg; 477 478 objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); 479 return obj_cgroup_memcg(objcg); 480 } 481 482 return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); 483 } 484 485 /* 486 * page_memcg_check - get the memory cgroup associated with a page 487 * @page: a pointer to the page struct 488 * 489 * Returns a pointer to the memory cgroup associated with the page, 490 * or NULL. Unlike page_memcg(), this function can take any page 491 * as an argument. It has to be used in cases when it is not known whether a page 492 * has an associated memory cgroup pointer, an object cgroups vector, or 493 * an object cgroup. 494 * 495 * For a non-kmem page any of the following ensures page and memcg binding 496 * stability: 497 * 498 * - the page lock 499 * - LRU isolation 500 * - lock_page_memcg() 501 * - exclusive reference 502 * 503 * For a kmem page, the caller should hold an RCU read lock to protect the memcg 504 * associated with the kmem page from being released. 505 */ 506 static inline struct mem_cgroup *page_memcg_check(struct page *page) 507 { 508 /* 509 * Because page->memcg_data might be changed asynchronously 510 * for slab pages, READ_ONCE() should be used here. 511 */ 512 unsigned long memcg_data = READ_ONCE(page->memcg_data); 513 514 if (memcg_data & MEMCG_DATA_OBJCGS) 515 return NULL; 516 517 if (memcg_data & MEMCG_DATA_KMEM) { 518 struct obj_cgroup *objcg; 519 520 objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); 521 return obj_cgroup_memcg(objcg); 522 } 523 524 return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); 525 } 526 527 #ifdef CONFIG_MEMCG_KMEM 528 /* 529 * PageMemcgKmem - check if the page has MemcgKmem flag set 530 * @page: a pointer to the page struct 531 * 532 * Checks if the page has MemcgKmem flag set. The caller must ensure that 533 * the page has an associated memory cgroup. It's not safe to call this function 534 * against some types of pages, e.g. slab pages. 535 */ 536 static inline bool PageMemcgKmem(struct page *page) 537 { 538 VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page); 539 return page->memcg_data & MEMCG_DATA_KMEM; 540 } 541 542 /* 543 * page_objcgs - get the object cgroups vector associated with a page 544 * @page: a pointer to the page struct 545 * 546 * Returns a pointer to the object cgroups vector associated with the page, 547 * or NULL. This function assumes that the page is known to have an 548 * associated object cgroups vector. It's not safe to call this function 549 * against pages that might have an associated memory cgroup, e.g. 550 * kernel stack pages.
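 *
 * The vector is primarily consumed by the slab allocator, which keeps one
 * obj_cgroup pointer per object of a slab page (see the memcg slab hooks
 * in mm/slab.h).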
551 */ 552 static inline struct obj_cgroup **page_objcgs(struct page *page) 553 { 554 unsigned long memcg_data = READ_ONCE(page->memcg_data); 555 556 VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); 557 VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); 558 559 return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); 560 } 561 562 /* 563 * page_objcgs_check - get the object cgroups vector associated with a page 564 * @page: a pointer to the page struct 565 * 566 * Returns a pointer to the object cgroups vector associated with the page, 567 * or NULL. This function is safe to use if the page can be directly associated 568 * with a memory cgroup. 569 */ 570 static inline struct obj_cgroup **page_objcgs_check(struct page *page) 571 { 572 unsigned long memcg_data = READ_ONCE(page->memcg_data); 573 574 if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) 575 return NULL; 576 577 VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); 578 579 return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); 580 } 581 582 #else 583 static inline bool PageMemcgKmem(struct page *page) 584 { 585 return false; 586 } 587 588 static inline struct obj_cgroup **page_objcgs(struct page *page) 589 { 590 return NULL; 591 } 592 593 static inline struct obj_cgroup **page_objcgs_check(struct page *page) 594 { 595 return NULL; 596 } 597 #endif 598 599 static __always_inline bool memcg_stat_item_in_bytes(int idx) 600 { 601 if (idx == MEMCG_PERCPU_B) 602 return true; 603 return vmstat_item_in_bytes(idx); 604 } 605 606 static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) 607 { 608 return (memcg == root_mem_cgroup); 609 } 610 611 static inline bool mem_cgroup_disabled(void) 612 { 613 return !cgroup_subsys_enabled(memory_cgrp_subsys); 614 } 615 616 static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, 617 struct mem_cgroup *memcg, 618 bool in_low_reclaim) 619 { 620 if (mem_cgroup_disabled()) 621 return 0; 622 623 /* 624 * There is no reclaim protection applied to a targeted reclaim. 625 * We are special casing this specific case here because 626 * mem_cgroup_protected calculation is not robust enough to keep 627 * the protection invariant for calculated effective values for 628 * parallel reclaimers with different reclaim target. This is 629 * especially a problem for tail memcgs (as they have pages on LRU) 630 * which would want to have effective values 0 for targeted reclaim 631 * but a different value for external reclaim. 632 * 633 * Example 634 * Let's have global and A's reclaim in parallel: 635 * | 636 * A (low=2G, usage = 3G, max = 3G, children_low_usage = 1.5G) 637 * |\ 638 * | C (low = 1G, usage = 2.5G) 639 * B (low = 1G, usage = 0.5G) 640 * 641 * For the global reclaim 642 * A.elow = A.low 643 * B.elow = min(B.usage, B.low) because children_low_usage <= A.elow 644 * C.elow = min(C.usage, C.low) 645 * 646 * With the effective values resetting we have A reclaim 647 * A.elow = 0 648 * B.elow = B.low 649 * C.elow = C.low 650 * 651 * If the global reclaim races with A's reclaim then 652 * B.elow = C.elow = 0 because children_low_usage > A.elow) 653 * is possible and reclaiming B would be violating the protection. 
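 *
 * Summing up the cases handled below: targeted reclaim (root == memcg) gets
 * no protection at all, low reclaim honours only the effective min, and
 * any other reclaim uses the larger of the effective min and low values.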
654 * 655 */ 656 if (root == memcg) 657 return 0; 658 659 if (in_low_reclaim) 660 return READ_ONCE(memcg->memory.emin); 661 662 return max(READ_ONCE(memcg->memory.emin), 663 READ_ONCE(memcg->memory.elow)); 664 } 665 666 void mem_cgroup_calculate_protection(struct mem_cgroup *root, 667 struct mem_cgroup *memcg); 668 669 static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg) 670 { 671 /* 672 * The root memcg doesn't account charges, and doesn't support 673 * protection. 674 */ 675 return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg); 676 677 } 678 679 static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) 680 { 681 if (!mem_cgroup_supports_protection(memcg)) 682 return false; 683 684 return READ_ONCE(memcg->memory.elow) >= 685 page_counter_read(&memcg->memory); 686 } 687 688 static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) 689 { 690 if (!mem_cgroup_supports_protection(memcg)) 691 return false; 692 693 return READ_ONCE(memcg->memory.emin) >= 694 page_counter_read(&memcg->memory); 695 } 696 697 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); 698 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, 699 gfp_t gfp, swp_entry_t entry); 700 void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); 701 702 void mem_cgroup_uncharge(struct page *page); 703 void mem_cgroup_uncharge_list(struct list_head *page_list); 704 705 void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); 706 707 /** 708 * mem_cgroup_lruvec - get the lru list vector for a memcg & node 709 * @memcg: memcg of the wanted lruvec 710 * @pgdat: pglist_data 711 * 712 * Returns the lru list vector holding pages for a given @memcg & 713 * @pgdat combination. This can be the node lruvec, if the memory 714 * controller is disabled. 715 */ 716 static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, 717 struct pglist_data *pgdat) 718 { 719 struct mem_cgroup_per_node *mz; 720 struct lruvec *lruvec; 721 722 if (mem_cgroup_disabled()) { 723 lruvec = &pgdat->__lruvec; 724 goto out; 725 } 726 727 if (!memcg) 728 memcg = root_mem_cgroup; 729 730 mz = memcg->nodeinfo[pgdat->node_id]; 731 lruvec = &mz->lruvec; 732 out: 733 /* 734 * Since a node can be onlined after the mem_cgroup was created, 735 * we have to be prepared to initialize lruvec->pgdat here; 736 * and if offlined then reonlined, we need to reinitialize it. 737 */ 738 if (unlikely(lruvec->pgdat != pgdat)) 739 lruvec->pgdat = pgdat; 740 return lruvec; 741 } 742 743 /** 744 * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page 745 * @page: the page 746 * @pgdat: pgdat of the page 747 * 748 * This function relies on page->mem_cgroup being stable. 749 */ 750 static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, 751 struct pglist_data *pgdat) 752 { 753 struct mem_cgroup *memcg = page_memcg(page); 754 755 VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); 756 return mem_cgroup_lruvec(memcg, pgdat); 757 } 758 759 static inline bool lruvec_holds_page_lru_lock(struct page *page, 760 struct lruvec *lruvec) 761 { 762 pg_data_t *pgdat = page_pgdat(page); 763 const struct mem_cgroup *memcg; 764 struct mem_cgroup_per_node *mz; 765 766 if (mem_cgroup_disabled()) 767 return lruvec == &pgdat->__lruvec; 768 769 mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); 770 memcg = page_memcg(page) ? 
: root_mem_cgroup; 771 772 return lruvec->pgdat == pgdat && mz->memcg == memcg; 773 } 774 775 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); 776 777 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); 778 779 struct lruvec *lock_page_lruvec(struct page *page); 780 struct lruvec *lock_page_lruvec_irq(struct page *page); 781 struct lruvec *lock_page_lruvec_irqsave(struct page *page, 782 unsigned long *flags); 783 784 #ifdef CONFIG_DEBUG_VM 785 void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page); 786 #else 787 static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) 788 { 789 } 790 #endif 791 792 static inline 793 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ 794 return css ? container_of(css, struct mem_cgroup, css) : NULL; 795 } 796 797 static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg) 798 { 799 return percpu_ref_tryget(&objcg->refcnt); 800 } 801 802 static inline void obj_cgroup_get(struct obj_cgroup *objcg) 803 { 804 percpu_ref_get(&objcg->refcnt); 805 } 806 807 static inline void obj_cgroup_get_many(struct obj_cgroup *objcg, 808 unsigned long nr) 809 { 810 percpu_ref_get_many(&objcg->refcnt, nr); 811 } 812 813 static inline void obj_cgroup_put(struct obj_cgroup *objcg) 814 { 815 percpu_ref_put(&objcg->refcnt); 816 } 817 818 static inline void mem_cgroup_put(struct mem_cgroup *memcg) 819 { 820 if (memcg) 821 css_put(&memcg->css); 822 } 823 824 #define mem_cgroup_from_counter(counter, member) \ 825 container_of(counter, struct mem_cgroup, member) 826 827 struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, 828 struct mem_cgroup *, 829 struct mem_cgroup_reclaim_cookie *); 830 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); 831 int mem_cgroup_scan_tasks(struct mem_cgroup *, 832 int (*)(struct task_struct *, void *), void *); 833 834 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) 835 { 836 if (mem_cgroup_disabled()) 837 return 0; 838 839 return memcg->id.id; 840 } 841 struct mem_cgroup *mem_cgroup_from_id(unsigned short id); 842 843 static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m) 844 { 845 return mem_cgroup_from_css(seq_css(m)); 846 } 847 848 static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) 849 { 850 struct mem_cgroup_per_node *mz; 851 852 if (mem_cgroup_disabled()) 853 return NULL; 854 855 mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); 856 return mz->memcg; 857 } 858 859 /** 860 * parent_mem_cgroup - find the accounting parent of a memcg 861 * @memcg: memcg whose parent to find 862 * 863 * Returns the parent memcg, or NULL if this is the root or the memory 864 * controller is in legacy no-hierarchy mode. 
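 *
 * A common pattern (sketch only; visit_memcg() is a made-up placeholder)
 * is to walk from a memcg up towards the root:
 *
 *	for (; memcg; memcg = parent_mem_cgroup(memcg))
 *		visit_memcg(memcg);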
865 */ 866 static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) 867 { 868 if (!memcg->memory.parent) 869 return NULL; 870 return mem_cgroup_from_counter(memcg->memory.parent, memory); 871 } 872 873 static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, 874 struct mem_cgroup *root) 875 { 876 if (root == memcg) 877 return true; 878 return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup); 879 } 880 881 static inline bool mm_match_cgroup(struct mm_struct *mm, 882 struct mem_cgroup *memcg) 883 { 884 struct mem_cgroup *task_memcg; 885 bool match = false; 886 887 rcu_read_lock(); 888 task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); 889 if (task_memcg) 890 match = mem_cgroup_is_descendant(task_memcg, memcg); 891 rcu_read_unlock(); 892 return match; 893 } 894 895 struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); 896 ino_t page_cgroup_ino(struct page *page); 897 898 static inline bool mem_cgroup_online(struct mem_cgroup *memcg) 899 { 900 if (mem_cgroup_disabled()) 901 return true; 902 return !!(memcg->css.flags & CSS_ONLINE); 903 } 904 905 /* 906 * For memory reclaim. 907 */ 908 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); 909 910 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, 911 int zid, int nr_pages); 912 913 static inline 914 unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, 915 enum lru_list lru, int zone_idx) 916 { 917 struct mem_cgroup_per_node *mz; 918 919 mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); 920 return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); 921 } 922 923 void mem_cgroup_handle_over_high(void); 924 925 unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); 926 927 unsigned long mem_cgroup_size(struct mem_cgroup *memcg); 928 929 void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, 930 struct task_struct *p); 931 932 void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg); 933 934 static inline void mem_cgroup_enter_user_fault(void) 935 { 936 WARN_ON(current->in_user_fault); 937 current->in_user_fault = 1; 938 } 939 940 static inline void mem_cgroup_exit_user_fault(void) 941 { 942 WARN_ON(!current->in_user_fault); 943 current->in_user_fault = 0; 944 } 945 946 static inline bool task_in_memcg_oom(struct task_struct *p) 947 { 948 return p->memcg_in_oom; 949 } 950 951 bool mem_cgroup_oom_synchronize(bool wait); 952 struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim, 953 struct mem_cgroup *oom_domain); 954 void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); 955 956 #ifdef CONFIG_MEMCG_SWAP 957 extern bool cgroup_memory_noswap; 958 #endif 959 960 void lock_page_memcg(struct page *page); 961 void unlock_page_memcg(struct page *page); 962 963 void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); 964 965 /* idx can be of type enum memcg_stat_item or node_stat_item */ 966 static inline void mod_memcg_state(struct mem_cgroup *memcg, 967 int idx, int val) 968 { 969 unsigned long flags; 970 971 local_irq_save(flags); 972 __mod_memcg_state(memcg, idx, val); 973 local_irq_restore(flags); 974 } 975 976 static inline unsigned long lruvec_page_state(struct lruvec *lruvec, 977 enum node_stat_item idx) 978 { 979 struct mem_cgroup_per_node *pn; 980 long x; 981 982 if (mem_cgroup_disabled()) 983 return node_page_state(lruvec_pgdat(lruvec), idx); 984 985 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); 986 x = atomic_long_read(&pn->lruvec_stat[idx]); 987 #ifdef CONFIG_SMP 
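	/*
	 * Per-CPU deltas are folded into the atomic counter in batches, so
	 * the aggregated value can be transiently negative; don't report
	 * that to callers.
	 */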
988 if (x < 0) 989 x = 0; 990 #endif 991 return x; 992 } 993 994 static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, 995 enum node_stat_item idx) 996 { 997 struct mem_cgroup_per_node *pn; 998 long x = 0; 999 int cpu; 1000 1001 if (mem_cgroup_disabled()) 1002 return node_page_state(lruvec_pgdat(lruvec), idx); 1003 1004 pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); 1005 for_each_possible_cpu(cpu) 1006 x += per_cpu(pn->lruvec_stat_local->count[idx], cpu); 1007 #ifdef CONFIG_SMP 1008 if (x < 0) 1009 x = 0; 1010 #endif 1011 return x; 1012 } 1013 1014 void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, 1015 int val); 1016 void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); 1017 1018 static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, 1019 int val) 1020 { 1021 unsigned long flags; 1022 1023 local_irq_save(flags); 1024 __mod_lruvec_kmem_state(p, idx, val); 1025 local_irq_restore(flags); 1026 } 1027 1028 static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, 1029 enum node_stat_item idx, int val) 1030 { 1031 unsigned long flags; 1032 1033 local_irq_save(flags); 1034 __mod_memcg_lruvec_state(lruvec, idx, val); 1035 local_irq_restore(flags); 1036 } 1037 1038 void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, 1039 unsigned long count); 1040 1041 static inline void count_memcg_events(struct mem_cgroup *memcg, 1042 enum vm_event_item idx, 1043 unsigned long count) 1044 { 1045 unsigned long flags; 1046 1047 local_irq_save(flags); 1048 __count_memcg_events(memcg, idx, count); 1049 local_irq_restore(flags); 1050 } 1051 1052 static inline void count_memcg_page_event(struct page *page, 1053 enum vm_event_item idx) 1054 { 1055 struct mem_cgroup *memcg = page_memcg(page); 1056 1057 if (memcg) 1058 count_memcg_events(memcg, idx, 1); 1059 } 1060 1061 static inline void count_memcg_event_mm(struct mm_struct *mm, 1062 enum vm_event_item idx) 1063 { 1064 struct mem_cgroup *memcg; 1065 1066 if (mem_cgroup_disabled()) 1067 return; 1068 1069 rcu_read_lock(); 1070 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); 1071 if (likely(memcg)) 1072 count_memcg_events(memcg, idx, 1); 1073 rcu_read_unlock(); 1074 } 1075 1076 static inline void memcg_memory_event(struct mem_cgroup *memcg, 1077 enum memcg_memory_event event) 1078 { 1079 bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || 1080 event == MEMCG_SWAP_FAIL; 1081 1082 atomic_long_inc(&memcg->memory_events_local[event]); 1083 if (!swap_event) 1084 cgroup_file_notify(&memcg->events_local_file); 1085 1086 do { 1087 atomic_long_inc(&memcg->memory_events[event]); 1088 if (swap_event) 1089 cgroup_file_notify(&memcg->swap_events_file); 1090 else 1091 cgroup_file_notify(&memcg->events_file); 1092 1093 if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) 1094 break; 1095 if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) 1096 break; 1097 } while ((memcg = parent_mem_cgroup(memcg)) && 1098 !mem_cgroup_is_root(memcg)); 1099 } 1100 1101 static inline void memcg_memory_event_mm(struct mm_struct *mm, 1102 enum memcg_memory_event event) 1103 { 1104 struct mem_cgroup *memcg; 1105 1106 if (mem_cgroup_disabled()) 1107 return; 1108 1109 rcu_read_lock(); 1110 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); 1111 if (likely(memcg)) 1112 memcg_memory_event(memcg, event); 1113 rcu_read_unlock(); 1114 } 1115 1116 void split_page_memcg(struct page *head, unsigned int nr); 1117 1118 unsigned long 
mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, 1119 gfp_t gfp_mask, 1120 unsigned long *total_scanned); 1121 1122 #else /* CONFIG_MEMCG */ 1123 1124 #define MEM_CGROUP_ID_SHIFT 0 1125 #define MEM_CGROUP_ID_MAX 0 1126 1127 static inline struct mem_cgroup *page_memcg(struct page *page) 1128 { 1129 return NULL; 1130 } 1131 1132 static inline struct mem_cgroup *page_memcg_rcu(struct page *page) 1133 { 1134 WARN_ON_ONCE(!rcu_read_lock_held()); 1135 return NULL; 1136 } 1137 1138 static inline struct mem_cgroup *page_memcg_check(struct page *page) 1139 { 1140 return NULL; 1141 } 1142 1143 static inline bool PageMemcgKmem(struct page *page) 1144 { 1145 return false; 1146 } 1147 1148 static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) 1149 { 1150 return true; 1151 } 1152 1153 static inline bool mem_cgroup_disabled(void) 1154 { 1155 return true; 1156 } 1157 1158 static inline void memcg_memory_event(struct mem_cgroup *memcg, 1159 enum memcg_memory_event event) 1160 { 1161 } 1162 1163 static inline void memcg_memory_event_mm(struct mm_struct *mm, 1164 enum memcg_memory_event event) 1165 { 1166 } 1167 1168 static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, 1169 struct mem_cgroup *memcg, 1170 bool in_low_reclaim) 1171 { 1172 return 0; 1173 } 1174 1175 static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root, 1176 struct mem_cgroup *memcg) 1177 { 1178 } 1179 1180 static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) 1181 { 1182 return false; 1183 } 1184 1185 static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) 1186 { 1187 return false; 1188 } 1189 1190 static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, 1191 gfp_t gfp_mask) 1192 { 1193 return 0; 1194 } 1195 1196 static inline int mem_cgroup_swapin_charge_page(struct page *page, 1197 struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) 1198 { 1199 return 0; 1200 } 1201 1202 static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) 1203 { 1204 } 1205 1206 static inline void mem_cgroup_uncharge(struct page *page) 1207 { 1208 } 1209 1210 static inline void mem_cgroup_uncharge_list(struct list_head *page_list) 1211 { 1212 } 1213 1214 static inline void mem_cgroup_migrate(struct page *old, struct page *new) 1215 { 1216 } 1217 1218 static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, 1219 struct pglist_data *pgdat) 1220 { 1221 return &pgdat->__lruvec; 1222 } 1223 1224 static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, 1225 struct pglist_data *pgdat) 1226 { 1227 return &pgdat->__lruvec; 1228 } 1229 1230 static inline bool lruvec_holds_page_lru_lock(struct page *page, 1231 struct lruvec *lruvec) 1232 { 1233 pg_data_t *pgdat = page_pgdat(page); 1234 1235 return lruvec == &pgdat->__lruvec; 1236 } 1237 1238 static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) 1239 { 1240 } 1241 1242 static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) 1243 { 1244 return NULL; 1245 } 1246 1247 static inline bool mm_match_cgroup(struct mm_struct *mm, 1248 struct mem_cgroup *memcg) 1249 { 1250 return true; 1251 } 1252 1253 static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) 1254 { 1255 return NULL; 1256 } 1257 1258 static inline void mem_cgroup_put(struct mem_cgroup *memcg) 1259 { 1260 } 1261 1262 static inline struct lruvec *lock_page_lruvec(struct page *page) 1263 { 1264 struct pglist_data *pgdat = page_pgdat(page); 1265 1266 
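	/*
	 * The memory controller is disabled, so every page belongs to the
	 * node's lruvec; take its lock directly.
	 */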
spin_lock(&pgdat->__lruvec.lru_lock); 1267 return &pgdat->__lruvec; 1268 } 1269 1270 static inline struct lruvec *lock_page_lruvec_irq(struct page *page) 1271 { 1272 struct pglist_data *pgdat = page_pgdat(page); 1273 1274 spin_lock_irq(&pgdat->__lruvec.lru_lock); 1275 return &pgdat->__lruvec; 1276 } 1277 1278 static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page, 1279 unsigned long *flagsp) 1280 { 1281 struct pglist_data *pgdat = page_pgdat(page); 1282 1283 spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp); 1284 return &pgdat->__lruvec; 1285 } 1286 1287 static inline struct mem_cgroup * 1288 mem_cgroup_iter(struct mem_cgroup *root, 1289 struct mem_cgroup *prev, 1290 struct mem_cgroup_reclaim_cookie *reclaim) 1291 { 1292 return NULL; 1293 } 1294 1295 static inline void mem_cgroup_iter_break(struct mem_cgroup *root, 1296 struct mem_cgroup *prev) 1297 { 1298 } 1299 1300 static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, 1301 int (*fn)(struct task_struct *, void *), void *arg) 1302 { 1303 return 0; 1304 } 1305 1306 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) 1307 { 1308 return 0; 1309 } 1310 1311 static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) 1312 { 1313 WARN_ON_ONCE(id); 1314 /* XXX: This should always return root_mem_cgroup */ 1315 return NULL; 1316 } 1317 1318 static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m) 1319 { 1320 return NULL; 1321 } 1322 1323 static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) 1324 { 1325 return NULL; 1326 } 1327 1328 static inline bool mem_cgroup_online(struct mem_cgroup *memcg) 1329 { 1330 return true; 1331 } 1332 1333 static inline 1334 unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, 1335 enum lru_list lru, int zone_idx) 1336 { 1337 return 0; 1338 } 1339 1340 static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg) 1341 { 1342 return 0; 1343 } 1344 1345 static inline unsigned long mem_cgroup_size(struct mem_cgroup *memcg) 1346 { 1347 return 0; 1348 } 1349 1350 static inline void 1351 mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p) 1352 { 1353 } 1354 1355 static inline void 1356 mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) 1357 { 1358 } 1359 1360 static inline void lock_page_memcg(struct page *page) 1361 { 1362 } 1363 1364 static inline void unlock_page_memcg(struct page *page) 1365 { 1366 } 1367 1368 static inline void mem_cgroup_handle_over_high(void) 1369 { 1370 } 1371 1372 static inline void mem_cgroup_enter_user_fault(void) 1373 { 1374 } 1375 1376 static inline void mem_cgroup_exit_user_fault(void) 1377 { 1378 } 1379 1380 static inline bool task_in_memcg_oom(struct task_struct *p) 1381 { 1382 return false; 1383 } 1384 1385 static inline bool mem_cgroup_oom_synchronize(bool wait) 1386 { 1387 return false; 1388 } 1389 1390 static inline struct mem_cgroup *mem_cgroup_get_oom_group( 1391 struct task_struct *victim, struct mem_cgroup *oom_domain) 1392 { 1393 return NULL; 1394 } 1395 1396 static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) 1397 { 1398 } 1399 1400 static inline void __mod_memcg_state(struct mem_cgroup *memcg, 1401 int idx, 1402 int nr) 1403 { 1404 } 1405 1406 static inline void mod_memcg_state(struct mem_cgroup *memcg, 1407 int idx, 1408 int nr) 1409 { 1410 } 1411 1412 static inline unsigned long lruvec_page_state(struct lruvec *lruvec, 1413 enum node_stat_item idx) 1414 { 1415 return node_page_state(lruvec_pgdat(lruvec), idx); 1416 
} 1417 1418 static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, 1419 enum node_stat_item idx) 1420 { 1421 return node_page_state(lruvec_pgdat(lruvec), idx); 1422 } 1423 1424 static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, 1425 enum node_stat_item idx, int val) 1426 { 1427 } 1428 1429 static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, 1430 int val) 1431 { 1432 struct page *page = virt_to_head_page(p); 1433 1434 __mod_node_page_state(page_pgdat(page), idx, val); 1435 } 1436 1437 static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, 1438 int val) 1439 { 1440 struct page *page = virt_to_head_page(p); 1441 1442 mod_node_page_state(page_pgdat(page), idx, val); 1443 } 1444 1445 static inline void count_memcg_events(struct mem_cgroup *memcg, 1446 enum vm_event_item idx, 1447 unsigned long count) 1448 { 1449 } 1450 1451 static inline void __count_memcg_events(struct mem_cgroup *memcg, 1452 enum vm_event_item idx, 1453 unsigned long count) 1454 { 1455 } 1456 1457 static inline void count_memcg_page_event(struct page *page, 1458 int idx) 1459 { 1460 } 1461 1462 static inline 1463 void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) 1464 { 1465 } 1466 1467 static inline void split_page_memcg(struct page *head, unsigned int nr) 1468 { 1469 } 1470 1471 static inline 1472 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, 1473 gfp_t gfp_mask, 1474 unsigned long *total_scanned) 1475 { 1476 return 0; 1477 } 1478 #endif /* CONFIG_MEMCG */ 1479 1480 static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) 1481 { 1482 __mod_lruvec_kmem_state(p, idx, 1); 1483 } 1484 1485 static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx) 1486 { 1487 __mod_lruvec_kmem_state(p, idx, -1); 1488 } 1489 1490 static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) 1491 { 1492 struct mem_cgroup *memcg; 1493 1494 memcg = lruvec_memcg(lruvec); 1495 if (!memcg) 1496 return NULL; 1497 memcg = parent_mem_cgroup(memcg); 1498 if (!memcg) 1499 return NULL; 1500 return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec)); 1501 } 1502 1503 static inline void unlock_page_lruvec(struct lruvec *lruvec) 1504 { 1505 spin_unlock(&lruvec->lru_lock); 1506 } 1507 1508 static inline void unlock_page_lruvec_irq(struct lruvec *lruvec) 1509 { 1510 spin_unlock_irq(&lruvec->lru_lock); 1511 } 1512 1513 static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, 1514 unsigned long flags) 1515 { 1516 spin_unlock_irqrestore(&lruvec->lru_lock, flags); 1517 } 1518 1519 /* Don't lock again iff page's lruvec locked */ 1520 static inline struct lruvec *relock_page_lruvec_irq(struct page *page, 1521 struct lruvec *locked_lruvec) 1522 { 1523 if (locked_lruvec) { 1524 if (lruvec_holds_page_lru_lock(page, locked_lruvec)) 1525 return locked_lruvec; 1526 1527 unlock_page_lruvec_irq(locked_lruvec); 1528 } 1529 1530 return lock_page_lruvec_irq(page); 1531 } 1532 1533 /* Don't lock again iff page's lruvec locked */ 1534 static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, 1535 struct lruvec *locked_lruvec, unsigned long *flags) 1536 { 1537 if (locked_lruvec) { 1538 if (lruvec_holds_page_lru_lock(page, locked_lruvec)) 1539 return locked_lruvec; 1540 1541 unlock_page_lruvec_irqrestore(locked_lruvec, *flags); 1542 } 1543 1544 return lock_page_lruvec_irqsave(page, flags); 1545 } 1546 1547 #ifdef CONFIG_CGROUP_WRITEBACK 1548 1549 struct wb_domain 
*mem_cgroup_wb_domain(struct bdi_writeback *wb); 1550 void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, 1551 unsigned long *pheadroom, unsigned long *pdirty, 1552 unsigned long *pwriteback); 1553 1554 void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, 1555 struct bdi_writeback *wb); 1556 1557 static inline void mem_cgroup_track_foreign_dirty(struct page *page, 1558 struct bdi_writeback *wb) 1559 { 1560 if (mem_cgroup_disabled()) 1561 return; 1562 1563 if (unlikely(&page_memcg(page)->css != wb->memcg_css)) 1564 mem_cgroup_track_foreign_dirty_slowpath(page, wb); 1565 } 1566 1567 void mem_cgroup_flush_foreign(struct bdi_writeback *wb); 1568 1569 #else /* CONFIG_CGROUP_WRITEBACK */ 1570 1571 static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) 1572 { 1573 return NULL; 1574 } 1575 1576 static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, 1577 unsigned long *pfilepages, 1578 unsigned long *pheadroom, 1579 unsigned long *pdirty, 1580 unsigned long *pwriteback) 1581 { 1582 } 1583 1584 static inline void mem_cgroup_track_foreign_dirty(struct page *page, 1585 struct bdi_writeback *wb) 1586 { 1587 } 1588 1589 static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) 1590 { 1591 } 1592 1593 #endif /* CONFIG_CGROUP_WRITEBACK */ 1594 1595 struct sock; 1596 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); 1597 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); 1598 #ifdef CONFIG_MEMCG 1599 extern struct static_key_false memcg_sockets_enabled_key; 1600 #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) 1601 void mem_cgroup_sk_alloc(struct sock *sk); 1602 void mem_cgroup_sk_free(struct sock *sk); 1603 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) 1604 { 1605 if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure) 1606 return true; 1607 do { 1608 if (time_before(jiffies, memcg->socket_pressure)) 1609 return true; 1610 } while ((memcg = parent_mem_cgroup(memcg))); 1611 return false; 1612 } 1613 1614 int alloc_shrinker_info(struct mem_cgroup *memcg); 1615 void free_shrinker_info(struct mem_cgroup *memcg); 1616 void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); 1617 void reparent_shrinker_deferred(struct mem_cgroup *memcg); 1618 #else 1619 #define mem_cgroup_sockets_enabled 0 1620 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; 1621 static inline void mem_cgroup_sk_free(struct sock *sk) { }; 1622 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) 1623 { 1624 return false; 1625 } 1626 1627 static inline void set_shrinker_bit(struct mem_cgroup *memcg, 1628 int nid, int shrinker_id) 1629 { 1630 } 1631 #endif 1632 1633 #ifdef CONFIG_MEMCG_KMEM 1634 int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); 1635 void __memcg_kmem_uncharge_page(struct page *page, int order); 1636 1637 struct obj_cgroup *get_obj_cgroup_from_current(void); 1638 1639 int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); 1640 void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); 1641 1642 extern struct static_key_false memcg_kmem_enabled_key; 1643 1644 extern int memcg_nr_cache_ids; 1645 void memcg_get_cache_ids(void); 1646 void memcg_put_cache_ids(void); 1647 1648 /* 1649 * Helper macro to loop through all memcg-specific caches. 
Callers must still 1650 * check if the cache is valid (it is either valid or NULL). 1651 * the slab_mutex must be held when looping through those caches 1652 */ 1653 #define for_each_memcg_cache_index(_idx) \ 1654 for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; (_idx)++) 1655 1656 static inline bool memcg_kmem_enabled(void) 1657 { 1658 return static_branch_likely(&memcg_kmem_enabled_key); 1659 } 1660 1661 static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, 1662 int order) 1663 { 1664 if (memcg_kmem_enabled()) 1665 return __memcg_kmem_charge_page(page, gfp, order); 1666 return 0; 1667 } 1668 1669 static inline void memcg_kmem_uncharge_page(struct page *page, int order) 1670 { 1671 if (memcg_kmem_enabled()) 1672 __memcg_kmem_uncharge_page(page, order); 1673 } 1674 1675 /* 1676 * A helper for accessing memcg's kmem_id, used for getting 1677 * corresponding LRU lists. 1678 */ 1679 static inline int memcg_cache_id(struct mem_cgroup *memcg) 1680 { 1681 return memcg ? memcg->kmemcg_id : -1; 1682 } 1683 1684 struct mem_cgroup *mem_cgroup_from_obj(void *p); 1685 1686 #else 1687 1688 static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, 1689 int order) 1690 { 1691 return 0; 1692 } 1693 1694 static inline void memcg_kmem_uncharge_page(struct page *page, int order) 1695 { 1696 } 1697 1698 static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, 1699 int order) 1700 { 1701 return 0; 1702 } 1703 1704 static inline void __memcg_kmem_uncharge_page(struct page *page, int order) 1705 { 1706 } 1707 1708 #define for_each_memcg_cache_index(_idx) \ 1709 for (; NULL; ) 1710 1711 static inline bool memcg_kmem_enabled(void) 1712 { 1713 return false; 1714 } 1715 1716 static inline int memcg_cache_id(struct mem_cgroup *memcg) 1717 { 1718 return -1; 1719 } 1720 1721 static inline void memcg_get_cache_ids(void) 1722 { 1723 } 1724 1725 static inline void memcg_put_cache_ids(void) 1726 { 1727 } 1728 1729 static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) 1730 { 1731 return NULL; 1732 } 1733 1734 #endif /* CONFIG_MEMCG_KMEM */ 1735 1736 #endif /* _LINUX_MEMCONTROL_H */ 1737