/* memcontrol.h - Memory Controller
 *
 * Copyright IBM Corporation, 2007
 * Author Balbir Singh <[email protected]>
 *
 * Copyright 2007 OpenVZ SWsoft Inc
 * Author: Pavel Emelianov <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef _LINUX_MEMCONTROL_H
#define _LINUX_MEMCONTROL_H
#include <linux/cgroup.h>
#include <linux/vm_event_item.h>
#include <linux/hardirq.h>
#include <linux/jump_label.h>
#include <linux/page_counter.h>
#include <linux/vmpressure.h>
#include <linux/eventfd.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/writeback.h>
#include <linux/page-flags.h>

struct mem_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;

/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
        MEMCG_CACHE = NR_VM_NODE_STAT_ITEMS,
        MEMCG_RSS,
        MEMCG_RSS_HUGE,
        MEMCG_SWAP,
        MEMCG_SOCK,
        /* XXX: why are these zone and not node counters? */
        MEMCG_KERNEL_STACK_KB,
        MEMCG_NR_STAT,
};

enum memcg_memory_event {
        MEMCG_LOW,
        MEMCG_HIGH,
        MEMCG_MAX,
        MEMCG_OOM,
        MEMCG_SWAP_MAX,
        MEMCG_SWAP_FAIL,
        MEMCG_NR_MEMORY_EVENTS,
};

enum mem_cgroup_protection {
        MEMCG_PROT_NONE,
        MEMCG_PROT_LOW,
        MEMCG_PROT_MIN,
};

struct mem_cgroup_reclaim_cookie {
        pg_data_t *pgdat;
        int priority;
        unsigned int generation;
};

#ifdef CONFIG_MEMCG

#define MEM_CGROUP_ID_SHIFT     16
#define MEM_CGROUP_ID_MAX       USHRT_MAX

struct mem_cgroup_id {
        int id;
        atomic_t ref;
};

/*
 * The per-memcg event counter is incremented at every pagein/pageout. With
 * THP, it is incremented by the number of pages. This counter is used to
 * trigger some periodic events. This is straightforward and better than
 * using jiffies etc. to handle periodic memcg events.
 */
enum mem_cgroup_events_target {
        MEM_CGROUP_TARGET_THRESH,
        MEM_CGROUP_TARGET_SOFTLIMIT,
        MEM_CGROUP_TARGET_NUMAINFO,
        MEM_CGROUP_NTARGETS,
};

struct mem_cgroup_stat_cpu {
        long count[MEMCG_NR_STAT];
        unsigned long events[NR_VM_EVENT_ITEMS];
        unsigned long nr_page_events;
        unsigned long targets[MEM_CGROUP_NTARGETS];
};

struct mem_cgroup_reclaim_iter {
        struct mem_cgroup *position;
        /* scan generation, increased every round-trip */
        unsigned int generation;
};

struct lruvec_stat {
        long count[NR_VM_NODE_STAT_ITEMS];
};
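
/*
 * Illustrative sketch (not part of this header): reclaim walks a memcg
 * subtree with mem_cgroup_iter() (declared below), passing a
 * mem_cgroup_reclaim_cookie so that concurrent reclaimers at the same
 * priority share one iterator position per node. Roughly, assuming the
 * caller has pgdat, root and priority at hand:
 *
 *      struct mem_cgroup_reclaim_cookie reclaim = {
 *              .pgdat = pgdat,
 *              .priority = priority,
 *      };
 *      struct mem_cgroup *memcg;
 *
 *      memcg = mem_cgroup_iter(root, NULL, &reclaim);
 *      do {
 *              ... shrink the lruvecs of memcg on pgdat ...
 *      } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
 */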

/*
 * per-node information in memory controller.
 */
struct mem_cgroup_per_node {
        struct lruvec           lruvec;

        struct lruvec_stat __percpu *lruvec_stat_cpu;
        atomic_long_t           lruvec_stat[NR_VM_NODE_STAT_ITEMS];

        unsigned long           lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];

        struct mem_cgroup_reclaim_iter  iter[DEF_PRIORITY + 1];

        struct rb_node          tree_node;      /* RB tree node */
        unsigned long           usage_in_excess;/* Set to the value by which */
                                                /* the soft limit is exceeded*/
        bool                    on_tree;
        bool                    congested;      /* memcg has many dirty pages */
                                                /* backed by a congested BDI */

        struct mem_cgroup       *memcg;         /* Back pointer, we cannot */
                                                /* use container_of        */
};

struct mem_cgroup_threshold {
        struct eventfd_ctx *eventfd;
        unsigned long threshold;
};

/* For threshold */
struct mem_cgroup_threshold_ary {
        /* An array index points to threshold just below or equal to usage. */
        int current_threshold;
        /* Size of entries[] */
        unsigned int size;
        /* Array of thresholds */
        struct mem_cgroup_threshold entries[0];
};

struct mem_cgroup_thresholds {
        /* Primary thresholds array */
        struct mem_cgroup_threshold_ary *primary;
        /*
         * Spare threshold array.
         * This is needed to make mem_cgroup_unregister_event() "never fail".
         * It must be able to store at least primary->size - 1 entries.
         */
        struct mem_cgroup_threshold_ary *spare;
};

enum memcg_kmem_state {
        KMEM_NONE,
        KMEM_ALLOCATED,
        KMEM_ONLINE,
};

#if defined(CONFIG_SMP)
struct memcg_padding {
        char x[0];
} ____cacheline_internodealigned_in_smp;
#define MEMCG_PADDING(name)     struct memcg_padding name;
#else
#define MEMCG_PADDING(name)
#endif

/*
 * The memory controller data structure. The memory controller controls both
 * page cache and RSS per cgroup. We would eventually like to provide
 * statistics based on the statistics developed by Rik van Riel for clock-pro,
 * to help the administrator determine what knobs to tune.
 */
struct mem_cgroup {
        struct cgroup_subsys_state css;

        /* Private memcg ID. Used to ID objects that outlive the cgroup */
        struct mem_cgroup_id id;

        /* Accounted resources */
        struct page_counter memory;
        struct page_counter swap;

        /* Legacy consumer-oriented counters */
        struct page_counter memsw;
        struct page_counter kmem;
        struct page_counter tcpmem;

        /* Upper bound of normal memory consumption range */
        unsigned long high;

        /* Range enforcement for interrupt charges */
        struct work_struct high_work;

        unsigned long soft_limit;

        /* vmpressure notifications */
        struct vmpressure vmpressure;

        /*
         * Should the accounting and control be hierarchical, per subtree?
         */
        bool use_hierarchy;

        /* protected by memcg_oom_lock */
        bool            oom_lock;
        int             under_oom;

        int             swappiness;
        /* OOM-Killer disable */
        int             oom_kill_disable;

        /* memory.events */
        struct cgroup_file events_file;

        /* handle for "memory.swap.events" */
        struct cgroup_file swap_events_file;

        /* protect arrays of thresholds */
        struct mutex thresholds_lock;

        /* thresholds for memory usage. RCU-protected */
        struct mem_cgroup_thresholds thresholds;

        /* thresholds for mem+swap usage. RCU-protected */
        struct mem_cgroup_thresholds memsw_thresholds;

        /* For oom notifier event fd */
        struct list_head oom_notify;

        /*
         * Should we move charges of a task when a task is moved into this
         * mem_cgroup ? And what type of charges should we move ?
         */
        unsigned long move_charge_at_immigrate;
        /* taken only while moving_account > 0 */
        spinlock_t              move_lock;
        unsigned long           move_lock_flags;

        MEMCG_PADDING(_pad1_);

        /*
         * set > 0 if pages under this cgroup are moving to other cgroup.
         */
        atomic_t                moving_account;
        struct task_struct      *move_lock_task;

        /* memory.stat */
        struct mem_cgroup_stat_cpu __percpu *stat_cpu;

        MEMCG_PADDING(_pad2_);

        atomic_long_t           stat[MEMCG_NR_STAT];
        atomic_long_t           events[NR_VM_EVENT_ITEMS];
        atomic_long_t           memory_events[MEMCG_NR_MEMORY_EVENTS];

        unsigned long           socket_pressure;

        /* Legacy tcp memory accounting */
        bool                    tcpmem_active;
        int                     tcpmem_pressure;

#ifndef CONFIG_SLOB
        /* Index in the kmem_cache->memcg_params.memcg_caches array */
        int kmemcg_id;
        enum memcg_kmem_state kmem_state;
        struct list_head kmem_caches;
#endif

        int last_scanned_node;
#if MAX_NUMNODES > 1
        nodemask_t      scan_nodes;
        atomic_t        numainfo_events;
        atomic_t        numainfo_updating;
#endif

#ifdef CONFIG_CGROUP_WRITEBACK
        struct list_head cgwb_list;
        struct wb_domain cgwb_domain;
#endif

        /* List of events which userspace want to receive */
        struct list_head event_list;
        spinlock_t event_list_lock;

        struct mem_cgroup_per_node *nodeinfo[0];
        /* WARNING: nodeinfo must be the last member here */
};
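
/*
 * Note on nodeinfo[]: it is a flexible array, so struct mem_cgroup is
 * allocated with room for one mem_cgroup_per_node pointer per possible
 * node. A minimal sketch of such an allocation (illustrative only, not
 * the exact allocator code in mm/memcontrol.c):
 *
 *      size_t size = sizeof(struct mem_cgroup) +
 *                    nr_node_ids * sizeof(struct mem_cgroup_per_node *);
 *      struct mem_cgroup *memcg = kzalloc(size, GFP_KERNEL);
 */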

/*
 * Size of the first charge trial. "32" comes from vmscan.c's magic value.
 * TODO: larger machines may need a bigger batch.
 */
#define MEMCG_CHARGE_BATCH 32U

extern struct mem_cgroup *root_mem_cgroup;

static inline bool mem_cgroup_disabled(void)
{
        return !cgroup_subsys_enabled(memory_cgrp_subsys);
}

enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
                                                struct mem_cgroup *memcg);

int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                          gfp_t gfp_mask, struct mem_cgroup **memcgp,
                          bool compound);
void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
                              bool lrucare, bool compound);
void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
                              bool compound);
void mem_cgroup_uncharge(struct page *page);
void mem_cgroup_uncharge_list(struct list_head *page_list);

void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
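
/*
 * A minimal sketch of the usual charge protocol (illustrative; see the
 * callers in mm/ for authoritative usage). The page is charged
 * speculatively first, then either committed once it is definitely
 * going to be mapped or added, or cancelled on failure:
 *
 *      struct mem_cgroup *memcg;
 *
 *      if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false))
 *              goto out_error;
 *      ... insert the page into the page tables or page cache ...
 *      mem_cgroup_commit_charge(page, memcg, false, false);
 *
 *      ... or, if the insertion failed:
 *      mem_cgroup_cancel_charge(page, memcg, false);
 */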

static struct mem_cgroup_per_node *
mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
{
        return memcg->nodeinfo[nid];
}

/**
 * mem_cgroup_lruvec - get the lru list vector for a node or a memcg & node
 * @pgdat: node of the wanted lruvec
 * @memcg: memcg of the wanted lruvec
 *
 * Returns the lru list vector holding pages for a given @pgdat and @memcg
 * combination. This can be the node lruvec, if the memory controller
 * is disabled.
 */
static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
                                struct mem_cgroup *memcg)
{
        struct mem_cgroup_per_node *mz;
        struct lruvec *lruvec;

        if (mem_cgroup_disabled()) {
                lruvec = node_lruvec(pgdat);
                goto out;
        }

        mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
        lruvec = &mz->lruvec;
out:
        /*
         * Since a node can be onlined after the mem_cgroup was created,
         * we have to be prepared to initialize lruvec->pgdat here;
         * and if offlined then reonlined, we need to reinitialize it.
         */
        if (unlikely(lruvec->pgdat != pgdat))
                lruvec->pgdat = pgdat;
        return lruvec;
}

struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);

bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);

static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
{
        return css ? container_of(css, struct mem_cgroup, css) : NULL;
}

#define mem_cgroup_from_counter(counter, member)        \
        container_of(counter, struct mem_cgroup, member)

struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
                                   struct mem_cgroup *,
                                   struct mem_cgroup_reclaim_cookie *);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
int mem_cgroup_scan_tasks(struct mem_cgroup *,
                          int (*)(struct task_struct *, void *), void *);

static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
        if (mem_cgroup_disabled())
                return 0;

        return memcg->id.id;
}
struct mem_cgroup *mem_cgroup_from_id(unsigned short id);

static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
{
        struct mem_cgroup_per_node *mz;

        if (mem_cgroup_disabled())
                return NULL;

        mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        return mz->memcg;
}

/**
 * parent_mem_cgroup - find the accounting parent of a memcg
 * @memcg: memcg whose parent to find
 *
 * Returns the parent memcg, or NULL if this is the root or the memory
 * controller is in legacy no-hierarchy mode.
 */
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
        if (!memcg->memory.parent)
                return NULL;
        return mem_cgroup_from_counter(memcg->memory.parent, memory);
}

static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
                                            struct mem_cgroup *root)
{
        if (root == memcg)
                return true;
        if (!root->use_hierarchy)
                return false;
        return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
}

static inline bool mm_match_cgroup(struct mm_struct *mm,
                                   struct mem_cgroup *memcg)
{
        struct mem_cgroup *task_memcg;
        bool match = false;

        rcu_read_lock();
        task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
        if (task_memcg)
                match = mem_cgroup_is_descendant(task_memcg, memcg);
        rcu_read_unlock();
        return match;
}

struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page);
ino_t page_cgroup_ino(struct page *page);

static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
{
        if (mem_cgroup_disabled())
                return true;
        return !!(memcg->css.flags & CSS_ONLINE);
}
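
/*
 * Illustrative sketch (assuming the caller already holds a reference or
 * sits inside rcu_read_lock()): walking from a memcg towards the hierarchy
 * root is done with parent_mem_cgroup(), which returns NULL at the root or
 * when legacy no-hierarchy mode cuts the walk short:
 *
 *      for (; memcg; memcg = parent_mem_cgroup(memcg)) {
 *              ... consult or update the ancestor ...
 *      }
 */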

/*
 * For memory reclaim.
 */
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);

void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
                int zid, int nr_pages);

unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                                           int nid, unsigned int lru_mask);

static inline
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
        struct mem_cgroup_per_node *mz;
        unsigned long nr_pages = 0;
        int zid;

        mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        for (zid = 0; zid < MAX_NR_ZONES; zid++)
                nr_pages += mz->lru_zone_size[zid][lru];
        return nr_pages;
}

static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
                enum lru_list lru, int zone_idx)
{
        struct mem_cgroup_per_node *mz;

        mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        return mz->lru_zone_size[zone_idx][lru];
}

void mem_cgroup_handle_over_high(void);

unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);

void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                                struct task_struct *p);

static inline void mem_cgroup_oom_enable(void)
{
        WARN_ON(current->memcg_may_oom);
        current->memcg_may_oom = 1;
}

static inline void mem_cgroup_oom_disable(void)
{
        WARN_ON(!current->memcg_may_oom);
        current->memcg_may_oom = 0;
}

static inline bool task_in_memcg_oom(struct task_struct *p)
{
        return p->memcg_in_oom;
}

bool mem_cgroup_oom_synchronize(bool wait);

#ifdef CONFIG_MEMCG_SWAP
extern int do_swap_account;
#endif

struct mem_cgroup *lock_page_memcg(struct page *page);
void __unlock_page_memcg(struct mem_cgroup *memcg);
void unlock_page_memcg(struct page *page);

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
                                             int idx)
{
        long x = atomic_long_read(&memcg->stat[idx]);
#ifdef CONFIG_SMP
        if (x < 0)
                x = 0;
#endif
        return x;
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __mod_memcg_state(struct mem_cgroup *memcg,
                                     int idx, int val)
{
        long x;

        if (mem_cgroup_disabled())
                return;

        x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
        if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
                atomic_long_add(x, &memcg->stat[idx]);
                x = 0;
        }
        __this_cpu_write(memcg->stat_cpu->count[idx], x);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
                                   int idx, int val)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_memcg_state(memcg, idx, val);
        local_irq_restore(flags);
}
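
/*
 * Usage sketch (illustrative): because the memcg stat indices extend
 * enum node_stat_item, generic node counters and memcg-only counters are
 * read and updated through the same helpers. Readers should expect the
 * per-CPU batching above: a counter may lag by up to MEMCG_CHARGE_BATCH
 * per CPU until the local delta is folded into the atomic.
 *
 *      unsigned long anon   = memcg_page_state(memcg, MEMCG_RSS);
 *      unsigned long mapped = memcg_page_state(memcg, NR_FILE_MAPPED);
 *
 *      mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
 */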

/**
 * mod_memcg_page_state - update page state statistics
 * @page: the page
 * @idx: page state item to account
 * @val: number of pages (positive or negative)
 *
 * The @page must be locked or the caller must use lock_page_memcg()
 * to prevent double accounting when the page is concurrently being
 * moved to another memcg:
 *
 *   lock_page(page) or lock_page_memcg(page)
 *   if (TestClearPageState(page))
 *     mod_memcg_page_state(page, state, -1);
 *   unlock_page(page) or unlock_page_memcg(page)
 *
 * Kernel pages are an exception to this, since they'll never move.
 */
static inline void __mod_memcg_page_state(struct page *page,
                                          int idx, int val)
{
        if (page->mem_cgroup)
                __mod_memcg_state(page->mem_cgroup, idx, val);
}

static inline void mod_memcg_page_state(struct page *page,
                                        int idx, int val)
{
        if (page->mem_cgroup)
                mod_memcg_state(page->mem_cgroup, idx, val);
}

static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
{
        struct mem_cgroup_per_node *pn;
        long x;

        if (mem_cgroup_disabled())
                return node_page_state(lruvec_pgdat(lruvec), idx);

        pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        x = atomic_long_read(&pn->lruvec_stat[idx]);
#ifdef CONFIG_SMP
        if (x < 0)
                x = 0;
#endif
        return x;
}

static inline void __mod_lruvec_state(struct lruvec *lruvec,
                                      enum node_stat_item idx, int val)
{
        struct mem_cgroup_per_node *pn;
        long x;

        /* Update node */
        __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);

        if (mem_cgroup_disabled())
                return;

        pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);

        /* Update memcg */
        __mod_memcg_state(pn->memcg, idx, val);

        /* Update lruvec */
        x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
        if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
                atomic_long_add(x, &pn->lruvec_stat[idx]);
                x = 0;
        }
        __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
}

static inline void mod_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx, int val)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_lruvec_state(lruvec, idx, val);
        local_irq_restore(flags);
}
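
/*
 * Usage sketch (illustrative): __mod_lruvec_state() keeps three counters
 * in sync for one update - the node-wide vmstat, the owning memcg's stat
 * and the per-lruvec stat. A caller that has already resolved the lruvec
 * for a node/memcg pair (the names below stand in for the caller's own
 * context):
 *
 *      struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
 *
 *      mod_lruvec_state(lruvec, NR_SLAB_RECLAIMABLE, nr_pages);
 */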

static inline void __mod_lruvec_page_state(struct page *page,
                                           enum node_stat_item idx, int val)
{
        pg_data_t *pgdat = page_pgdat(page);
        struct lruvec *lruvec;

        /* Untracked pages have no memcg, no lruvec. Update only the node */
        if (!page->mem_cgroup) {
                __mod_node_page_state(pgdat, idx, val);
                return;
        }

        lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
        __mod_lruvec_state(lruvec, idx, val);
}

static inline void mod_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx, int val)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_lruvec_page_state(page, idx, val);
        local_irq_restore(flags);
}

unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                                gfp_t gfp_mask,
                                                unsigned long *total_scanned);

static inline void __count_memcg_events(struct mem_cgroup *memcg,
                                        enum vm_event_item idx,
                                        unsigned long count)
{
        unsigned long x;

        if (mem_cgroup_disabled())
                return;

        x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
        if (unlikely(x > MEMCG_CHARGE_BATCH)) {
                atomic_long_add(x, &memcg->events[idx]);
                x = 0;
        }
        __this_cpu_write(memcg->stat_cpu->events[idx], x);
}

static inline void count_memcg_events(struct mem_cgroup *memcg,
                                      enum vm_event_item idx,
                                      unsigned long count)
{
        unsigned long flags;

        local_irq_save(flags);
        __count_memcg_events(memcg, idx, count);
        local_irq_restore(flags);
}

static inline void count_memcg_page_event(struct page *page,
                                          enum vm_event_item idx)
{
        if (page->mem_cgroup)
                count_memcg_events(page->mem_cgroup, idx, 1);
}

static inline void count_memcg_event_mm(struct mm_struct *mm,
                                        enum vm_event_item idx)
{
        struct mem_cgroup *memcg;

        if (mem_cgroup_disabled())
                return;

        rcu_read_lock();
        memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
        if (likely(memcg)) {
                count_memcg_events(memcg, idx, 1);
                if (idx == OOM_KILL)
                        cgroup_file_notify(&memcg->events_file);
        }
        rcu_read_unlock();
}

static inline void memcg_memory_event(struct mem_cgroup *memcg,
                                      enum memcg_memory_event event)
{
        atomic_long_inc(&memcg->memory_events[event]);
        cgroup_file_notify(&memcg->events_file);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void mem_cgroup_split_huge_fixup(struct page *head);
#endif
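
/*
 * Usage sketch (illustrative): VM event counters and memory.events
 * notifications are bumped through the helpers above, e.g. from a major
 * fault or an OOM path respectively:
 *
 *      count_memcg_event_mm(mm, PGMAJFAULT);
 *
 *      memcg_memory_event(memcg, MEMCG_OOM);
 */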

#else /* CONFIG_MEMCG */

#define MEM_CGROUP_ID_SHIFT     0
#define MEM_CGROUP_ID_MAX       0

struct mem_cgroup;

static inline bool mem_cgroup_disabled(void)
{
        return true;
}

static inline void memcg_memory_event(struct mem_cgroup *memcg,
                                      enum memcg_memory_event event)
{
}

static inline enum mem_cgroup_protection mem_cgroup_protected(
        struct mem_cgroup *root, struct mem_cgroup *memcg)
{
        return MEMCG_PROT_NONE;
}

static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                                        gfp_t gfp_mask,
                                        struct mem_cgroup **memcgp,
                                        bool compound)
{
        *memcgp = NULL;
        return 0;
}

static inline void mem_cgroup_commit_charge(struct page *page,
                                            struct mem_cgroup *memcg,
                                            bool lrucare, bool compound)
{
}

static inline void mem_cgroup_cancel_charge(struct page *page,
                                            struct mem_cgroup *memcg,
                                            bool compound)
{
}

static inline void mem_cgroup_uncharge(struct page *page)
{
}

static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}

static inline void mem_cgroup_migrate(struct page *old, struct page *new)
{
}

static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
                                struct mem_cgroup *memcg)
{
        return node_lruvec(pgdat);
}

static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
                                                    struct pglist_data *pgdat)
{
        return &pgdat->lruvec;
}

static inline bool mm_match_cgroup(struct mm_struct *mm,
                                   struct mem_cgroup *memcg)
{
        return true;
}

static inline bool task_in_mem_cgroup(struct task_struct *task,
                                      const struct mem_cgroup *memcg)
{
        return true;
}

static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
                struct mem_cgroup *prev,
                struct mem_cgroup_reclaim_cookie *reclaim)
{
        return NULL;
}

static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
                                         struct mem_cgroup *prev)
{
}

static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
                int (*fn)(struct task_struct *, void *), void *arg)
{
        return 0;
}

static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
        return 0;
}

static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
{
        WARN_ON_ONCE(id);
        /* XXX: This should always return root_mem_cgroup */
        return NULL;
}

static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
{
        return NULL;
}

static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
{
        return true;
}

static inline unsigned long
mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
        return 0;
}
static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
                enum lru_list lru, int zone_idx)
{
        return 0;
}

static inline unsigned long
mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                             int nid, unsigned int lru_mask)
{
        return 0;
}

static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
{
        return 0;
}

static inline void
mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}

static inline struct mem_cgroup *lock_page_memcg(struct page *page)
{
        return NULL;
}

static inline void __unlock_page_memcg(struct mem_cgroup *memcg)
{
}

static inline void unlock_page_memcg(struct page *page)
{
}

static inline void mem_cgroup_handle_over_high(void)
{
}

static inline void mem_cgroup_oom_enable(void)
{
}

static inline void mem_cgroup_oom_disable(void)
{
}

static inline bool task_in_memcg_oom(struct task_struct *p)
{
        return false;
}

static inline bool mem_cgroup_oom_synchronize(bool wait)
{
        return false;
}

static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
                                             int idx)
{
        return 0;
}

static inline void __mod_memcg_state(struct mem_cgroup *memcg,
                                     int idx,
                                     int nr)
{
}

static inline void mod_memcg_state(struct mem_cgroup *memcg,
                                   int idx,
                                   int nr)
{
}

static inline void __mod_memcg_page_state(struct page *page,
                                          int idx,
                                          int nr)
{
}

static inline void mod_memcg_page_state(struct page *page,
                                        int idx,
                                        int nr)
{
}

static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
{
        return node_page_state(lruvec_pgdat(lruvec), idx);
}

static inline void __mod_lruvec_state(struct lruvec *lruvec,
                                      enum node_stat_item idx, int val)
{
        __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}

static inline void mod_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx, int val)
{
        mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}

static inline void __mod_lruvec_page_state(struct page *page,
                                           enum node_stat_item idx, int val)
{
        __mod_node_page_state(page_pgdat(page), idx, val);
}

static inline void mod_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx, int val)
{
        mod_node_page_state(page_pgdat(page), idx, val);
}

static inline
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                            gfp_t gfp_mask,
                                            unsigned long *total_scanned)
{
        return 0;
}

static inline void mem_cgroup_split_huge_fixup(struct page *head)
{
}

static inline void count_memcg_events(struct mem_cgroup *memcg,
                                      enum vm_event_item idx,
                                      unsigned long count)
{
}

static inline void count_memcg_page_event(struct page *page,
                                          int idx)
{
}

static inline
void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
}
#endif /* CONFIG_MEMCG */

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __inc_memcg_state(struct mem_cgroup *memcg,
                                     int idx)
{
        __mod_memcg_state(memcg, idx, 1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __dec_memcg_state(struct mem_cgroup *memcg,
                                     int idx)
{
        __mod_memcg_state(memcg, idx, -1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __inc_memcg_page_state(struct page *page,
                                          int idx)
{
        __mod_memcg_page_state(page, idx, 1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __dec_memcg_page_state(struct page *page,
                                          int idx)
{
        __mod_memcg_page_state(page, idx, -1);
}

static inline void __inc_lruvec_state(struct lruvec *lruvec,
                                      enum node_stat_item idx)
{
        __mod_lruvec_state(lruvec, idx, 1);
}

static inline void __dec_lruvec_state(struct lruvec *lruvec,
                                      enum node_stat_item idx)
{
        __mod_lruvec_state(lruvec, idx, -1);
}

static inline void __inc_lruvec_page_state(struct page *page,
                                           enum node_stat_item idx)
{
        __mod_lruvec_page_state(page, idx, 1);
}

static inline void __dec_lruvec_page_state(struct page *page,
                                           enum node_stat_item idx)
{
        __mod_lruvec_page_state(page, idx, -1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void inc_memcg_state(struct mem_cgroup *memcg,
                                   int idx)
{
        mod_memcg_state(memcg, idx, 1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void dec_memcg_state(struct mem_cgroup *memcg,
                                   int idx)
{
        mod_memcg_state(memcg, idx, -1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void inc_memcg_page_state(struct page *page,
                                        int idx)
{
        mod_memcg_page_state(page, idx, 1);
}

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void dec_memcg_page_state(struct page *page,
                                        int idx)
{
        mod_memcg_page_state(page, idx, -1);
}

static inline void inc_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx)
{
        mod_lruvec_state(lruvec, idx, 1);
}

static inline void dec_lruvec_state(struct lruvec *lruvec,
                                    enum node_stat_item idx)
{
        mod_lruvec_state(lruvec, idx, -1);
}

static inline void inc_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx)
{
        mod_lruvec_page_state(page, idx, 1);
}

static inline void dec_lruvec_page_state(struct page *page,
                                         enum node_stat_item idx)
{
        mod_lruvec_page_state(page, idx, -1);
}

#ifdef CONFIG_CGROUP_WRITEBACK

struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
                         unsigned long *pheadroom, unsigned long *pdirty,
                         unsigned long *pwriteback);

#else   /* CONFIG_CGROUP_WRITEBACK */

static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
{
        return NULL;
}

static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
                                       unsigned long *pfilepages,
                                       unsigned long *pheadroom,
                                       unsigned long *pdirty,
                                       unsigned long *pwriteback)
{
}

#endif  /* CONFIG_CGROUP_WRITEBACK */

struct sock;
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
#ifdef CONFIG_MEMCG
extern struct static_key_false memcg_sockets_enabled_key;
#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
        if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure)
                return true;
        do {
                if (time_before(jiffies, memcg->socket_pressure))
                        return true;
        } while ((memcg = parent_mem_cgroup(memcg)));
        return false;
}
#else
#define mem_cgroup_sockets_enabled 0
static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
static inline void mem_cgroup_sk_free(struct sock *sk) { };
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
        return false;
}
#endif

struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
void memcg_kmem_put_cache(struct kmem_cache *cachep);
int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
                            struct mem_cgroup *memcg);
int memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
void memcg_kmem_uncharge(struct page *page, int order);
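
/*
 * Usage sketch (illustrative): when kmem accounting is enabled, slab
 * allocation paths redirect to a per-memcg clone of the cache for the
 * duration of the allocation and drop the reference afterwards. Roughly:
 *
 *      if (memcg_kmem_enabled())
 *              cachep = memcg_kmem_get_cache(cachep);
 *      ... allocate the object from cachep ...
 *      memcg_kmem_put_cache(cachep);
 */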

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
extern struct static_key_false memcg_kmem_enabled_key;
extern struct workqueue_struct *memcg_kmem_cache_wq;

extern int memcg_nr_cache_ids;
void memcg_get_cache_ids(void);
void memcg_put_cache_ids(void);

/*
 * Helper macro to loop through all memcg-specific caches. Callers must still
 * check if the cache is valid (it is either valid or NULL).
 * The slab_mutex must be held when looping through those caches.
 */
#define for_each_memcg_cache_index(_idx)        \
        for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; (_idx)++)

static inline bool memcg_kmem_enabled(void)
{
        return static_branch_unlikely(&memcg_kmem_enabled_key);
}

/*
 * Helper for accessing a memcg's index. It will be used as an index in the
 * child cache array in kmem_cache, and also to derive its name. This function
 * will return -1 when this is not a kmem-limited memcg.
 */
static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
        return memcg ? memcg->kmemcg_id : -1;
}

#else
#define for_each_memcg_cache_index(_idx)        \
        for (; NULL; )

static inline bool memcg_kmem_enabled(void)
{
        return false;
}

static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
        return -1;
}

static inline void memcg_get_cache_ids(void)
{
}

static inline void memcg_put_cache_ids(void)
{
}

#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

#endif /* _LINUX_MEMCONTROL_H */