1 #ifndef _LINUX_MM_H 2 #define _LINUX_MM_H 3 4 #include <linux/sched.h> 5 #include <linux/errno.h> 6 7 #ifdef __KERNEL__ 8 9 #include <linux/config.h> 10 #include <linux/gfp.h> 11 #include <linux/list.h> 12 #include <linux/mmzone.h> 13 #include <linux/rbtree.h> 14 #include <linux/prio_tree.h> 15 #include <linux/fs.h> 16 #include <linux/mutex.h> 17 18 struct mempolicy; 19 struct anon_vma; 20 21 #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ 22 extern unsigned long max_mapnr; 23 #endif 24 25 extern unsigned long num_physpages; 26 extern void * high_memory; 27 extern unsigned long vmalloc_earlyreserve; 28 extern int page_cluster; 29 30 #ifdef CONFIG_SYSCTL 31 extern int sysctl_legacy_va_layout; 32 #else 33 #define sysctl_legacy_va_layout 0 34 #endif 35 36 #include <asm/page.h> 37 #include <asm/pgtable.h> 38 #include <asm/processor.h> 39 #include <asm/atomic.h> 40 41 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) 42 43 /* 44 * Linux kernel virtual memory manager primitives. 45 * The idea being to have a "virtual" mm in the same way 46 * we have a virtual fs - giving a cleaner interface to the 47 * mm details, and allowing different kinds of memory mappings 48 * (from shared memory to executable loading to arbitrary 49 * mmap() functions). 50 */ 51 52 /* 53 * This struct defines a memory VMM memory area. There is one of these 54 * per VM-area/task. A VM area is any part of the process virtual memory 55 * space that has a special rule for the page-fault handlers (ie a shared 56 * library, the executable area etc). 57 */ 58 struct vm_area_struct { 59 struct mm_struct * vm_mm; /* The address space we belong to. */ 60 unsigned long vm_start; /* Our start address within vm_mm. */ 61 unsigned long vm_end; /* The first byte after our end address 62 within vm_mm. */ 63 64 /* linked list of VM areas per task, sorted by address */ 65 struct vm_area_struct *vm_next; 66 67 pgprot_t vm_page_prot; /* Access permissions of this VMA. */ 68 unsigned long vm_flags; /* Flags, listed below. */ 69 70 struct rb_node vm_rb; 71 72 /* 73 * For areas with an address space and backing store, 74 * linkage into the address_space->i_mmap prio tree, or 75 * linkage to the list of like vmas hanging off its node, or 76 * linkage of vma in the address_space->i_mmap_nonlinear list. 77 */ 78 union { 79 struct { 80 struct list_head list; 81 void *parent; /* aligns with prio_tree_node parent */ 82 struct vm_area_struct *head; 83 } vm_set; 84 85 struct raw_prio_tree_node prio_tree_node; 86 } shared; 87 88 /* 89 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma 90 * list, after a COW of one of the file pages. A MAP_SHARED vma 91 * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack 92 * or brk vma (with NULL file) can only be in an anon_vma list. 93 */ 94 struct list_head anon_vma_node; /* Serialized by anon_vma->lock */ 95 struct anon_vma *anon_vma; /* Serialized by page_table_lock */ 96 97 /* Function pointers to deal with this struct. */ 98 struct vm_operations_struct * vm_ops; 99 100 /* Information about our backing store: */ 101 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE 102 units, *not* PAGE_CACHE_SIZE */ 103 struct file * vm_file; /* File we map to (can be NULL). */ 104 void * vm_private_data; /* was vm_pte (shared mem) */ 105 unsigned long vm_truncate_count;/* truncate_count or restart_addr */ 106 107 #ifndef CONFIG_MMU 108 atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */ 109 #endif 110 #ifdef CONFIG_NUMA 111 struct mempolicy *vm_policy; /* NUMA policy for the VMA */ 112 #endif 113 }; 114 115 /* 116 * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is 117 * disabled, then there's a single shared list of VMAs maintained by the 118 * system, and mm's subscribe to these individually 119 */ 120 struct vm_list_struct { 121 struct vm_list_struct *next; 122 struct vm_area_struct *vma; 123 }; 124 125 #ifndef CONFIG_MMU 126 extern struct rb_root nommu_vma_tree; 127 extern struct rw_semaphore nommu_vma_sem; 128 129 extern unsigned int kobjsize(const void *objp); 130 #endif 131 132 /* 133 * vm_flags.. 134 */ 135 #define VM_READ 0x00000001 /* currently active flags */ 136 #define VM_WRITE 0x00000002 137 #define VM_EXEC 0x00000004 138 #define VM_SHARED 0x00000008 139 140 /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ 141 #define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ 142 #define VM_MAYWRITE 0x00000020 143 #define VM_MAYEXEC 0x00000040 144 #define VM_MAYSHARE 0x00000080 145 146 #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ 147 #define VM_GROWSUP 0x00000200 148 #define VM_SHM 0x00000000 /* Means nothing: delete it later */ 149 #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ 150 #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ 151 152 #define VM_EXECUTABLE 0x00001000 153 #define VM_LOCKED 0x00002000 154 #define VM_IO 0x00004000 /* Memory mapped I/O or similar */ 155 156 /* Used by sys_madvise() */ 157 #define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ 158 #define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ 159 160 #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ 161 #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ 162 #define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */ 163 #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ 164 #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ 165 #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ 166 #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ 167 #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ 168 169 #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ 170 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS 171 #endif 172 173 #ifdef CONFIG_STACK_GROWSUP 174 #define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) 175 #else 176 #define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) 177 #endif 178 179 #define VM_READHINTMASK (VM_SEQ_READ | VM_RAND_READ) 180 #define VM_ClearReadHint(v) (v)->vm_flags &= ~VM_READHINTMASK 181 #define VM_NormalReadHint(v) (!((v)->vm_flags & VM_READHINTMASK)) 182 #define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ) 183 #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) 184 185 /* 186 * mapping from the currently active vm_flags protection bits (the 187 * low four bits) to a page protection mask.. 188 */ 189 extern pgprot_t protection_map[16]; 190 191 192 /* 193 * These are the virtual MM functions - opening of an area, closing and 194 * unmapping it (needed to keep files on disk up-to-date etc), pointer 195 * to the functions called when a no-page or a wp-page exception occurs. 196 */ 197 struct vm_operations_struct { 198 void (*open)(struct vm_area_struct * area); 199 void (*close)(struct vm_area_struct * area); 200 struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); 201 int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); 202 #ifdef CONFIG_NUMA 203 int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); 204 struct mempolicy *(*get_policy)(struct vm_area_struct *vma, 205 unsigned long addr); 206 #endif 207 }; 208 209 struct mmu_gather; 210 struct inode; 211 212 /* 213 * Each physical page in the system has a struct page associated with 214 * it to keep track of whatever it is we are using the page for at the 215 * moment. Note that we have no way to track which tasks are using 216 * a page. 217 */ 218 struct page { 219 unsigned long flags; /* Atomic flags, some possibly 220 * updated asynchronously */ 221 atomic_t _count; /* Usage count, see below. */ 222 atomic_t _mapcount; /* Count of ptes mapped in mms, 223 * to show when page is mapped 224 * & limit reverse map searches. 225 */ 226 union { 227 struct { 228 unsigned long private; /* Mapping-private opaque data: 229 * usually used for buffer_heads 230 * if PagePrivate set; used for 231 * swp_entry_t if PageSwapCache. 232 * When page is free, this 233 * indicates order in the buddy 234 * system. 235 */ 236 struct address_space *mapping; /* If low bit clear, points to 237 * inode address_space, or NULL. 238 * If page mapped as anonymous 239 * memory, low bit is set, and 240 * it points to anon_vma object: 241 * see PAGE_MAPPING_ANON below. 242 */ 243 }; 244 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS 245 spinlock_t ptl; 246 #endif 247 }; 248 pgoff_t index; /* Our offset within mapping. */ 249 struct list_head lru; /* Pageout list, eg. active_list 250 * protected by zone->lru_lock ! 251 */ 252 /* 253 * On machines where all RAM is mapped into kernel address space, 254 * we can simply calculate the virtual address. On machines with 255 * highmem some memory is mapped into kernel virtual memory 256 * dynamically, so we need a place to store that address. 257 * Note that this field could be 16 bits on x86 ... ;) 258 * 259 * Architectures with slow multiplication can define 260 * WANT_PAGE_VIRTUAL in asm/page.h 261 */ 262 #if defined(WANT_PAGE_VIRTUAL) 263 void *virtual; /* Kernel virtual address (NULL if 264 not kmapped, ie. highmem) */ 265 #endif /* WANT_PAGE_VIRTUAL */ 266 }; 267 268 #define page_private(page) ((page)->private) 269 #define set_page_private(page, v) ((page)->private = (v)) 270 271 /* 272 * FIXME: take this include out, include page-flags.h in 273 * files which need it (119 of them) 274 */ 275 #include <linux/page-flags.h> 276 277 /* 278 * Methods to modify the page usage count. 279 * 280 * What counts for a page usage: 281 * - cache mapping (page->mapping) 282 * - private data (page->private) 283 * - page mapped in a task's page tables, each mapping 284 * is counted separately 285 * 286 * Also, many kernel routines increase the page count before a critical 287 * routine so they can be sure the page doesn't go away from under them. 288 * 289 * Since 2.6.6 (approx), a free page has ->_count = -1. This is so that we 290 * can use atomic_add_negative(-1, page->_count) to detect when the page 291 * becomes free and so that we can also use atomic_inc_and_test to atomically 292 * detect when we just tried to grab a ref on a page which some other CPU has 293 * already deemed to be freeable. 294 * 295 * NO code should make assumptions about this internal detail! Use the provided 296 * macros which retain the old rules: page_count(page) == 0 is a free page. 297 */ 298 299 /* 300 * Drop a ref, return true if the logical refcount fell to zero (the page has 301 * no users) 302 */ 303 #define put_page_testzero(p) \ 304 ({ \ 305 BUG_ON(page_count(p) == 0); \ 306 atomic_add_negative(-1, &(p)->_count); \ 307 }) 308 309 /* 310 * Grab a ref, return true if the page previously had a logical refcount of 311 * zero. ie: returns true if we just grabbed an already-deemed-to-be-free page 312 */ 313 #define get_page_testone(p) atomic_inc_and_test(&(p)->_count) 314 315 #define set_page_count(p,v) atomic_set(&(p)->_count, (v) - 1) 316 #define __put_page(p) atomic_dec(&(p)->_count) 317 318 extern void FASTCALL(__page_cache_release(struct page *)); 319 320 static inline int page_count(struct page *page) 321 { 322 if (PageCompound(page)) 323 page = (struct page *)page_private(page); 324 return atomic_read(&page->_count) + 1; 325 } 326 327 static inline void get_page(struct page *page) 328 { 329 if (unlikely(PageCompound(page))) 330 page = (struct page *)page_private(page); 331 atomic_inc(&page->_count); 332 } 333 334 void put_page(struct page *page); 335 336 /* 337 * Multiple processes may "see" the same page. E.g. for untouched 338 * mappings of /dev/null, all processes see the same page full of 339 * zeroes, and text pages of executables and shared libraries have 340 * only one copy in memory, at most, normally. 341 * 342 * For the non-reserved pages, page_count(page) denotes a reference count. 343 * page_count() == 0 means the page is free. page->lru is then used for 344 * freelist management in the buddy allocator. 345 * page_count() == 1 means the page is used for exactly one purpose 346 * (e.g. a private data page of one process). 347 * 348 * A page may be used for kmalloc() or anyone else who does a 349 * __get_free_page(). In this case the page_count() is at least 1, and 350 * all other fields are unused but should be 0 or NULL. The 351 * management of this page is the responsibility of the one who uses 352 * it. 353 * 354 * The other pages (we may call them "process pages") are completely 355 * managed by the Linux memory manager: I/O, buffers, swapping etc. 356 * The following discussion applies only to them. 357 * 358 * A page may belong to an inode's memory mapping. In this case, 359 * page->mapping is the pointer to the inode, and page->index is the 360 * file offset of the page, in units of PAGE_CACHE_SIZE. 361 * 362 * A page contains an opaque `private' member, which belongs to the 363 * page's address_space. Usually, this is the address of a circular 364 * list of the page's disk buffers. 365 * 366 * For pages belonging to inodes, the page_count() is the number of 367 * attaches, plus 1 if `private' contains something, plus one for 368 * the page cache itself. 369 * 370 * Instead of keeping dirty/clean pages in per address-space lists, we instead 371 * now tag pages as dirty/under writeback in the radix tree. 372 * 373 * There is also a per-mapping radix tree mapping index to the page 374 * in memory if present. The tree is rooted at mapping->root. 375 * 376 * All process pages can do I/O: 377 * - inode pages may need to be read from disk, 378 * - inode pages which have been modified and are MAP_SHARED may need 379 * to be written to disk, 380 * - private pages which have been modified may need to be swapped out 381 * to swap space and (later) to be read back into memory. 382 */ 383 384 /* 385 * The zone field is never updated after free_area_init_core() 386 * sets it, so none of the operations on it need to be atomic. 387 */ 388 389 390 /* 391 * page->flags layout: 392 * 393 * There are three possibilities for how page->flags get 394 * laid out. The first is for the normal case, without 395 * sparsemem. The second is for sparsemem when there is 396 * plenty of space for node and section. The last is when 397 * we have run out of space and have to fall back to an 398 * alternate (slower) way of determining the node. 399 * 400 * No sparsemem: | NODE | ZONE | ... | FLAGS | 401 * with space for node: | SECTION | NODE | ZONE | ... | FLAGS | 402 * no space for node: | SECTION | ZONE | ... | FLAGS | 403 */ 404 #ifdef CONFIG_SPARSEMEM 405 #define SECTIONS_WIDTH SECTIONS_SHIFT 406 #else 407 #define SECTIONS_WIDTH 0 408 #endif 409 410 #define ZONES_WIDTH ZONES_SHIFT 411 412 #if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= FLAGS_RESERVED 413 #define NODES_WIDTH NODES_SHIFT 414 #else 415 #define NODES_WIDTH 0 416 #endif 417 418 /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ 419 #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) 420 #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) 421 #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) 422 423 /* 424 * We are going to use the flags for the page to node mapping if its in 425 * there. This includes the case where there is no node, so it is implicit. 426 */ 427 #define FLAGS_HAS_NODE (NODES_WIDTH > 0 || NODES_SHIFT == 0) 428 429 #ifndef PFN_SECTION_SHIFT 430 #define PFN_SECTION_SHIFT 0 431 #endif 432 433 /* 434 * Define the bit shifts to access each section. For non-existant 435 * sections we define the shift as 0; that plus a 0 mask ensures 436 * the compiler will optimise away reference to them. 437 */ 438 #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) 439 #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) 440 #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) 441 442 /* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */ 443 #if FLAGS_HAS_NODE 444 #define ZONETABLE_SHIFT (NODES_SHIFT + ZONES_SHIFT) 445 #else 446 #define ZONETABLE_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) 447 #endif 448 #define ZONETABLE_PGSHIFT ZONES_PGSHIFT 449 450 #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED 451 #error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED 452 #endif 453 454 #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) 455 #define NODES_MASK ((1UL << NODES_WIDTH) - 1) 456 #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) 457 #define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) 458 459 static inline unsigned long page_zonenum(struct page *page) 460 { 461 return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; 462 } 463 464 struct zone; 465 extern struct zone *zone_table[]; 466 467 static inline struct zone *page_zone(struct page *page) 468 { 469 return zone_table[(page->flags >> ZONETABLE_PGSHIFT) & 470 ZONETABLE_MASK]; 471 } 472 473 static inline unsigned long page_to_nid(struct page *page) 474 { 475 if (FLAGS_HAS_NODE) 476 return (page->flags >> NODES_PGSHIFT) & NODES_MASK; 477 else 478 return page_zone(page)->zone_pgdat->node_id; 479 } 480 static inline unsigned long page_to_section(struct page *page) 481 { 482 return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; 483 } 484 485 static inline void set_page_zone(struct page *page, unsigned long zone) 486 { 487 page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); 488 page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; 489 } 490 static inline void set_page_node(struct page *page, unsigned long node) 491 { 492 page->flags &= ~(NODES_MASK << NODES_PGSHIFT); 493 page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; 494 } 495 static inline void set_page_section(struct page *page, unsigned long section) 496 { 497 page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); 498 page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; 499 } 500 501 static inline void set_page_links(struct page *page, unsigned long zone, 502 unsigned long node, unsigned long pfn) 503 { 504 set_page_zone(page, zone); 505 set_page_node(page, node); 506 set_page_section(page, pfn_to_section_nr(pfn)); 507 } 508 509 #ifndef CONFIG_DISCONTIGMEM 510 /* The array of struct pages - for discontigmem use pgdat->lmem_map */ 511 extern struct page *mem_map; 512 #endif 513 514 static inline void *lowmem_page_address(struct page *page) 515 { 516 return __va(page_to_pfn(page) << PAGE_SHIFT); 517 } 518 519 #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) 520 #define HASHED_PAGE_VIRTUAL 521 #endif 522 523 #if defined(WANT_PAGE_VIRTUAL) 524 #define page_address(page) ((page)->virtual) 525 #define set_page_address(page, address) \ 526 do { \ 527 (page)->virtual = (address); \ 528 } while(0) 529 #define page_address_init() do { } while(0) 530 #endif 531 532 #if defined(HASHED_PAGE_VIRTUAL) 533 void *page_address(struct page *page); 534 void set_page_address(struct page *page, void *virtual); 535 void page_address_init(void); 536 #endif 537 538 #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) 539 #define page_address(page) lowmem_page_address(page) 540 #define set_page_address(page, address) do { } while(0) 541 #define page_address_init() do { } while(0) 542 #endif 543 544 /* 545 * On an anonymous page mapped into a user virtual memory area, 546 * page->mapping points to its anon_vma, not to a struct address_space; 547 * with the PAGE_MAPPING_ANON bit set to distinguish it. 548 * 549 * Please note that, confusingly, "page_mapping" refers to the inode 550 * address_space which maps the page from disk; whereas "page_mapped" 551 * refers to user virtual address space into which the page is mapped. 552 */ 553 #define PAGE_MAPPING_ANON 1 554 555 extern struct address_space swapper_space; 556 static inline struct address_space *page_mapping(struct page *page) 557 { 558 struct address_space *mapping = page->mapping; 559 560 if (unlikely(PageSwapCache(page))) 561 mapping = &swapper_space; 562 else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON)) 563 mapping = NULL; 564 return mapping; 565 } 566 567 static inline int PageAnon(struct page *page) 568 { 569 return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; 570 } 571 572 /* 573 * Return the pagecache index of the passed page. Regular pagecache pages 574 * use ->index whereas swapcache pages use ->private 575 */ 576 static inline pgoff_t page_index(struct page *page) 577 { 578 if (unlikely(PageSwapCache(page))) 579 return page_private(page); 580 return page->index; 581 } 582 583 /* 584 * The atomic page->_mapcount, like _count, starts from -1: 585 * so that transitions both from it and to it can be tracked, 586 * using atomic_inc_and_test and atomic_add_negative(-1). 587 */ 588 static inline void reset_page_mapcount(struct page *page) 589 { 590 atomic_set(&(page)->_mapcount, -1); 591 } 592 593 static inline int page_mapcount(struct page *page) 594 { 595 return atomic_read(&(page)->_mapcount) + 1; 596 } 597 598 /* 599 * Return true if this page is mapped into pagetables. 600 */ 601 static inline int page_mapped(struct page *page) 602 { 603 return atomic_read(&(page)->_mapcount) >= 0; 604 } 605 606 /* 607 * Error return values for the *_nopage functions 608 */ 609 #define NOPAGE_SIGBUS (NULL) 610 #define NOPAGE_OOM ((struct page *) (-1)) 611 612 /* 613 * Different kinds of faults, as returned by handle_mm_fault(). 614 * Used to decide whether a process gets delivered SIGBUS or 615 * just gets major/minor fault counters bumped up. 616 */ 617 #define VM_FAULT_OOM 0x00 618 #define VM_FAULT_SIGBUS 0x01 619 #define VM_FAULT_MINOR 0x02 620 #define VM_FAULT_MAJOR 0x03 621 622 /* 623 * Special case for get_user_pages. 624 * Must be in a distinct bit from the above VM_FAULT_ flags. 625 */ 626 #define VM_FAULT_WRITE 0x10 627 628 #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) 629 630 extern void show_free_areas(void); 631 632 #ifdef CONFIG_SHMEM 633 struct page *shmem_nopage(struct vm_area_struct *vma, 634 unsigned long address, int *type); 635 int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new); 636 struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, 637 unsigned long addr); 638 int shmem_lock(struct file *file, int lock, struct user_struct *user); 639 #else 640 #define shmem_nopage filemap_nopage 641 642 static inline int shmem_lock(struct file *file, int lock, 643 struct user_struct *user) 644 { 645 return 0; 646 } 647 648 static inline int shmem_set_policy(struct vm_area_struct *vma, 649 struct mempolicy *new) 650 { 651 return 0; 652 } 653 654 static inline struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, 655 unsigned long addr) 656 { 657 return NULL; 658 } 659 #endif 660 struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags); 661 extern int shmem_mmap(struct file *file, struct vm_area_struct *vma); 662 663 int shmem_zero_setup(struct vm_area_struct *); 664 665 #ifndef CONFIG_MMU 666 extern unsigned long shmem_get_unmapped_area(struct file *file, 667 unsigned long addr, 668 unsigned long len, 669 unsigned long pgoff, 670 unsigned long flags); 671 #endif 672 673 static inline int can_do_mlock(void) 674 { 675 if (capable(CAP_IPC_LOCK)) 676 return 1; 677 if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0) 678 return 1; 679 return 0; 680 } 681 extern int user_shm_lock(size_t, struct user_struct *); 682 extern void user_shm_unlock(size_t, struct user_struct *); 683 684 /* 685 * Parameter block passed down to zap_pte_range in exceptional cases. 686 */ 687 struct zap_details { 688 struct vm_area_struct *nonlinear_vma; /* Check page->index if set */ 689 struct address_space *check_mapping; /* Check page->mapping if set */ 690 pgoff_t first_index; /* Lowest page->index to unmap */ 691 pgoff_t last_index; /* Highest page->index to unmap */ 692 spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */ 693 unsigned long truncate_count; /* Compare vm_truncate_count */ 694 }; 695 696 struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t); 697 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, 698 unsigned long size, struct zap_details *); 699 unsigned long unmap_vmas(struct mmu_gather **tlb, 700 struct vm_area_struct *start_vma, unsigned long start_addr, 701 unsigned long end_addr, unsigned long *nr_accounted, 702 struct zap_details *); 703 void free_pgd_range(struct mmu_gather **tlb, unsigned long addr, 704 unsigned long end, unsigned long floor, unsigned long ceiling); 705 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma, 706 unsigned long floor, unsigned long ceiling); 707 int copy_page_range(struct mm_struct *dst, struct mm_struct *src, 708 struct vm_area_struct *vma); 709 int zeromap_page_range(struct vm_area_struct *vma, unsigned long from, 710 unsigned long size, pgprot_t prot); 711 void unmap_mapping_range(struct address_space *mapping, 712 loff_t const holebegin, loff_t const holelen, int even_cows); 713 714 static inline void unmap_shared_mapping_range(struct address_space *mapping, 715 loff_t const holebegin, loff_t const holelen) 716 { 717 unmap_mapping_range(mapping, holebegin, holelen, 0); 718 } 719 720 extern int vmtruncate(struct inode * inode, loff_t offset); 721 extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); 722 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); 723 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot); 724 725 #ifdef CONFIG_MMU 726 extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, 727 unsigned long address, int write_access); 728 729 static inline int handle_mm_fault(struct mm_struct *mm, 730 struct vm_area_struct *vma, unsigned long address, 731 int write_access) 732 { 733 return __handle_mm_fault(mm, vma, address, write_access) & 734 (~VM_FAULT_WRITE); 735 } 736 #else 737 static inline int handle_mm_fault(struct mm_struct *mm, 738 struct vm_area_struct *vma, unsigned long address, 739 int write_access) 740 { 741 /* should never happen if there's no MMU */ 742 BUG(); 743 return VM_FAULT_SIGBUS; 744 } 745 #endif 746 747 extern int make_pages_present(unsigned long addr, unsigned long end); 748 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); 749 void install_arg_page(struct vm_area_struct *, struct page *, unsigned long); 750 751 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, 752 int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); 753 void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long); 754 755 int __set_page_dirty_buffers(struct page *page); 756 int __set_page_dirty_nobuffers(struct page *page); 757 int redirty_page_for_writepage(struct writeback_control *wbc, 758 struct page *page); 759 int FASTCALL(set_page_dirty(struct page *page)); 760 int set_page_dirty_lock(struct page *page); 761 int clear_page_dirty_for_io(struct page *page); 762 763 extern unsigned long do_mremap(unsigned long addr, 764 unsigned long old_len, unsigned long new_len, 765 unsigned long flags, unsigned long new_addr); 766 767 /* 768 * Prototype to add a shrinker callback for ageable caches. 769 * 770 * These functions are passed a count `nr_to_scan' and a gfpmask. They should 771 * scan `nr_to_scan' objects, attempting to free them. 772 * 773 * The callback must return the number of objects which remain in the cache. 774 * 775 * The callback will be passed nr_to_scan == 0 when the VM is querying the 776 * cache size, so a fastpath for that case is appropriate. 777 */ 778 typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask); 779 780 /* 781 * Add an aging callback. The int is the number of 'seeks' it takes 782 * to recreate one of the objects that these functions age. 783 */ 784 785 #define DEFAULT_SEEKS 2 786 struct shrinker; 787 extern struct shrinker *set_shrinker(int, shrinker_t); 788 extern void remove_shrinker(struct shrinker *shrinker); 789 790 extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)); 791 792 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); 793 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); 794 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); 795 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); 796 797 /* 798 * The following ifdef needed to get the 4level-fixup.h header to work. 799 * Remove it when 4level-fixup.h has been removed. 800 */ 801 #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) 802 static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) 803 { 804 return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? 805 NULL: pud_offset(pgd, address); 806 } 807 808 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) 809 { 810 return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? 811 NULL: pmd_offset(pud, address); 812 } 813 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ 814 815 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS 816 /* 817 * We tuck a spinlock to guard each pagetable page into its struct page, 818 * at page->private, with BUILD_BUG_ON to make sure that this will not 819 * overflow into the next struct page (as it might with DEBUG_SPINLOCK). 820 * When freeing, reset page->mapping so free_pages_check won't complain. 821 */ 822 #define __pte_lockptr(page) &((page)->ptl) 823 #define pte_lock_init(_page) do { \ 824 spin_lock_init(__pte_lockptr(_page)); \ 825 } while (0) 826 #define pte_lock_deinit(page) ((page)->mapping = NULL) 827 #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) 828 #else 829 /* 830 * We use mm->page_table_lock to guard all pagetable pages of the mm. 831 */ 832 #define pte_lock_init(page) do {} while (0) 833 #define pte_lock_deinit(page) do {} while (0) 834 #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) 835 #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ 836 837 #define pte_offset_map_lock(mm, pmd, address, ptlp) \ 838 ({ \ 839 spinlock_t *__ptl = pte_lockptr(mm, pmd); \ 840 pte_t *__pte = pte_offset_map(pmd, address); \ 841 *(ptlp) = __ptl; \ 842 spin_lock(__ptl); \ 843 __pte; \ 844 }) 845 846 #define pte_unmap_unlock(pte, ptl) do { \ 847 spin_unlock(ptl); \ 848 pte_unmap(pte); \ 849 } while (0) 850 851 #define pte_alloc_map(mm, pmd, address) \ 852 ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ 853 NULL: pte_offset_map(pmd, address)) 854 855 #define pte_alloc_map_lock(mm, pmd, address, ptlp) \ 856 ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ 857 NULL: pte_offset_map_lock(mm, pmd, address, ptlp)) 858 859 #define pte_alloc_kernel(pmd, address) \ 860 ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ 861 NULL: pte_offset_kernel(pmd, address)) 862 863 extern void free_area_init(unsigned long * zones_size); 864 extern void free_area_init_node(int nid, pg_data_t *pgdat, 865 unsigned long * zones_size, unsigned long zone_start_pfn, 866 unsigned long *zholes_size); 867 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long); 868 extern void setup_per_zone_pages_min(void); 869 extern void mem_init(void); 870 extern void show_mem(void); 871 extern void si_meminfo(struct sysinfo * val); 872 extern void si_meminfo_node(struct sysinfo *val, int nid); 873 874 #ifdef CONFIG_NUMA 875 extern void setup_per_cpu_pageset(void); 876 #else 877 static inline void setup_per_cpu_pageset(void) {} 878 #endif 879 880 /* prio_tree.c */ 881 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); 882 void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); 883 void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *); 884 struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma, 885 struct prio_tree_iter *iter); 886 887 #define vma_prio_tree_foreach(vma, iter, root, begin, end) \ 888 for (prio_tree_iter_init(iter, root, begin, end), vma = NULL; \ 889 (vma = vma_prio_tree_next(vma, iter)); ) 890 891 static inline void vma_nonlinear_insert(struct vm_area_struct *vma, 892 struct list_head *list) 893 { 894 vma->shared.vm_set.parent = NULL; 895 list_add_tail(&vma->shared.vm_set.list, list); 896 } 897 898 /* mmap.c */ 899 extern int __vm_enough_memory(long pages, int cap_sys_admin); 900 extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, 901 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); 902 extern struct vm_area_struct *vma_merge(struct mm_struct *, 903 struct vm_area_struct *prev, unsigned long addr, unsigned long end, 904 unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, 905 struct mempolicy *); 906 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); 907 extern int split_vma(struct mm_struct *, 908 struct vm_area_struct *, unsigned long addr, int new_below); 909 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); 910 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, 911 struct rb_node **, struct rb_node *); 912 extern void unlink_file_vma(struct vm_area_struct *); 913 extern struct vm_area_struct *copy_vma(struct vm_area_struct **, 914 unsigned long addr, unsigned long len, pgoff_t pgoff); 915 extern void exit_mmap(struct mm_struct *); 916 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); 917 918 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 919 920 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, 921 unsigned long len, unsigned long prot, 922 unsigned long flag, unsigned long pgoff); 923 924 static inline unsigned long do_mmap(struct file *file, unsigned long addr, 925 unsigned long len, unsigned long prot, 926 unsigned long flag, unsigned long offset) 927 { 928 unsigned long ret = -EINVAL; 929 if ((offset + PAGE_ALIGN(len)) < offset) 930 goto out; 931 if (!(offset & ~PAGE_MASK)) 932 ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); 933 out: 934 return ret; 935 } 936 937 extern int do_munmap(struct mm_struct *, unsigned long, size_t); 938 939 extern unsigned long do_brk(unsigned long, unsigned long); 940 941 /* filemap.c */ 942 extern unsigned long page_unuse(struct page *); 943 extern void truncate_inode_pages(struct address_space *, loff_t); 944 extern void truncate_inode_pages_range(struct address_space *, 945 loff_t lstart, loff_t lend); 946 947 /* generic vm_area_ops exported for stackable file systems */ 948 extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *); 949 extern int filemap_populate(struct vm_area_struct *, unsigned long, 950 unsigned long, pgprot_t, unsigned long, int); 951 952 /* mm/page-writeback.c */ 953 int write_one_page(struct page *page, int wait); 954 955 /* readahead.c */ 956 #define VM_MAX_READAHEAD 128 /* kbytes */ 957 #define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ 958 #define VM_MAX_CACHE_HIT 256 /* max pages in a row in cache before 959 * turning readahead off */ 960 961 int do_page_cache_readahead(struct address_space *mapping, struct file *filp, 962 pgoff_t offset, unsigned long nr_to_read); 963 int force_page_cache_readahead(struct address_space *mapping, struct file *filp, 964 pgoff_t offset, unsigned long nr_to_read); 965 unsigned long page_cache_readahead(struct address_space *mapping, 966 struct file_ra_state *ra, 967 struct file *filp, 968 pgoff_t offset, 969 unsigned long size); 970 void handle_ra_miss(struct address_space *mapping, 971 struct file_ra_state *ra, pgoff_t offset); 972 unsigned long max_sane_readahead(unsigned long nr); 973 974 /* Do stack extension */ 975 extern int expand_stack(struct vm_area_struct *vma, unsigned long address); 976 #ifdef CONFIG_IA64 977 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); 978 #endif 979 980 /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ 981 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); 982 extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, 983 struct vm_area_struct **pprev); 984 985 /* Look up the first VMA which intersects the interval start_addr..end_addr-1, 986 NULL if none. Assume start_addr < end_addr. */ 987 static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) 988 { 989 struct vm_area_struct * vma = find_vma(mm,start_addr); 990 991 if (vma && end_addr <= vma->vm_start) 992 vma = NULL; 993 return vma; 994 } 995 996 static inline unsigned long vma_pages(struct vm_area_struct *vma) 997 { 998 return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 999 } 1000 1001 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); 1002 struct page *vmalloc_to_page(void *addr); 1003 unsigned long vmalloc_to_pfn(void *addr); 1004 int remap_pfn_range(struct vm_area_struct *, unsigned long addr, 1005 unsigned long pfn, unsigned long size, pgprot_t); 1006 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); 1007 1008 struct page *follow_page(struct vm_area_struct *, unsigned long address, 1009 unsigned int foll_flags); 1010 #define FOLL_WRITE 0x01 /* check pte is writable */ 1011 #define FOLL_TOUCH 0x02 /* mark page accessed */ 1012 #define FOLL_GET 0x04 /* do get_page on page */ 1013 #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ 1014 1015 #ifdef CONFIG_PROC_FS 1016 void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); 1017 #else 1018 static inline void vm_stat_account(struct mm_struct *mm, 1019 unsigned long flags, struct file *file, long pages) 1020 { 1021 } 1022 #endif /* CONFIG_PROC_FS */ 1023 1024 #ifndef CONFIG_DEBUG_PAGEALLOC 1025 static inline void 1026 kernel_map_pages(struct page *page, int numpages, int enable) 1027 { 1028 if (!PageHighMem(page) && !enable) 1029 mutex_debug_check_no_locks_freed(page_address(page), 1030 page_address(page + numpages)); 1031 } 1032 #endif 1033 1034 extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); 1035 #ifdef __HAVE_ARCH_GATE_AREA 1036 int in_gate_area_no_task(unsigned long addr); 1037 int in_gate_area(struct task_struct *task, unsigned long addr); 1038 #else 1039 int in_gate_area_no_task(unsigned long addr); 1040 #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) 1041 #endif /* __HAVE_ARCH_GATE_AREA */ 1042 1043 /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ 1044 #define OOM_DISABLE -17 1045 1046 int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, 1047 void __user *, size_t *, loff_t *); 1048 int shrink_slab(unsigned long scanned, gfp_t gfp_mask, 1049 unsigned long lru_pages); 1050 void drop_pagecache(void); 1051 void drop_slab(void); 1052 1053 #endif /* __KERNEL__ */ 1054 #endif /* _LINUX_MM_H */ 1055