/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HUGETLB_H
#define _LINUX_HUGETLB_H

#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
#include <linux/cgroup.h>
#include <linux/page_ref.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
#include <linux/gfp.h>
#include <linux/userfaultfd_k.h>

struct ctl_table;
struct user_struct;
struct mmu_gather;
struct node;

void free_huge_folio(struct folio *folio);

#ifdef CONFIG_HUGETLB_PAGE

#include <linux/pagemap.h>
#include <linux/shm.h>
#include <asm/tlbflush.h>

/*
 * For a HugeTLB page, there is more metadata to save in the struct page. But
 * the head struct page cannot meet our needs, so we have to abuse other tail
 * struct pages to store the metadata.
 */
#define __NR_USED_SUBPAGE 3

struct hugepage_subpool {
	spinlock_t lock;
	long count;
	long max_hpages;	/* Maximum huge pages or -1 if no maximum. */
	long used_hpages;	/* Used count against maximum, includes */
				/* both allocated and reserved pages. */
	struct hstate *hstate;
	long min_hpages;	/* Minimum huge pages or -1 if no minimum. */
	long rsv_hpages;	/* Pages reserved against global pool to */
				/* satisfy minimum size. */
};

struct resv_map {
	struct kref refs;
	spinlock_t lock;
	struct list_head regions;
	long adds_in_progress;
	struct list_head region_cache;
	long region_cache_count;
	struct rw_semaphore rw_sema;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On private mappings, the counter to uncharge reservations is stored
	 * here. If these fields are 0, then either the mapping is shared, or
	 * cgroup accounting is disabled for this resv_map.
	 */
	struct page_counter *reservation_counter;
	unsigned long pages_per_hpage;
	struct cgroup_subsys_state *css;
#endif
};

/*
 * Region tracking -- allows tracking of reservations and instantiated pages
 *                    across the pages in a mapping.
 *
 * The region data structures are embedded into a resv_map and protected
 * by a resv_map's lock.  The set of regions within the resv_map represent
 * reservations for huge pages, or huge pages that have already been
 * instantiated within the map.  The from and to elements are huge page
 * indices into the associated mapping.  from indicates the starting index
 * of the region.  to represents the first index past the end of the region.
 *
 * For example, a file region structure with from == 0 and to == 4 represents
 * four huge pages in a mapping.  It is important to note that the to element
 * represents the first element past the end of the region.  This is used in
 * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
 *
 * Interval notation of the form [from, to) will be used to indicate that
 * the endpoint from is inclusive and to is exclusive.
 */
struct file_region {
	struct list_head link;
	long from;
	long to;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On shared mappings, each reserved region appears as a struct
	 * file_region in resv_map. These fields hold the info needed to
	 * uncharge each reservation.
	 */
	struct page_counter *reservation_counter;
	struct cgroup_subsys_state *css;
#endif
};

struct hugetlb_vma_lock {
	struct kref refs;
	struct rw_semaphore rw_sema;
	struct vm_area_struct *vma;
};

extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);

extern spinlock_t hugetlb_lock;
extern int hugetlb_max_hstate __read_mostly;
#define for_each_hstate(h) \
	for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)

struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
					      long min_hpages);
void hugepage_put_subpool(struct hugepage_subpool *spool);

void hugetlb_dup_vma_private(struct vm_area_struct *vma);
void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
int move_hugetlb_page_tables(struct vm_area_struct *vma,
			     struct vm_area_struct *new_vma,
			     unsigned long old_addr, unsigned long new_addr,
			     unsigned long len);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
			    struct vm_area_struct *, struct vm_area_struct *);
void unmap_hugepage_range(struct vm_area_struct *,
			  unsigned long, unsigned long, struct page *,
			  zap_flags_t);
void __unmap_hugepage_range(struct mmu_gather *tlb,
			    struct vm_area_struct *vma,
			    unsigned long start, unsigned long end,
			    struct page *ref_page, zap_flags_t zap_flags);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(char *buf, int len, int nid);
void hugetlb_show_meminfo_node(int nid);
unsigned long hugetlb_total_pages(void);
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			 unsigned long address, unsigned int flags);
#ifdef CONFIG_USERFAULTFD
int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr,
			     unsigned long src_addr,
			     uffd_flags_t flags,
			     struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
			   struct vm_area_struct *vma,
			   vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
			     long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
			       bool *migratable_cleared);
void folio_putback_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);

pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, pud_t *pud);
bool hugetlbfs_pagecache_present(struct hstate *h,
				 struct vm_area_struct *vma,
				 unsigned long address);

struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);

extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages[MAX_NUMNODES];

void hugetlb_bootmem_alloc(void);
bool hugetlb_bootmem_allocated(void);

/* arch callbacks */

#ifndef CONFIG_HIGHPTE
/*
 * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures
 * which may go down to the lowest PTE level in their huge_pte_offset() and
 * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap().
 */
static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}
static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd,
				    unsigned long address)
{
	return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
}
#endif

pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, unsigned long sz);
/*
 * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
 * Returns the pte_t* if found, or NULL if the address is not mapped.
 *
 * IMPORTANT: we should normally not call this function directly; it is
 * only a common interface for implementing the arch-specific walker.
 * Please use hugetlb_walk() instead, because that will attempt to
 * verify the locking for you.
 *
 * Since this function will walk all the pgtable pages (including not only
 * the high-level pgtable pages, but also PUD entries that can be unshared
 * concurrently for VM_SHARED), the caller of this function is responsible
 * for its thread safety.  One can follow this rule:
 *
 * (1) For private mappings: pmd unsharing is not possible, so holding the
 *     mmap_lock for either read or write is sufficient. Most callers
 *     already hold the mmap_lock, so normally, no special action is
 *     required.
 *
 * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged
 *     pgtable page can go away from under us!  It can be done by a pmd
 *     unshare with a follow-up munmap() on the other process), then we
 *     need either:
 *
 *     (2.1) the hugetlb vma lock held for read or write, to make sure pmd
 *           unshare won't happen upon the range (it also makes sure the
 *           pte_t we read is the right and stable one), or,
 *
 *     (2.2) the hugetlb mapping's i_mmap_rwsem held for read or write, to
 *           make sure that even if an unshare happened the racy unmap()
 *           will wait until i_mmap_rwsem is released.
 *
 * Option (2.1) is the safest, which guarantees pte stability from the pmd
 * sharing pov until the vma lock is released.  Option (2.2) doesn't protect
 * against a concurrent pmd unshare, but it makes sure the pgtable page is
 * safe to access.
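 *
 * As an illustrative sketch only (with h == hstate_vma(vma)), a lookup on a
 * shared mapping following rule (2.1) could look like:
 *
 *	hugetlb_vma_lock_read(vma);
 *	ptep = hugetlb_walk(vma, addr & huge_page_mask(h), huge_page_size(h));
 *	if (ptep) {
 *		... the pte is stable against pmd unshare here ...
 *	}
 *	hugetlb_vma_unlock_read(vma);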
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz);
unsigned long hugetlb_mask_last_page(struct hstate *h);
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
		     unsigned long addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
					  unsigned long *start, unsigned long *end);

extern void __hugetlb_zap_begin(struct vm_area_struct *vma,
				unsigned long *begin, unsigned long *end);
extern void __hugetlb_zap_end(struct vm_area_struct *vma,
			      struct zap_details *details);

static inline void hugetlb_zap_begin(struct vm_area_struct *vma,
				     unsigned long *start, unsigned long *end)
{
	if (is_vm_hugetlb_page(vma))
		__hugetlb_zap_begin(vma, start, end);
}

static inline void hugetlb_zap_end(struct vm_area_struct *vma,
				   struct zap_details *details)
{
	if (is_vm_hugetlb_page(vma))
		__hugetlb_zap_end(vma, details);
}

void hugetlb_vma_lock_read(struct vm_area_struct *vma);
void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
void hugetlb_vma_lock_write(struct vm_area_struct *vma);
void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
void hugetlb_vma_lock_release(struct kref *kref);
long hugetlb_change_protection(struct vm_area_struct *vma,
		unsigned long address, unsigned long end, pgprot_t newprot,
		unsigned long cp_flags);
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);

#else /* !CONFIG_HUGETLB_PAGE */

static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
}

static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}

static inline unsigned long hugetlb_total_pages(void)
{
	return 0;
}

static inline struct address_space *hugetlb_folio_mapping_lock_write(
							struct folio *folio)
{
	return NULL;
}

static inline int huge_pmd_unshare(struct mm_struct *mm,
				   struct vm_area_struct *vma,
				   unsigned long addr, pte_t *ptep)
{
	return 0;
}

static inline void adjust_range_if_pmd_sharing_possible(
				struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
}

static inline void hugetlb_zap_begin(
				struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
}

static inline void hugetlb_zap_end(
				struct vm_area_struct *vma,
				struct zap_details *details)
{
}

static inline int copy_hugetlb_page_range(struct mm_struct *dst,
					  struct mm_struct *src,
					  struct vm_area_struct *dst_vma,
					  struct vm_area_struct *src_vma)
{
	BUG();
	return 0;
}

static inline int move_hugetlb_page_tables(struct vm_area_struct *vma,
					   struct vm_area_struct *new_vma,
					   unsigned long old_addr,
					   unsigned long new_addr,
					   unsigned long len)
{
	BUG();
	return 0;
}

static inline void hugetlb_report_meminfo(struct seq_file *m)
{
}

static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid)
{
	return 0;
}

static inline void hugetlb_show_meminfo_node(int nid)
{
}

static inline int prepare_hugepage_range(struct file *file,
					 unsigned long addr, unsigned long len)
{
	return -EINVAL;
}

static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
}

static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	return 1;
}

static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}

static inline int is_hugepage_only_range(struct mm_struct *mm,
					 unsigned long addr, unsigned long len)
{
	return 0;
}

static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
					  unsigned long addr, unsigned long end,
					  unsigned long floor, unsigned long ceiling)
{
	BUG();
}

#ifdef CONFIG_USERFAULTFD
static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
					   struct vm_area_struct *dst_vma,
					   unsigned long dst_addr,
					   unsigned long src_addr,
					   uffd_flags_t flags,
					   struct folio **foliop)
{
	BUG();
	return 0;
}
#endif /* CONFIG_USERFAULTFD */

static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
				     unsigned long sz)
{
	return NULL;
}

static inline bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list)
{
	return false;
}

static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
{
	return 0;
}

static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
					bool *migratable_cleared)
{
	return 0;
}

static inline void folio_putback_hugetlb(struct folio *folio)
{
}

static inline void move_hugetlb_state(struct folio *old_folio,
					struct folio *new_folio, int reason)
{
}

static inline long hugetlb_change_protection(
			struct vm_area_struct *vma, unsigned long address,
			unsigned long end, pgprot_t newprot,
			unsigned long cp_flags)
{
	return 0;
}

static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
			struct vm_area_struct *vma, unsigned long start,
			unsigned long end, struct page *ref_page,
			zap_flags_t zap_flags)
{
	BUG();
}

static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int flags)
{
	BUG();
	return 0;
}

static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }

#endif /* !CONFIG_HUGETLB_PAGE */

#ifndef pgd_write
static inline int pgd_write(pgd_t pgd)
{
	BUG();
	return 0;
}
#endif

#define HUGETLB_ANON_FILE	"anon_hugepage"

enum {
	/*
	 * The file will be used as a shm file so shmfs accounting rules
	 * apply
	 */
	HUGETLB_SHMFS_INODE     = 1,
	/*
	 * The file is being created on the internal vfs mount and shmfs
	 * accounting rules do not apply
	 */
	HUGETLB_ANONHUGE_INODE  = 2,
};

#ifdef CONFIG_HUGETLBFS
struct hugetlbfs_sb_info {
	long	max_inodes;   /* inodes allowed */
	long	free_inodes;  /* inodes free */
	spinlock_t	stat_lock;
	struct hstate *hstate;
	struct hugepage_subpool *spool;
	kuid_t	uid;
	kgid_t	gid;
	umode_t mode;
};

static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

struct hugetlbfs_inode_info {
	struct inode vfs_inode;
	unsigned int seals;
};

static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
{
	return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
}

extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
				int creat_flags, int page_size_log);

static inline bool is_file_hugepages(const struct file *file)
{
	return file->f_op->fop_flags & FOP_HUGE_PAGES;
}

static inline struct hstate *hstate_inode(struct inode *i)
{
	return HUGETLBFS_SB(i->i_sb)->hstate;
}
#else /* !CONFIG_HUGETLBFS */

#define is_file_hugepages(file)			false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
		   int creat_flags, int page_size_log)
{
	return ERR_PTR(-ENOSYS);
}

static inline struct hstate *hstate_inode(struct inode *i)
{
	return NULL;
}
#endif /* !CONFIG_HUGETLBFS */

unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
			  unsigned long len, unsigned long pgoff,
			  unsigned long flags);

/*
 * hugetlb page specific state flags.  These flags are located in page.private
 * of the hugetlb head page.  Functions created via the below macros should be
 * used to manipulate these flags.
 *
 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
 *	allocation time.  Cleared when page is fully instantiated.  Free
 *	routine checks flag to restore a reservation on error paths.
 *	Synchronization:  Examined or modified by code that knows it has
 *	the only reference to page.  i.e. After allocation but before use
 *	or when the page is being freed.
 * HPG_migratable - Set after a newly allocated page is added to the page
 *	cache and/or page tables.  Indicates the page is a candidate for
 *	migration.
 *	Synchronization:  Initially set after new page allocation with no
 *	locking.  When examined and modified during migration processing
 *	(isolate, migrate, putback) the hugetlb_lock is held.
 * HPG_temporary - Set on a page that is temporarily allocated from the buddy
 *	allocator.  Typically used for migration target pages when no pages
 *	are available in the pool.  The hugetlb free page path will
 *	immediately free pages with this flag set to the buddy allocator.
 *	Synchronization:  Can be set after huge page allocation from buddy when
 *	code knows it has the only reference.  All other examinations and
 *	modifications require hugetlb_lock.
 * HPG_freed - Set when page is on the free lists.
 *	Synchronization: hugetlb_lock held for examination and modification.
 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed.
 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page
 *	that is not tracked by raw_hwp_page list.
 */
enum hugetlb_page_flags {
	HPG_restore_reserve = 0,
	HPG_migratable,
	HPG_temporary,
	HPG_freed,
	HPG_vmemmap_optimized,
	HPG_raw_hwp_unreliable,
	HPG_cma,
	__NR_HPAGEFLAGS,
};

/*
 * Macros to create test, set and clear function definitions for
 * hugetlb specific page flags.
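 *
 * For example, HPAGEFLAG(Freed, freed) below generates (roughly, under
 * CONFIG_HUGETLB_PAGE; illustrative expansion only) the following helpers:
 *
 *	bool folio_test_hugetlb_freed(struct folio *folio);
 *	void folio_set_hugetlb_freed(struct folio *folio);
 *	void folio_clear_hugetlb_freed(struct folio *folio);
 *
 * each operating on bit HPG_freed of the head page's page.private.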
 */
#ifdef CONFIG_HUGETLB_PAGE
#define TESTHPAGEFLAG(uname, flname)				\
static __always_inline						\
bool folio_test_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		return test_bit(HPG_##flname, private);		\
	}

#define SETHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_set_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		set_bit(HPG_##flname, private);			\
	}

#define CLEARHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_clear_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		clear_bit(HPG_##flname, private);		\
	}
#else
#define TESTHPAGEFLAG(uname, flname)				\
static inline bool						\
folio_test_hugetlb_##flname(struct folio *folio)		\
	{ return 0; }

#define SETHPAGEFLAG(uname, flname)				\
static inline void						\
folio_set_hugetlb_##flname(struct folio *folio)			\
	{ }

#define CLEARHPAGEFLAG(uname, flname)				\
static inline void						\
folio_clear_hugetlb_##flname(struct folio *folio)		\
	{ }
#endif

#define HPAGEFLAG(uname, flname)				\
	TESTHPAGEFLAG(uname, flname)				\
	SETHPAGEFLAG(uname, flname)				\
	CLEARHPAGEFLAG(uname, flname)				\

/*
 * Create functions associated with hugetlb page flags
 */
HPAGEFLAG(RestoreReserve, restore_reserve)
HPAGEFLAG(Migratable, migratable)
HPAGEFLAG(Temporary, temporary)
HPAGEFLAG(Freed, freed)
HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
HPAGEFLAG(Cma, cma)

#ifdef CONFIG_HUGETLB_PAGE

#define HSTATE_NAME_LEN 32
/* Defines one hugetlb page size */
struct hstate {
	struct mutex resize_lock;
	struct lock_class_key resize_key;
	int next_nid_to_alloc;
	int next_nid_to_free;
	unsigned int order;
	unsigned int demote_order;
	unsigned long mask;
	unsigned long max_huge_pages;
	unsigned long nr_huge_pages;
	unsigned long free_huge_pages;
	unsigned long resv_huge_pages;
	unsigned long surplus_huge_pages;
	unsigned long nr_overcommit_huge_pages;
	struct list_head hugepage_activelist;
	struct list_head hugepage_freelists[MAX_NUMNODES];
	unsigned int max_huge_pages_node[MAX_NUMNODES];
	unsigned int nr_huge_pages_node[MAX_NUMNODES];
	unsigned int free_huge_pages_node[MAX_NUMNODES];
	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
	char name[HSTATE_NAME_LEN];
};

struct cma;

struct huge_bootmem_page {
	struct list_head list;
	struct hstate *hstate;
	unsigned long flags;
	struct cma *cma;
};

#define HUGE_BOOTMEM_HVO		0x0001
#define HUGE_BOOTMEM_ZONES_VALID	0x0002
#define HUGE_BOOTMEM_CMA		0x0004

bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);

int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
				  unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
				nodemask_t *nmask, gfp_t gfp_mask,
				bool allow_alloc_fallback);
struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
					  nodemask_t *nmask, gfp_t gfp_mask);

int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
			      pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
			      unsigned long address, struct folio *folio);

/* arch callback */
int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
bool __init hugetlb_node_alloc_supported(void);

void __init hugetlb_add_hstate(unsigned order);
bool __init arch_hugetlb_valid_size(unsigned long size);
struct hstate *size_to_hstate(unsigned long size);

#ifndef HUGE_MAX_HSTATE
#define HUGE_MAX_HSTATE 1
#endif

extern struct hstate hstates[HUGE_MAX_HSTATE];
extern unsigned int default_hstate_idx;

#define default_hstate (hstates[default_hstate_idx])

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return folio->_hugetlb_subpool;
}

static inline void hugetlb_set_folio_subpool(struct folio *folio,
					     struct hugepage_subpool *subpool)
{
	folio->_hugetlb_subpool = subpool;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return hstate_inode(file_inode(f));
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	if (!page_size_log)
		return &default_hstate;

	if (page_size_log < BITS_PER_LONG)
		return size_to_hstate(1UL << page_size_log);

	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return hstate_file(vma->vm_file);
}

static inline unsigned long huge_page_size(const struct hstate *h)
{
	return (unsigned long)PAGE_SIZE << h->order;
}

extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);

extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return h->mask;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return h->order;
}

static inline unsigned huge_page_shift(struct hstate *h)
{
	return h->order + PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return huge_page_order(h) > MAX_PAGE_ORDER;
}

static inline unsigned int pages_per_huge_page(const struct hstate *h)
{
	return 1 << h->order;
}

static inline unsigned int blocks_per_huge_page(struct hstate *h)
{
	return huge_page_size(h) / 512;
}

static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
				struct address_space *mapping, pgoff_t idx)
{
	return filemap_lock_folio(mapping, idx << huge_page_order(h));
}

#include <asm/hugetlb.h>

#ifndef is_hugepage_only_range
static inline int is_hugepage_only_range(struct mm_struct *mm,
					 unsigned long addr, unsigned long len)
{
	return 0;
}
#define is_hugepage_only_range is_hugepage_only_range
#endif

#ifndef arch_clear_hugetlb_flags
static inline void arch_clear_hugetlb_flags(struct folio *folio) { }
#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
#endif

#ifndef arch_make_huge_pte
static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
				       vm_flags_t flags)
{
	return pte_mkhuge(entry);
}
#endif

#ifndef arch_has_huge_bootmem_alloc
/*
 * Some architectures do their own bootmem allocation, so they can't use
 * early CMA allocation.
 */
static inline bool arch_has_huge_bootmem_alloc(void)
{
	return false;
}
#endif

static inline struct hstate *folio_hstate(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	return size_to_hstate(folio_size(folio));
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return hstates[index].order + PAGE_SHIFT;
}

static inline int hstate_index(struct hstate *h)
{
	return h - hstates;
}

int dissolve_free_hugetlb_folio(struct folio *folio);
int dissolve_free_hugetlb_folios(unsigned long start_pfn,
				 unsigned long end_pfn);

#ifdef CONFIG_MEMORY_FAILURE
extern void folio_clear_hugetlb_hwpoison(struct folio *folio);
#else
static inline void folio_clear_hugetlb_hwpoison(struct folio *folio)
{
}
#endif

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
#ifndef arch_hugetlb_migration_supported
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	if ((huge_page_shift(h) == PMD_SHIFT) ||
		(huge_page_shift(h) == PUD_SHIFT) ||
			(huge_page_shift(h) == PGDIR_SHIFT))
		return true;
	else
		return false;
}
#endif
#else
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	return false;
}
#endif

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return arch_hugetlb_migration_supported(h);
}

/*
 * The movability check is different from the migration check.
 * It determines whether or not a huge page should be placed in
 * the movable zone.  Movability of a huge page is required only
 * if its size is supported for migration: there is no reason for
 * a huge page to be movable if it is not migratable to start
 * with.  Also, the huge page should be large enough to be placed
 * in a movable zone and still feasible to migrate.  Mere presence
 * in the movable zone does not make migration feasible.
 *
 * So even though large huge page sizes like the gigantic ones
 * are migratable, they should not be movable because it's not
 * feasible to migrate them out of the movable zone.
 */
static inline bool hugepage_movable_supported(struct hstate *h)
{
	if (!hugepage_migration_supported(h))
		return false;

	if (hstate_is_gigantic(h))
		return false;
	return true;
}

/* Movability of hugepages depends on migration support. */
static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	gfp_t gfp = __GFP_COMP | __GFP_NOWARN;

	gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;

	return gfp;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	gfp_t modified_mask = htlb_alloc_mask(h);

	/* Some callers might want to enforce node */
	modified_mask |= (gfp_mask & __GFP_THISNODE);

	modified_mask |= (gfp_mask & __GFP_NOWARN);

	return modified_mask;
}

static inline bool htlb_allow_alloc_fallback(int reason)
{
	bool allowed_fallback = false;

	/*
	 * Note: the memory offline, memory failure and migration syscalls will
	 * be allowed to fall back to other nodes due to lack of a better choice,
	 * which might break the per-node hugetlb pool. Other cases will
	 * set __GFP_THISNODE to avoid breaking the per-node hugetlb pool.
	 */
	switch (reason) {
	case MR_MEMORY_HOTPLUG:
	case MR_MEMORY_FAILURE:
	case MR_SYSCALL:
	case MR_MEMPOLICY_MBIND:
		allowed_fallback = true;
		break;
	default:
		break;
	}

	return allowed_fallback;
}

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	const unsigned long size = huge_page_size(h);

	VM_WARN_ON(size == PAGE_SIZE);

	/*
	 * hugetlb must use the exact same PT locks as core-mm page table
	 * walkers would. When modifying a PTE table, hugetlb must take the
	 * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
	 * PT lock etc.
	 *
	 * The expectation is that any hugetlb folio smaller than a PMD is
	 * always mapped into a single PTE table and that any hugetlb folio
	 * smaller than a PUD (but at least as big as a PMD) is always mapped
	 * into a single PMD table.
	 *
	 * If that does not hold for an architecture, then that architecture
	 * must disable split PT locks such that all *_lockptr() functions
	 * will give us the same result: the per-MM PT lock.
	 *
	 * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
	 * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
	 * and core-mm would use pmd_lockptr(). However, in such configurations
	 * split PMD locks are disabled -- they don't make sense on a single
	 * PGDIR page table -- and the end result is the same.
	 */
	if (size >= PUD_SIZE)
		return pud_lockptr(mm, (pud_t *) pte);
	else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
		return pmd_lockptr(mm, (pmd_t *) pte);
	/* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
	return ptep_lockptr(mm, pte);
}
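
/*
 * Illustrative sketch only: rather than calling huge_pte_lockptr() directly,
 * most callers take and release the lock through huge_pte_lock() below, e.g.:
 *
 *	spinlock_t *ptl = huge_pte_lock(h, mm, ptep);
 *	... read or update the huge PTE under ptl ...
 *	spin_unlock(ptl);
 */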

#ifndef hugepages_supported
/*
 * Some platforms decide whether they support huge pages at boot time.
 * Some of them, such as powerpc, set HPAGE_SHIFT to 0 when there is no
 * such support.
 */
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif

void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);

static inline void hugetlb_count_init(struct mm_struct *mm)
{
	atomic_long_set(&mm->hugetlb_usage, 0);
}

static inline void hugetlb_count_add(long l, struct mm_struct *mm)
{
	atomic_long_add(l, &mm->hugetlb_usage);
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
	atomic_long_sub(l, &mm->hugetlb_usage);
}

#ifndef huge_ptep_modify_prot_start
#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
}
#endif

#ifndef huge_ptep_modify_prot_commit
#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep,
						pte_t old_pte, pte_t pte)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}
#endif

#ifdef CONFIG_NUMA
void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif

/*
 * Check if a given raw @page in a hugepage is HWPOISON.
 */
bool is_raw_hwpoison_page_in_hugepage(struct page *page);

static inline unsigned long huge_page_mask_align(struct file *file)
{
	return PAGE_MASK & ~huge_page_mask(hstate_file(file));
}

#else	/* CONFIG_HUGETLB_PAGE */
struct hstate {};

static inline unsigned long huge_page_mask_align(struct file *file)
{
	return 0;
}

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return NULL;
}

static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
				struct address_space *mapping, pgoff_t idx)
{
	return NULL;
}

static inline int isolate_or_dissolve_huge_page(struct page *page,
						struct list_head *list)
{
	return -ENOMEM;
}

static inline int replace_free_hugepage_folios(unsigned long start_pfn,
					       unsigned long end_pfn)
{
	return 0;
}

static inline void wait_for_freed_hugetlb_folios(void)
{
}

static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
						unsigned long addr,
						bool cow_from_owner)
{
	return NULL;
}

static inline struct folio *
alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
			    nodemask_t *nmask, gfp_t gfp_mask)
{
	return NULL;
}

static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
			     nodemask_t *nmask, gfp_t gfp_mask,
			     bool allow_alloc_fallback)
{
	return NULL;
}

static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
	return 0;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return NULL;
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return NULL;
}

static inline struct hstate *folio_hstate(struct folio *folio)
{
	return NULL;
}

static inline struct hstate *size_to_hstate(unsigned long size)
{
	return NULL;
}

static inline unsigned long huge_page_size(struct hstate *h)
{
	return PAGE_SIZE;
}

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return PAGE_MASK;
}

static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return 0;
}

static inline unsigned int huge_page_shift(struct hstate *h)
{
	return PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return false;
}

static inline unsigned int pages_per_huge_page(struct hstate *h)
{
	return 1;
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return 0;
}

static inline int hstate_index(struct hstate *h)
{
	return 0;
}

static inline int dissolve_free_hugetlb_folio(struct folio *folio)
{
	return 0;
}

static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn,
					       unsigned long end_pfn)
{
	return 0;
}

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return false;
}

static inline bool hugepage_movable_supported(struct hstate *h)
{
	return false;
}

static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	return 0;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	return 0;
}

static inline bool htlb_allow_alloc_fallback(int reason)
{
	return false;
}

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	return &mm->page_table_lock;
}

static inline void hugetlb_count_init(struct mm_struct *mm)
{
}

static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
{
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}

static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep)
{
#ifdef CONFIG_MMU
	return ptep_get(ptep);
#else
	return *ptep;
#endif
}

static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte, unsigned long sz)
{
}

static inline void hugetlb_register_node(struct node *node)
{
}

static inline void hugetlb_unregister_node(struct node *node)
{
}

static inline bool hugetlbfs_pagecache_present(
	struct hstate *h, struct vm_area_struct *vma, unsigned long address)
{
	return false;
}

static inline void hugetlb_bootmem_alloc(void)
{
}

static inline bool hugetlb_bootmem_allocated(void)
{
	return false;
}
#endif	/* CONFIG_HUGETLB_PAGE */

static inline spinlock_t *huge_pte_lock(struct hstate *h,
					struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl;

	ptl = huge_pte_lockptr(h, mm, pte);
	spin_lock(ptl);
	return ptl;
}

#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
extern void __init hugetlb_cma_reserve(int order);
#else
static inline __init void hugetlb_cma_reserve(int order)
{
}
#endif

#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return page_count(virt_to_page(pte)) > 1;
}
#else
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return false;
}
#endif

bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);

#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
/*
 * ARCHes with special requirements for evicting HUGETLB backing TLB entries can
 * implement this.
 */
#define flush_hugetlb_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#endif

static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
}

bool __vma_private_lock(struct vm_area_struct *vma);

/*
 * Safe version of huge_pte_offset() to check the locks.  See comments
 * above huge_pte_offset().
 */
static inline pte_t *
hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
{
#if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP)
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	/*
	 * If pmd sharing is possible, locking is needed to safely walk the
	 * hugetlb pgtables.  More information can be found at the comment
	 * above huge_pte_offset() in the same file.
	 *
	 * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
	 */
	if (__vma_shareable_lock(vma))
		WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
			     !lockdep_is_held(
				 &vma->vm_file->f_mapping->i_mmap_rwsem));
#endif
	return huge_pte_offset(vma->vm_mm, addr, sz);
}

#endif /* _LINUX_HUGETLB_H */