1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _LINUX_HUGETLB_H 3 #define _LINUX_HUGETLB_H 4 5 #include <linux/mm.h> 6 #include <linux/mm_types.h> 7 #include <linux/mmdebug.h> 8 #include <linux/fs.h> 9 #include <linux/hugetlb_inline.h> 10 #include <linux/cgroup.h> 11 #include <linux/page_ref.h> 12 #include <linux/list.h> 13 #include <linux/kref.h> 14 #include <linux/pgtable.h> 15 #include <linux/gfp.h> 16 #include <linux/userfaultfd_k.h> 17 18 struct ctl_table; 19 struct user_struct; 20 struct mmu_gather; 21 struct node; 22 23 #ifndef CONFIG_ARCH_HAS_HUGEPD 24 typedef struct { unsigned long pd; } hugepd_t; 25 #define is_hugepd(hugepd) (0) 26 #define __hugepd(x) ((hugepd_t) { (x) }) 27 #endif 28 29 #ifdef CONFIG_HUGETLB_PAGE 30 31 #include <linux/mempolicy.h> 32 #include <linux/shm.h> 33 #include <asm/tlbflush.h> 34 35 /* 36 * For HugeTLB page, there are more metadata to save in the struct page. But 37 * the head struct page cannot meet our needs, so we have to abuse other tail 38 * struct page to store the metadata. 39 */ 40 #define __NR_USED_SUBPAGE 3 41 42 struct hugepage_subpool { 43 spinlock_t lock; 44 long count; 45 long max_hpages; /* Maximum huge pages or -1 if no maximum. */ 46 long used_hpages; /* Used count against maximum, includes */ 47 /* both allocated and reserved pages. */ 48 struct hstate *hstate; 49 long min_hpages; /* Minimum huge pages or -1 if no minimum. */ 50 long rsv_hpages; /* Pages reserved against global pool to */ 51 /* satisfy minimum size. */ 52 }; 53 54 struct resv_map { 55 struct kref refs; 56 spinlock_t lock; 57 struct list_head regions; 58 long adds_in_progress; 59 struct list_head region_cache; 60 long region_cache_count; 61 #ifdef CONFIG_CGROUP_HUGETLB 62 /* 63 * On private mappings, the counter to uncharge reservations is stored 64 * here. If these fields are 0, then either the mapping is shared, or 65 * cgroup accounting is disabled for this resv_map. 
66 */ 67 struct page_counter *reservation_counter; 68 unsigned long pages_per_hpage; 69 struct cgroup_subsys_state *css; 70 #endif 71 }; 72 73 /* 74 * Region tracking -- allows tracking of reservations and instantiated pages 75 * across the pages in a mapping. 76 * 77 * The region data structures are embedded into a resv_map and protected 78 * by a resv_map's lock. The set of regions within the resv_map represent 79 * reservations for huge pages, or huge pages that have already been 80 * instantiated within the map. The from and to elements are huge page 81 * indices into the associated mapping. from indicates the starting index 82 * of the region. to represents the first index past the end of the region. 83 * 84 * For example, a file region structure with from == 0 and to == 4 represents 85 * four huge pages in a mapping. It is important to note that the to element 86 * represents the first element past the end of the region. This is used in 87 * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. 88 * 89 * Interval notation of the form [from, to) will be used to indicate that 90 * the endpoint from is inclusive and to is exclusive. 91 */ 92 struct file_region { 93 struct list_head link; 94 long from; 95 long to; 96 #ifdef CONFIG_CGROUP_HUGETLB 97 /* 98 * On shared mappings, each reserved region appears as a struct 99 * file_region in resv_map. These fields hold the info needed to 100 * uncharge each reservation. 
101 */ 102 struct page_counter *reservation_counter; 103 struct cgroup_subsys_state *css; 104 #endif 105 }; 106 107 struct hugetlb_vma_lock { 108 struct kref refs; 109 struct rw_semaphore rw_sema; 110 struct vm_area_struct *vma; 111 }; 112 113 extern struct resv_map *resv_map_alloc(void); 114 void resv_map_release(struct kref *ref); 115 116 extern spinlock_t hugetlb_lock; 117 extern int hugetlb_max_hstate __read_mostly; 118 #define for_each_hstate(h) \ 119 for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++) 120 121 struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, 122 long min_hpages); 123 void hugepage_put_subpool(struct hugepage_subpool *spool); 124 125 void hugetlb_dup_vma_private(struct vm_area_struct *vma); 126 void clear_vma_resv_huge_pages(struct vm_area_struct *vma); 127 int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); 128 int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, 129 loff_t *); 130 int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, 131 loff_t *); 132 int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, 133 loff_t *); 134 135 int move_hugetlb_page_tables(struct vm_area_struct *vma, 136 struct vm_area_struct *new_vma, 137 unsigned long old_addr, unsigned long new_addr, 138 unsigned long len); 139 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, 140 struct vm_area_struct *, struct vm_area_struct *); 141 struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, 142 unsigned long address, unsigned int flags); 143 long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, 144 struct page **, struct vm_area_struct **, 145 unsigned long *, unsigned long *, long, unsigned int, 146 int *); 147 void unmap_hugepage_range(struct vm_area_struct *, 148 unsigned long, unsigned long, struct page *, 149 zap_flags_t); 150 void __unmap_hugepage_range_final(struct mmu_gather *tlb, 
151 struct vm_area_struct *vma, 152 unsigned long start, unsigned long end, 153 struct page *ref_page, zap_flags_t zap_flags); 154 void hugetlb_report_meminfo(struct seq_file *); 155 int hugetlb_report_node_meminfo(char *buf, int len, int nid); 156 void hugetlb_show_meminfo_node(int nid); 157 unsigned long hugetlb_total_pages(void); 158 vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, 159 unsigned long address, unsigned int flags); 160 #ifdef CONFIG_USERFAULTFD 161 int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, 162 struct vm_area_struct *dst_vma, 163 unsigned long dst_addr, 164 unsigned long src_addr, 165 enum mcopy_atomic_mode mode, 166 struct page **pagep, 167 bool wp_copy); 168 #endif /* CONFIG_USERFAULTFD */ 169 bool hugetlb_reserve_pages(struct inode *inode, long from, long to, 170 struct vm_area_struct *vma, 171 vm_flags_t vm_flags); 172 long hugetlb_unreserve_pages(struct inode *inode, long start, long end, 173 long freed); 174 bool isolate_hugetlb(struct folio *folio, struct list_head *list); 175 int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); 176 int get_huge_page_for_hwpoison(unsigned long pfn, int flags, 177 bool *migratable_cleared); 178 void folio_putback_active_hugetlb(struct folio *folio); 179 void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); 180 void free_huge_page(struct page *page); 181 void hugetlb_fix_reserve_counts(struct inode *inode); 182 extern struct mutex *hugetlb_fault_mutex_table; 183 u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); 184 185 pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, 186 unsigned long addr, pud_t *pud); 187 188 struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); 189 190 extern int sysctl_hugetlb_shm_group; 191 extern struct list_head huge_boot_pages; 192 193 /* arch callbacks */ 194 195 pte_t *huge_pte_alloc(struct mm_struct 
*mm, struct vm_area_struct *vma, 196 unsigned long addr, unsigned long sz); 197 /* 198 * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. 199 * Returns the pte_t* if found, or NULL if the address is not mapped. 200 * 201 * IMPORTANT: we should normally not directly call this function, instead 202 * this is only a common interface to implement arch-specific 203 * walker. Please use hugetlb_walk() instead, because that will attempt to 204 * verify the locking for you. 205 * 206 * Since this function will walk all the pgtable pages (including not only 207 * high-level pgtable page, but also PUD entry that can be unshared 208 * concurrently for VM_SHARED), the caller of this function should be 209 * responsible of its thread safety. One can follow this rule: 210 * 211 * (1) For private mappings: pmd unsharing is not possible, so holding the 212 * mmap_lock for either read or write is sufficient. Most callers 213 * already hold the mmap_lock, so normally, no special action is 214 * required. 215 * 216 * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged 217 * pgtable page can go away from under us! It can be done by a pmd 218 * unshare with a follow up munmap() on the other process), then we 219 * need either: 220 * 221 * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare 222 * won't happen upon the range (it also makes sure the pte_t we 223 * read is the right and stable one), or, 224 * 225 * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make 226 * sure even if unshare happened the racy unmap() will wait until 227 * i_mmap_rwsem is released. 228 * 229 * Option (2.1) is the safest, which guarantees pte stability from pmd 230 * sharing pov, until the vma lock released. Option (2.2) doesn't protect 231 * a concurrent pmd unshare, but it makes sure the pgtable page is safe to 232 * access. 
233 */ 234 pte_t *huge_pte_offset(struct mm_struct *mm, 235 unsigned long addr, unsigned long sz); 236 unsigned long hugetlb_mask_last_page(struct hstate *h); 237 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, 238 unsigned long addr, pte_t *ptep); 239 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, 240 unsigned long *start, unsigned long *end); 241 242 void hugetlb_vma_lock_read(struct vm_area_struct *vma); 243 void hugetlb_vma_unlock_read(struct vm_area_struct *vma); 244 void hugetlb_vma_lock_write(struct vm_area_struct *vma); 245 void hugetlb_vma_unlock_write(struct vm_area_struct *vma); 246 int hugetlb_vma_trylock_write(struct vm_area_struct *vma); 247 void hugetlb_vma_assert_locked(struct vm_area_struct *vma); 248 void hugetlb_vma_lock_release(struct kref *kref); 249 250 int pmd_huge(pmd_t pmd); 251 int pud_huge(pud_t pud); 252 long hugetlb_change_protection(struct vm_area_struct *vma, 253 unsigned long address, unsigned long end, pgprot_t newprot, 254 unsigned long cp_flags); 255 256 bool is_hugetlb_entry_migration(pte_t pte); 257 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); 258 259 #else /* !CONFIG_HUGETLB_PAGE */ 260 261 static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma) 262 { 263 } 264 265 static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma) 266 { 267 } 268 269 static inline unsigned long hugetlb_total_pages(void) 270 { 271 return 0; 272 } 273 274 static inline struct address_space *hugetlb_page_mapping_lock_write( 275 struct page *hpage) 276 { 277 return NULL; 278 } 279 280 static inline int huge_pmd_unshare(struct mm_struct *mm, 281 struct vm_area_struct *vma, 282 unsigned long addr, pte_t *ptep) 283 { 284 return 0; 285 } 286 287 static inline void adjust_range_if_pmd_sharing_possible( 288 struct vm_area_struct *vma, 289 unsigned long *start, unsigned long *end) 290 { 291 } 292 293 static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct 
*vma, 294 unsigned long address, unsigned int flags) 295 { 296 BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ 297 } 298 299 static inline long follow_hugetlb_page(struct mm_struct *mm, 300 struct vm_area_struct *vma, struct page **pages, 301 struct vm_area_struct **vmas, unsigned long *position, 302 unsigned long *nr_pages, long i, unsigned int flags, 303 int *nonblocking) 304 { 305 BUG(); 306 return 0; 307 } 308 309 static inline int copy_hugetlb_page_range(struct mm_struct *dst, 310 struct mm_struct *src, 311 struct vm_area_struct *dst_vma, 312 struct vm_area_struct *src_vma) 313 { 314 BUG(); 315 return 0; 316 } 317 318 static inline int move_hugetlb_page_tables(struct vm_area_struct *vma, 319 struct vm_area_struct *new_vma, 320 unsigned long old_addr, 321 unsigned long new_addr, 322 unsigned long len) 323 { 324 BUG(); 325 return 0; 326 } 327 328 static inline void hugetlb_report_meminfo(struct seq_file *m) 329 { 330 } 331 332 static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid) 333 { 334 return 0; 335 } 336 337 static inline void hugetlb_show_meminfo_node(int nid) 338 { 339 } 340 341 static inline int prepare_hugepage_range(struct file *file, 342 unsigned long addr, unsigned long len) 343 { 344 return -EINVAL; 345 } 346 347 static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) 348 { 349 } 350 351 static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma) 352 { 353 } 354 355 static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma) 356 { 357 } 358 359 static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma) 360 { 361 } 362 363 static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma) 364 { 365 return 1; 366 } 367 368 static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) 369 { 370 } 371 372 static inline int pmd_huge(pmd_t pmd) 373 { 374 return 0; 375 } 376 377 static inline int pud_huge(pud_t pud) 378 { 379 return 0; 380 } 381 382 
static inline int is_hugepage_only_range(struct mm_struct *mm, 383 unsigned long addr, unsigned long len) 384 { 385 return 0; 386 } 387 388 static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, 389 unsigned long addr, unsigned long end, 390 unsigned long floor, unsigned long ceiling) 391 { 392 BUG(); 393 } 394 395 #ifdef CONFIG_USERFAULTFD 396 static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, 397 pte_t *dst_pte, 398 struct vm_area_struct *dst_vma, 399 unsigned long dst_addr, 400 unsigned long src_addr, 401 enum mcopy_atomic_mode mode, 402 struct page **pagep, 403 bool wp_copy) 404 { 405 BUG(); 406 return 0; 407 } 408 #endif /* CONFIG_USERFAULTFD */ 409 410 static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, 411 unsigned long sz) 412 { 413 return NULL; 414 } 415 416 static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list) 417 { 418 return false; 419 } 420 421 static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) 422 { 423 return 0; 424 } 425 426 static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, 427 bool *migratable_cleared) 428 { 429 return 0; 430 } 431 432 static inline void folio_putback_active_hugetlb(struct folio *folio) 433 { 434 } 435 436 static inline void move_hugetlb_state(struct folio *old_folio, 437 struct folio *new_folio, int reason) 438 { 439 } 440 441 static inline long hugetlb_change_protection( 442 struct vm_area_struct *vma, unsigned long address, 443 unsigned long end, pgprot_t newprot, 444 unsigned long cp_flags) 445 { 446 return 0; 447 } 448 449 static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, 450 struct vm_area_struct *vma, unsigned long start, 451 unsigned long end, struct page *ref_page, 452 zap_flags_t zap_flags) 453 { 454 BUG(); 455 } 456 457 static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, 458 struct vm_area_struct *vma, unsigned long address, 459 unsigned int 
flags) 460 { 461 BUG(); 462 return 0; 463 } 464 465 static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } 466 467 #endif /* !CONFIG_HUGETLB_PAGE */ 468 /* 469 * hugepages at page global directory. If arch support 470 * hugepages at pgd level, they need to define this. 471 */ 472 #ifndef pgd_huge 473 #define pgd_huge(x) 0 474 #endif 475 #ifndef p4d_huge 476 #define p4d_huge(x) 0 477 #endif 478 479 #ifndef pgd_write 480 static inline int pgd_write(pgd_t pgd) 481 { 482 BUG(); 483 return 0; 484 } 485 #endif 486 487 #define HUGETLB_ANON_FILE "anon_hugepage" 488 489 enum { 490 /* 491 * The file will be used as an shm file so shmfs accounting rules 492 * apply 493 */ 494 HUGETLB_SHMFS_INODE = 1, 495 /* 496 * The file is being created on the internal vfs mount and shmfs 497 * accounting rules do not apply 498 */ 499 HUGETLB_ANONHUGE_INODE = 2, 500 }; 501 502 #ifdef CONFIG_HUGETLBFS 503 struct hugetlbfs_sb_info { 504 long max_inodes; /* inodes allowed */ 505 long free_inodes; /* inodes free */ 506 spinlock_t stat_lock; 507 struct hstate *hstate; 508 struct hugepage_subpool *spool; 509 kuid_t uid; 510 kgid_t gid; 511 umode_t mode; 512 }; 513 514 static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) 515 { 516 return sb->s_fs_info; 517 } 518 519 struct hugetlbfs_inode_info { 520 struct shared_policy policy; 521 struct inode vfs_inode; 522 unsigned int seals; 523 }; 524 525 static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) 526 { 527 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); 528 } 529 530 extern const struct file_operations hugetlbfs_file_operations; 531 extern const struct vm_operations_struct hugetlb_vm_ops; 532 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, 533 int creat_flags, int page_size_log); 534 535 static inline bool is_file_hugepages(struct file *file) 536 { 537 if (file->f_op == &hugetlbfs_file_operations) 538 return true; 539 540 return 
is_file_shm_hugepages(file); 541 } 542 543 static inline struct hstate *hstate_inode(struct inode *i) 544 { 545 return HUGETLBFS_SB(i->i_sb)->hstate; 546 } 547 #else /* !CONFIG_HUGETLBFS */ 548 549 #define is_file_hugepages(file) false 550 static inline struct file * 551 hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, 552 int creat_flags, int page_size_log) 553 { 554 return ERR_PTR(-ENOSYS); 555 } 556 557 static inline struct hstate *hstate_inode(struct inode *i) 558 { 559 return NULL; 560 } 561 #endif /* !CONFIG_HUGETLBFS */ 562 563 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 564 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 565 unsigned long len, unsigned long pgoff, 566 unsigned long flags); 567 #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ 568 569 unsigned long 570 generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 571 unsigned long len, unsigned long pgoff, 572 unsigned long flags); 573 574 /* 575 * huegtlb page specific state flags. These flags are located in page.private 576 * of the hugetlb head page. Functions created via the below macros should be 577 * used to manipulate these flags. 578 * 579 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at 580 * allocation time. Cleared when page is fully instantiated. Free 581 * routine checks flag to restore a reservation on error paths. 582 * Synchronization: Examined or modified by code that knows it has 583 * the only reference to page. i.e. After allocation but before use 584 * or when the page is being freed. 585 * HPG_migratable - Set after a newly allocated page is added to the page 586 * cache and/or page tables. Indicates the page is a candidate for 587 * migration. 588 * Synchronization: Initially set after new page allocation with no 589 * locking. When examined and modified during migration processing 590 * (isolate, migrate, putback) the hugetlb_lock is held. 
591 * HPG_temporary - Set on a page that is temporarily allocated from the buddy 592 * allocator. Typically used for migration target pages when no pages 593 * are available in the pool. The hugetlb free page path will 594 * immediately free pages with this flag set to the buddy allocator. 595 * Synchronization: Can be set after huge page allocation from buddy when 596 * code knows it has only reference. All other examinations and 597 * modifications require hugetlb_lock. 598 * HPG_freed - Set when page is on the free lists. 599 * Synchronization: hugetlb_lock held for examination and modification. 600 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed. 601 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page 602 * that is not tracked by raw_hwp_page list. 603 */ 604 enum hugetlb_page_flags { 605 HPG_restore_reserve = 0, 606 HPG_migratable, 607 HPG_temporary, 608 HPG_freed, 609 HPG_vmemmap_optimized, 610 HPG_raw_hwp_unreliable, 611 __NR_HPAGEFLAGS, 612 }; 613 614 /* 615 * Macros to create test, set and clear function definitions for 616 * hugetlb specific page flags. 
617 */ 618 #ifdef CONFIG_HUGETLB_PAGE 619 #define TESTHPAGEFLAG(uname, flname) \ 620 static __always_inline \ 621 bool folio_test_hugetlb_##flname(struct folio *folio) \ 622 { void *private = &folio->private; \ 623 return test_bit(HPG_##flname, private); \ 624 } \ 625 static inline int HPage##uname(struct page *page) \ 626 { return test_bit(HPG_##flname, &(page->private)); } 627 628 #define SETHPAGEFLAG(uname, flname) \ 629 static __always_inline \ 630 void folio_set_hugetlb_##flname(struct folio *folio) \ 631 { void *private = &folio->private; \ 632 set_bit(HPG_##flname, private); \ 633 } \ 634 static inline void SetHPage##uname(struct page *page) \ 635 { set_bit(HPG_##flname, &(page->private)); } 636 637 #define CLEARHPAGEFLAG(uname, flname) \ 638 static __always_inline \ 639 void folio_clear_hugetlb_##flname(struct folio *folio) \ 640 { void *private = &folio->private; \ 641 clear_bit(HPG_##flname, private); \ 642 } \ 643 static inline void ClearHPage##uname(struct page *page) \ 644 { clear_bit(HPG_##flname, &(page->private)); } 645 #else 646 #define TESTHPAGEFLAG(uname, flname) \ 647 static inline bool \ 648 folio_test_hugetlb_##flname(struct folio *folio) \ 649 { return 0; } \ 650 static inline int HPage##uname(struct page *page) \ 651 { return 0; } 652 653 #define SETHPAGEFLAG(uname, flname) \ 654 static inline void \ 655 folio_set_hugetlb_##flname(struct folio *folio) \ 656 { } \ 657 static inline void SetHPage##uname(struct page *page) \ 658 { } 659 660 #define CLEARHPAGEFLAG(uname, flname) \ 661 static inline void \ 662 folio_clear_hugetlb_##flname(struct folio *folio) \ 663 { } \ 664 static inline void ClearHPage##uname(struct page *page) \ 665 { } 666 #endif 667 668 #define HPAGEFLAG(uname, flname) \ 669 TESTHPAGEFLAG(uname, flname) \ 670 SETHPAGEFLAG(uname, flname) \ 671 CLEARHPAGEFLAG(uname, flname) \ 672 673 /* 674 * Create functions associated with hugetlb page flags 675 */ 676 HPAGEFLAG(RestoreReserve, restore_reserve) 677 HPAGEFLAG(Migratable, 
migratable) 678 HPAGEFLAG(Temporary, temporary) 679 HPAGEFLAG(Freed, freed) 680 HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) 681 HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) 682 683 #ifdef CONFIG_HUGETLB_PAGE 684 685 #define HSTATE_NAME_LEN 32 686 /* Defines one hugetlb page size */ 687 struct hstate { 688 struct mutex resize_lock; 689 int next_nid_to_alloc; 690 int next_nid_to_free; 691 unsigned int order; 692 unsigned int demote_order; 693 unsigned long mask; 694 unsigned long max_huge_pages; 695 unsigned long nr_huge_pages; 696 unsigned long free_huge_pages; 697 unsigned long resv_huge_pages; 698 unsigned long surplus_huge_pages; 699 unsigned long nr_overcommit_huge_pages; 700 struct list_head hugepage_activelist; 701 struct list_head hugepage_freelists[MAX_NUMNODES]; 702 unsigned int max_huge_pages_node[MAX_NUMNODES]; 703 unsigned int nr_huge_pages_node[MAX_NUMNODES]; 704 unsigned int free_huge_pages_node[MAX_NUMNODES]; 705 unsigned int surplus_huge_pages_node[MAX_NUMNODES]; 706 #ifdef CONFIG_CGROUP_HUGETLB 707 /* cgroup control files */ 708 struct cftype cgroup_files_dfl[8]; 709 struct cftype cgroup_files_legacy[10]; 710 #endif 711 char name[HSTATE_NAME_LEN]; 712 }; 713 714 struct huge_bootmem_page { 715 struct list_head list; 716 struct hstate *hstate; 717 }; 718 719 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); 720 struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, 721 unsigned long addr, int avoid_reserve); 722 struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, 723 nodemask_t *nmask, gfp_t gfp_mask); 724 struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, 725 unsigned long address); 726 int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, 727 pgoff_t idx); 728 void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, 729 unsigned long address, struct folio *folio); 730 731 /* arch callback */ 732 int 
__init __alloc_bootmem_huge_page(struct hstate *h, int nid); 733 int __init alloc_bootmem_huge_page(struct hstate *h, int nid); 734 bool __init hugetlb_node_alloc_supported(void); 735 736 void __init hugetlb_add_hstate(unsigned order); 737 bool __init arch_hugetlb_valid_size(unsigned long size); 738 struct hstate *size_to_hstate(unsigned long size); 739 740 #ifndef HUGE_MAX_HSTATE 741 #define HUGE_MAX_HSTATE 1 742 #endif 743 744 extern struct hstate hstates[HUGE_MAX_HSTATE]; 745 extern unsigned int default_hstate_idx; 746 747 #define default_hstate (hstates[default_hstate_idx]) 748 749 static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) 750 { 751 return folio->_hugetlb_subpool; 752 } 753 754 /* 755 * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool 756 */ 757 static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) 758 { 759 return hugetlb_folio_subpool(page_folio(hpage)); 760 } 761 762 static inline void hugetlb_set_folio_subpool(struct folio *folio, 763 struct hugepage_subpool *subpool) 764 { 765 folio->_hugetlb_subpool = subpool; 766 } 767 768 static inline void hugetlb_set_page_subpool(struct page *hpage, 769 struct hugepage_subpool *subpool) 770 { 771 hugetlb_set_folio_subpool(page_folio(hpage), subpool); 772 } 773 774 static inline struct hstate *hstate_file(struct file *f) 775 { 776 return hstate_inode(file_inode(f)); 777 } 778 779 static inline struct hstate *hstate_sizelog(int page_size_log) 780 { 781 if (!page_size_log) 782 return &default_hstate; 783 784 if (page_size_log < BITS_PER_LONG) 785 return size_to_hstate(1UL << page_size_log); 786 787 return NULL; 788 } 789 790 static inline struct hstate *hstate_vma(struct vm_area_struct *vma) 791 { 792 return hstate_file(vma->vm_file); 793 } 794 795 static inline unsigned long huge_page_size(const struct hstate *h) 796 { 797 return (unsigned long)PAGE_SIZE << h->order; 798 } 799 800 extern unsigned long vma_kernel_pagesize(struct 
vm_area_struct *vma); 801 802 extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); 803 804 static inline unsigned long huge_page_mask(struct hstate *h) 805 { 806 return h->mask; 807 } 808 809 static inline unsigned int huge_page_order(struct hstate *h) 810 { 811 return h->order; 812 } 813 814 static inline unsigned huge_page_shift(struct hstate *h) 815 { 816 return h->order + PAGE_SHIFT; 817 } 818 819 static inline bool hstate_is_gigantic(struct hstate *h) 820 { 821 return huge_page_order(h) >= MAX_ORDER; 822 } 823 824 static inline unsigned int pages_per_huge_page(const struct hstate *h) 825 { 826 return 1 << h->order; 827 } 828 829 static inline unsigned int blocks_per_huge_page(struct hstate *h) 830 { 831 return huge_page_size(h) / 512; 832 } 833 834 #include <asm/hugetlb.h> 835 836 #ifndef is_hugepage_only_range 837 static inline int is_hugepage_only_range(struct mm_struct *mm, 838 unsigned long addr, unsigned long len) 839 { 840 return 0; 841 } 842 #define is_hugepage_only_range is_hugepage_only_range 843 #endif 844 845 #ifndef arch_clear_hugepage_flags 846 static inline void arch_clear_hugepage_flags(struct page *page) { } 847 #define arch_clear_hugepage_flags arch_clear_hugepage_flags 848 #endif 849 850 #ifndef arch_make_huge_pte 851 static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, 852 vm_flags_t flags) 853 { 854 return pte_mkhuge(entry); 855 } 856 #endif 857 858 static inline struct hstate *folio_hstate(struct folio *folio) 859 { 860 VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); 861 return size_to_hstate(folio_size(folio)); 862 } 863 864 static inline struct hstate *page_hstate(struct page *page) 865 { 866 return folio_hstate(page_folio(page)); 867 } 868 869 static inline unsigned hstate_index_to_shift(unsigned index) 870 { 871 return hstates[index].order + PAGE_SHIFT; 872 } 873 874 static inline int hstate_index(struct hstate *h) 875 { 876 return h - hstates; 877 } 878 879 extern int dissolve_free_huge_page(struct 
page *page); 880 extern int dissolve_free_huge_pages(unsigned long start_pfn, 881 unsigned long end_pfn); 882 883 #ifdef CONFIG_MEMORY_FAILURE 884 extern void folio_clear_hugetlb_hwpoison(struct folio *folio); 885 #else 886 static inline void folio_clear_hugetlb_hwpoison(struct folio *folio) 887 { 888 } 889 #endif 890 891 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION 892 #ifndef arch_hugetlb_migration_supported 893 static inline bool arch_hugetlb_migration_supported(struct hstate *h) 894 { 895 if ((huge_page_shift(h) == PMD_SHIFT) || 896 (huge_page_shift(h) == PUD_SHIFT) || 897 (huge_page_shift(h) == PGDIR_SHIFT)) 898 return true; 899 else 900 return false; 901 } 902 #endif 903 #else 904 static inline bool arch_hugetlb_migration_supported(struct hstate *h) 905 { 906 return false; 907 } 908 #endif 909 910 static inline bool hugepage_migration_supported(struct hstate *h) 911 { 912 return arch_hugetlb_migration_supported(h); 913 } 914 915 /* 916 * Movability check is different as compared to migration check. 917 * It determines whether or not a huge page should be placed on 918 * movable zone or not. Movability of any huge page should be 919 * required only if huge page size is supported for migration. 920 * There won't be any reason for the huge page to be movable if 921 * it is not migratable to start with. Also the size of the huge 922 * page should be large enough to be placed under a movable zone 923 * and still feasible enough to be migratable. Just the presence 924 * in movable zone does not make the migration feasible. 925 * 926 * So even though large huge page sizes like the gigantic ones 927 * are migratable they should not be movable because its not 928 * feasible to migrate them from movable zone. 
 */
static inline bool hugepage_movable_supported(struct hstate *h)
{
	/* Not migratable at all: cannot be movable either. */
	if (!hugepage_migration_supported(h))
		return false;

	/* Gigantic hstates are never reported as movable here. */
	if (hstate_is_gigantic(h))
		return false;
	return true;
}

/*
 * Movability of hugepages depends on migration support.
 *
 * Returns GFP_HIGHUSER_MOVABLE when hugepage_movable_supported(h) is true,
 * GFP_HIGHUSER otherwise.
 */
static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	if (hugepage_movable_supported(h))
		return GFP_HIGHUSER_MOVABLE;
	else
		return GFP_HIGHUSER;
}

/*
 * Build an allocation mask for @h starting from htlb_alloc_mask(h).
 * Only the __GFP_THISNODE and __GFP_NOWARN bits of @gfp_mask are carried
 * over; every other bit of @gfp_mask is ignored.
 */
static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	gfp_t modified_mask = htlb_alloc_mask(h);

	/* Some callers might want to enforce node */
	modified_mask |= (gfp_mask & __GFP_THISNODE);

	/* Likewise preserve the caller's request to suppress warnings. */
	modified_mask |= (gfp_mask & __GFP_NOWARN);

	return modified_mask;
}

/*
 * Return the spinlock that covers the huge page table entry @pte in @mm.
 * PMD_SIZE huge pages use the lock returned by pmd_lockptr(); every other
 * size uses mm->page_table_lock.
 */
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	if (huge_page_size(h) == PMD_SIZE)
		return pmd_lockptr(mm, (pmd_t *) pte);
	/* A huge page size equal to PAGE_SIZE is treated as a bug. */
	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
	return &mm->page_table_lock;
}

#ifndef hugepages_supported
/*
 * Some platforms decide whether they support huge pages at boot
 * time. Some of them, such as powerpc, set HPAGE_SHIFT to 0
 * when there is no such support.
 */
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif

void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);

/* Reset the per-mm hugetlb usage counter (mm->hugetlb_usage) to zero. */
static inline void hugetlb_count_init(struct mm_struct *mm)
{
	atomic_long_set(&mm->hugetlb_usage, 0);
}

/* Atomically add @l to the per-mm hugetlb usage counter. */
static inline void hugetlb_count_add(long l, struct mm_struct *mm)
{
	atomic_long_add(l, &mm->hugetlb_usage);
}

/* Atomically subtract @l from the per-mm hugetlb usage counter. */
static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
	atomic_long_sub(l, &mm->hugetlb_usage);
}

/*
 * Default huge_ptep_modify_prot_start(), used only when no definition of
 * that name is already visible (see the #ifndef guard). The self-referential
 * #define marks the name as defined. The default returns the result of
 * huge_ptep_get_and_clear() on the entry.
 */
#ifndef huge_ptep_modify_prot_start
#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep)
{
	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
}
#endif

/*
 * Default huge_ptep_modify_prot_commit(), used only when no definition of
 * that name is already visible. It installs @pte with set_huge_pte_at();
 * @old_pte is not used by this default implementation.
 */
#ifndef huge_ptep_modify_prot_commit
#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep,
						pte_t old_pte, pte_t pte)
{
	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
#endif

#ifdef CONFIG_NUMA
void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif

#else /* CONFIG_HUGETLB_PAGE */
/*
 * CONFIG_HUGETLB_PAGE is not set: everything from here to the matching
 * #endif is a stub that returns a fixed value or does nothing.
 */
struct hstate {};

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return NULL;
}

static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
{
	return NULL;
}

/* Stub: always fails with -ENOMEM. */
static inline int isolate_or_dissolve_huge_page(struct page *page,
						struct list_head *list)
{
	return -ENOMEM;
}

/* Stub: allocation always yields NULL. */
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
						unsigned long addr,
						int avoid_reserve)
{
	return NULL;
}

/* Stub: allocation always yields NULL. */
static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
			     nodemask_t *nmask, gfp_t gfp_mask)
{
	return NULL;
}

/* Stub: allocation always yields NULL. */
static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
						    struct vm_area_struct *vma,
						    unsigned long address)
{
	return NULL;
}

static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
	return 0;
}

/* The hstate lookup stubs below all return NULL. */
static inline struct hstate *hstate_file(struct file *f)
{
	return NULL;
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return NULL;
}

static inline struct hstate *folio_hstate(struct folio *folio)
{
	return NULL;
}

static inline struct hstate *page_hstate(struct page *page)
{
	return NULL;
}

static inline struct hstate *size_to_hstate(unsigned long size)
{
	return NULL;
}

/* The size/geometry stubs below report base-page values. */
static inline unsigned long huge_page_size(struct hstate *h)
{
	return PAGE_SIZE;
}

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return PAGE_MASK;
}

static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return 0;
}

static inline unsigned int huge_page_shift(struct hstate *h)
{
	return PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return false;
}

static inline unsigned int pages_per_huge_page(struct hstate *h)
{
	return 1;
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return 0;
}

static inline int hstate_index(struct hstate *h)
{
	return 0;
}

/* Stub: returns 0 without doing anything. */
static inline int dissolve_free_huge_page(struct page *page)
{
	return 0;
}

/* Stub: returns 0 without doing anything. */
static inline int dissolve_free_huge_pages(unsigned long start_pfn,
					   unsigned long end_pfn)
{
	return 0;
}

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return false;
}

static inline bool hugepage_movable_supported(struct hstate *h)
{
	return false;
}

/* Stub: returns an empty gfp mask. */
static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	return 0;
}

/* Stub: returns an empty gfp mask; @gfp_mask is ignored. */
static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	return 0;
}

/* Stub: always uses mm->page_table_lock. */
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	return &mm->page_table_lock;
}

static inline void hugetlb_count_init(struct mm_struct *mm)
{
}

static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
{
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}

/* Stub: returns the current entry unchanged; nothing is cleared or flushed. */
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep)
{
	return *ptep;
}

static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte)
{
}

static inline void hugetlb_register_node(struct node *node)
{
}

static inline void hugetlb_unregister_node(struct node *node)
{
}
#endif /* CONFIG_HUGETLB_PAGE */

/*
 * Look up the lock for @pte via huge_pte_lockptr(), take it with
 * spin_lock() and return it.
 * NOTE(review): the caller is presumably responsible for the matching
 * unlock; this helper never releases the lock.
 */
static inline spinlock_t *huge_pte_lock(struct hstate *h,
					struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl;

	ptl = huge_pte_lockptr(h, mm, pte);
	spin_lock(ptl);
	return ptl;
}

/*
 * hugetlb_cma_reserve() is only a real function when both
 * CONFIG_HUGETLB_PAGE and CONFIG_CMA are set; otherwise it is an empty stub.
 */
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
extern void __init hugetlb_cma_reserve(int order);
#else
static inline __init void hugetlb_cma_reserve(int order)
{
}
#endif

#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
/*
 * Report whether the page containing @pte has a reference count above one,
 * which this helper treats as the page table page being shared.
 */
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return page_count(virt_to_page(pte)) > 1;
}
#else
/* Without CONFIG_ARCH_WANT_HUGE_PMD_SHARE nothing is reported as shared. */
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return false;
}
#endif

bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);

#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
/*
 * ARCHes with special requirements for evicting HUGETLB backing TLB entries can
 * implement this. The default simply forwards to flush_tlb_range().
 */
#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
#endif

/*
 * True when @vma has VM_MAYSHARE set and a non-NULL vm_private_data.
 * hugetlb_walk() below reads vm_private_data as a struct hugetlb_vma_lock
 * for such VMAs.
 */
static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
}

/*
 * Safe version of huge_pte_offset() to check the locks. See comments
 * above huge_pte_offset().
 *
 * The lock check is compiled in only when CONFIG_HUGETLB_PAGE,
 * CONFIG_ARCH_WANT_HUGE_PMD_SHARE and CONFIG_LOCKDEP are all set; otherwise
 * this is a plain call to huge_pte_offset().
 */
static inline pte_t *
hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
{
#if defined(CONFIG_HUGETLB_PAGE) && \
	defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP)
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	/*
	 * If pmd sharing possible, locking needed to safely walk the
	 * hugetlb pgtables. More information can be found at the comment
	 * above huge_pte_offset() in the same file.
	 *
	 * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
	 */
	/* Warn once if neither the vma lock nor i_mmap_rwsem is held. */
	if (__vma_shareable_lock(vma))
		WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
			     !lockdep_is_held(
				 &vma->vm_file->f_mapping->i_mmap_rwsem));
#endif
	return huge_pte_offset(vma->vm_mm, addr, sz);
}

#endif /* _LINUX_HUGETLB_H */