/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HUGETLB_H
#define _LINUX_HUGETLB_H

#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
#include <linux/cgroup.h>
#include <linux/page_ref.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
#include <linux/gfp.h>
#include <linux/userfaultfd_k.h>

struct ctl_table;
struct user_struct;
struct mmu_gather;
struct node;

#ifndef CONFIG_ARCH_HAS_HUGEPD
typedef struct { unsigned long pd; } hugepd_t;
#define is_hugepd(hugepd) (0)
#define __hugepd(x) ((hugepd_t) { (x) })
#endif

#ifdef CONFIG_HUGETLB_PAGE

#include <linux/mempolicy.h>
#include <linux/shm.h>
#include <asm/tlbflush.h>

/*
 * For a HugeTLB page, there is more metadata to save in the struct page. But
 * the head struct page cannot meet our needs, so we have to abuse other tail
 * struct pages to store the metadata.
 */
#define __NR_USED_SUBPAGE 3

struct hugepage_subpool {
	spinlock_t lock;
	long count;
	long max_hpages;	/* Maximum huge pages or -1 if no maximum. */
	long used_hpages;	/* Used count against maximum, includes */
				/* both allocated and reserved pages. */
	struct hstate *hstate;
	long min_hpages;	/* Minimum huge pages or -1 if no minimum. */
	long rsv_hpages;	/* Pages reserved against global pool to */
				/* satisfy minimum size. */
};

struct resv_map {
	struct kref refs;
	spinlock_t lock;
	struct list_head regions;
	long adds_in_progress;
	struct list_head region_cache;
	long region_cache_count;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On private mappings, the counter to uncharge reservations is stored
	 * here. If these fields are 0, then either the mapping is shared, or
	 * cgroup accounting is disabled for this resv_map.
	 */
	struct page_counter *reservation_counter;
	unsigned long pages_per_hpage;
	struct cgroup_subsys_state *css;
#endif
};

/*
 * Region tracking -- allows tracking of reservations and instantiated pages
 * across the pages in a mapping.
 *
 * The region data structures are embedded into a resv_map and protected
 * by a resv_map's lock. The set of regions within the resv_map represent
 * reservations for huge pages, or huge pages that have already been
 * instantiated within the map. The from and to elements are huge page
 * indices into the associated mapping. from indicates the starting index
 * of the region. to represents the first index past the end of the region.
 *
 * For example, a file region structure with from == 0 and to == 4 represents
 * four huge pages in a mapping. It is important to note that the to element
 * represents the first element past the end of the region. This is used in
 * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
 *
 * Interval notation of the form [from, to) will be used to indicate that
 * the endpoint from is inclusive and to is exclusive.
 */
struct file_region {
	struct list_head link;
	long from;
	long to;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On shared mappings, each reserved region appears as a struct
	 * file_region in resv_map. These fields hold the info needed to
	 * uncharge each reservation.
	 */
	struct page_counter *reservation_counter;
	struct cgroup_subsys_state *css;
#endif
};
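/*
 * Illustrative sketch (an added example, not upstream text): a resv_map
 * whose region list holds the two file_regions [0, 2) and [3, 5) describes
 * reservations for huge page indices 0, 1, 3 and 4, i.e.
 * (2 - 0) + (5 - 3) = 4 huge pages in total, with index 2 unreserved.
 */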
struct hugetlb_vma_lock {
	struct kref refs;
	struct rw_semaphore rw_sema;
	struct vm_area_struct *vma;
};

extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);

extern spinlock_t hugetlb_lock;
extern int hugetlb_max_hstate __read_mostly;
#define for_each_hstate(h) \
	for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)

struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
					      long min_hpages);
void hugepage_put_subpool(struct hugepage_subpool *spool);

void hugetlb_dup_vma_private(struct vm_area_struct *vma);
void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
int move_hugetlb_page_tables(struct vm_area_struct *vma,
			     struct vm_area_struct *new_vma,
			     unsigned long old_addr, unsigned long new_addr,
			     unsigned long len);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
			    struct vm_area_struct *, struct vm_area_struct *);
struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
				      unsigned long address, unsigned int flags);
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
			 struct page **, unsigned long *, unsigned long *,
			 long, unsigned int, int *);
void unmap_hugepage_range(struct vm_area_struct *,
			  unsigned long, unsigned long, struct page *,
			  zap_flags_t);
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
				  struct vm_area_struct *vma,
				  unsigned long start, unsigned long end,
				  struct page *ref_page, zap_flags_t zap_flags);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(char *buf, int len, int nid);
void hugetlb_show_meminfo_node(int nid);
unsigned long hugetlb_total_pages(void);
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			 unsigned long address, unsigned int flags);
#ifdef CONFIG_USERFAULTFD
int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr,
			     unsigned long src_addr,
			     uffd_flags_t flags,
			     struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
			   struct vm_area_struct *vma,
			   vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
			     long freed);
bool isolate_hugetlb(struct folio *folio, struct list_head *list);
int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
			       bool *migratable_cleared);
void folio_putback_active_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void free_huge_page(struct page *page);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
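/*
 * Illustrative pattern (an added sketch of how callers typically serialize
 * faults on one page cache index; not a definitive recipe):
 *
 *	u32 hash = hugetlb_fault_mutex_hash(mapping, idx);
 *
 *	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 *	... allocate or look up the page for idx ...
 *	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 */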
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, pud_t *pud);

struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);

extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages;

/* arch callbacks */

#ifndef CONFIG_HIGHPTE
/*
 * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures
 * which may go down to the lowest PTE level in their huge_pte_offset() and
 * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap().
 */
static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}
static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd,
				    unsigned long address)
{
	return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
}
#endif

pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, unsigned long sz);
/*
 * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
 * Returns the pte_t* if found, or NULL if the address is not mapped.
 *
 * IMPORTANT: we should normally not directly call this function, instead
 * this is only a common interface to implement arch-specific
 * walker. Please use hugetlb_walk() instead, because that will attempt to
 * verify the locking for you.
 *
 * Since this function will walk all the pgtable pages (including not only
 * high-level pgtable page, but also PUD entry that can be unshared
 * concurrently for VM_SHARED), the caller of this function should be
 * responsible for its thread safety. One can follow this rule:
 *
 * (1) For private mappings: pmd unsharing is not possible, so holding the
 *     mmap_lock for either read or write is sufficient. Most callers
 *     already hold the mmap_lock, so normally, no special action is
 *     required.
 *
 * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged
 *     pgtable page can go away from under us! It can be done by a pmd
 *     unshare with a follow up munmap() on the other process), then we
 *     need either:
 *
 *     (2.1) hugetlb vma lock read or write held, to make sure pmd unshare
 *           won't happen upon the range (it also makes sure the pte_t we
 *           read is the right and stable one), or,
 *
 *     (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make
 *           sure even if unshare happened the racy unmap() will wait until
 *           i_mmap_rwsem is released.
 *
 * Option (2.1) is the safest, which guarantees pte stability from the pmd
 * sharing pov, until the vma lock is released. Option (2.2) doesn't protect
 * a concurrent pmd unshare, but it makes sure the pgtable page is safe to
 * access.
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz);
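/*
 * Illustrative pattern for rule (2.1) above (an added sketch, assuming a
 * shared file-backed hugetlb vma; not a definitive recipe):
 *
 *	hugetlb_vma_lock_read(vma);
 *	ptep = hugetlb_walk(vma, addr & huge_page_mask(h), huge_page_size(h));
 *	if (ptep) {
 *		... examine the pte, taking the pte lock as needed ...
 *	}
 *	hugetlb_vma_unlock_read(vma);
 */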
unsigned long hugetlb_mask_last_page(struct hstate *h);
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
		     unsigned long addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
					  unsigned long *start, unsigned long *end);

void hugetlb_vma_lock_read(struct vm_area_struct *vma);
void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
void hugetlb_vma_lock_write(struct vm_area_struct *vma);
void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
void hugetlb_vma_lock_release(struct kref *kref);

int pmd_huge(pmd_t pmd);
int pud_huge(pud_t pud);
long hugetlb_change_protection(struct vm_area_struct *vma,
			       unsigned long address, unsigned long end,
			       pgprot_t newprot, unsigned long cp_flags);

bool is_hugetlb_entry_migration(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);

#else /* !CONFIG_HUGETLB_PAGE */

static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
}

static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}

static inline unsigned long hugetlb_total_pages(void)
{
	return 0;
}

static inline struct address_space *hugetlb_page_mapping_lock_write(
							struct page *hpage)
{
	return NULL;
}

static inline int huge_pmd_unshare(struct mm_struct *mm,
				   struct vm_area_struct *vma,
				   unsigned long addr, pte_t *ptep)
{
	return 0;
}

static inline void adjust_range_if_pmd_sharing_possible(
				struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
}

static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
				unsigned long address, unsigned int flags)
{
	BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE */
}

static inline long follow_hugetlb_page(struct mm_struct *mm,
			struct vm_area_struct *vma, struct page **pages,
			unsigned long *position, unsigned long *nr_pages,
			long i, unsigned int flags, int *nonblocking)
{
	BUG();
	return 0;
}

static inline int copy_hugetlb_page_range(struct mm_struct *dst,
					  struct mm_struct *src,
					  struct vm_area_struct *dst_vma,
					  struct vm_area_struct *src_vma)
{
	BUG();
	return 0;
}

static inline int move_hugetlb_page_tables(struct vm_area_struct *vma,
					   struct vm_area_struct *new_vma,
					   unsigned long old_addr,
					   unsigned long new_addr,
					   unsigned long len)
{
	BUG();
	return 0;
}

static inline void hugetlb_report_meminfo(struct seq_file *m)
{
}

static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid)
{
	return 0;
}

static inline void hugetlb_show_meminfo_node(int nid)
{
}

static inline int prepare_hugepage_range(struct file *file,
					 unsigned long addr, unsigned long len)
{
	return -EINVAL;
}

static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
}
static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
}

static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	return 1;
}

static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}

static inline int pmd_huge(pmd_t pmd)
{
	return 0;
}

static inline int pud_huge(pud_t pud)
{
	return 0;
}

static inline int is_hugepage_only_range(struct mm_struct *mm,
					 unsigned long addr, unsigned long len)
{
	return 0;
}

static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
				unsigned long addr, unsigned long end,
				unsigned long floor, unsigned long ceiling)
{
	BUG();
}

#ifdef CONFIG_USERFAULTFD
static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
					   struct vm_area_struct *dst_vma,
					   unsigned long dst_addr,
					   unsigned long src_addr,
					   uffd_flags_t flags,
					   struct folio **foliop)
{
	BUG();
	return 0;
}
#endif /* CONFIG_USERFAULTFD */

static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
				     unsigned long sz)
{
	return NULL;
}

static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list)
{
	return false;
}

static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
{
	return 0;
}

static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
					     bool *migratable_cleared)
{
	return 0;
}

static inline void folio_putback_active_hugetlb(struct folio *folio)
{
}

static inline void move_hugetlb_state(struct folio *old_folio,
				      struct folio *new_folio, int reason)
{
}

static inline long hugetlb_change_protection(
			struct vm_area_struct *vma, unsigned long address,
			unsigned long end, pgprot_t newprot,
			unsigned long cp_flags)
{
	return 0;
}

static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
			struct vm_area_struct *vma, unsigned long start,
			unsigned long end, struct page *ref_page,
			zap_flags_t zap_flags)
{
	BUG();
}

static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int flags)
{
	BUG();
	return 0;
}

static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }

#endif /* !CONFIG_HUGETLB_PAGE */
/*
 * hugepages at page global directory. If the arch supports hugepages at the
 * pgd level, it needs to define this.
 */
#ifndef pgd_huge
#define pgd_huge(x)	0
#endif
#ifndef p4d_huge
#define p4d_huge(x)	0
#endif

#ifndef pgd_write
static inline int pgd_write(pgd_t pgd)
{
	BUG();
	return 0;
}
#endif

#define HUGETLB_ANON_FILE	"anon_hugepage"

enum {
	/*
	 * The file will be used as a shm file so shmfs accounting rules
	 * apply
	 */
	HUGETLB_SHMFS_INODE	= 1,
	/*
	 * The file is being created on the internal vfs mount and shmfs
	 * accounting rules do not apply
	 */
	HUGETLB_ANONHUGE_INODE	= 2,
};

#ifdef CONFIG_HUGETLBFS
struct hugetlbfs_sb_info {
	long	max_inodes;	/* inodes allowed */
	long	free_inodes;	/* inodes free */
	spinlock_t	stat_lock;
	struct hstate *hstate;
	struct hugepage_subpool *spool;
	kuid_t	uid;
	kgid_t	gid;
	umode_t	mode;
};

static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

struct hugetlbfs_inode_info {
	struct shared_policy policy;
	struct inode vfs_inode;
	unsigned int seals;
};

static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
{
	return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
}

extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
				int creat_flags, int page_size_log);

static inline bool is_file_hugepages(struct file *file)
{
	if (file->f_op == &hugetlbfs_file_operations)
		return true;

	return is_file_shm_hugepages(file);
}

static inline struct hstate *hstate_inode(struct inode *i)
{
	return HUGETLBFS_SB(i->i_sb)->hstate;
}
#else /* !CONFIG_HUGETLBFS */

#define is_file_hugepages(file)		false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
		   int creat_flags, int page_size_log)
{
	return ERR_PTR(-ENOSYS);
}

static inline struct hstate *hstate_inode(struct inode *i)
{
	return NULL;
}
#endif /* !CONFIG_HUGETLBFS */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags);
#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */

unsigned long
generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
				  unsigned long len, unsigned long pgoff,
				  unsigned long flags);
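/*
 * Illustrative call (an added sketch modeled on how anonymous MAP_HUGETLB
 * mappings obtain their backing file; passing 0 for page_size_log selects
 * the default hstate, and the surrounding error handling is assumed):
 *
 *	file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
 *				  HUGETLB_ANONHUGE_INODE, 0);
 *	if (IS_ERR(file))
 *		return PTR_ERR(file);
 */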
/*
 * hugetlb page specific state flags. These flags are located in page.private
 * of the hugetlb head page. Functions created via the below macros should be
 * used to manipulate these flags.
 *
 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
 *	allocation time. Cleared when page is fully instantiated. Free
 *	routine checks flag to restore a reservation on error paths.
 *	Synchronization: Examined or modified by code that knows it has
 *	the only reference to page. i.e. After allocation but before use
 *	or when the page is being freed.
 * HPG_migratable - Set after a newly allocated page is added to the page
 *	cache and/or page tables. Indicates the page is a candidate for
 *	migration.
 *	Synchronization: Initially set after new page allocation with no
 *	locking. When examined and modified during migration processing
 *	(isolate, migrate, putback) the hugetlb_lock is held.
 * HPG_temporary - Set on a page that is temporarily allocated from the buddy
 *	allocator. Typically used for migration target pages when no pages
 *	are available in the pool. The hugetlb free page path will
 *	immediately free pages with this flag set to the buddy allocator.
 *	Synchronization: Can be set after huge page allocation from buddy when
 *	code knows it has the only reference. All other examinations and
 *	modifications require hugetlb_lock.
 * HPG_freed - Set when page is on the free lists.
 *	Synchronization: hugetlb_lock held for examination and modification.
 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed.
 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page
 *	that is not tracked by raw_hwp_page list.
 */
enum hugetlb_page_flags {
	HPG_restore_reserve = 0,
	HPG_migratable,
	HPG_temporary,
	HPG_freed,
	HPG_vmemmap_optimized,
	HPG_raw_hwp_unreliable,
	__NR_HPAGEFLAGS,
};

/*
 * Macros to create test, set and clear function definitions for
 * hugetlb specific page flags.
 */
#ifdef CONFIG_HUGETLB_PAGE
#define TESTHPAGEFLAG(uname, flname)			\
static __always_inline					\
bool folio_test_hugetlb_##flname(struct folio *folio)	\
	{	void *private = &folio->private;	\
		return test_bit(HPG_##flname, private);	\
	}						\
static inline int HPage##uname(struct page *page)	\
	{ return test_bit(HPG_##flname, &(page->private)); }

#define SETHPAGEFLAG(uname, flname)			\
static __always_inline					\
void folio_set_hugetlb_##flname(struct folio *folio)	\
	{	void *private = &folio->private;	\
		set_bit(HPG_##flname, private);		\
	}						\
static inline void SetHPage##uname(struct page *page)	\
	{ set_bit(HPG_##flname, &(page->private)); }

#define CLEARHPAGEFLAG(uname, flname)			\
static __always_inline					\
void folio_clear_hugetlb_##flname(struct folio *folio)	\
	{	void *private = &folio->private;	\
		clear_bit(HPG_##flname, private);	\
	}						\
static inline void ClearHPage##uname(struct page *page)	\
	{ clear_bit(HPG_##flname, &(page->private)); }
#else
#define TESTHPAGEFLAG(uname, flname)			\
static inline bool					\
folio_test_hugetlb_##flname(struct folio *folio)	\
	{ return 0; }					\
static inline int HPage##uname(struct page *page)	\
	{ return 0; }

#define SETHPAGEFLAG(uname, flname)			\
static inline void					\
folio_set_hugetlb_##flname(struct folio *folio)		\
	{ }						\
static inline void SetHPage##uname(struct page *page)	\
	{ }

#define CLEARHPAGEFLAG(uname, flname)			\
static inline void					\
folio_clear_hugetlb_##flname(struct folio *folio)	\
	{ }						\
static inline void ClearHPage##uname(struct page *page)	\
	{ }
#endif

#define HPAGEFLAG(uname, flname)			\
	TESTHPAGEFLAG(uname, flname)			\
	SETHPAGEFLAG(uname, flname)			\
	CLEARHPAGEFLAG(uname, flname)			\

/*
 * Create functions associated with hugetlb page flags
 */
HPAGEFLAG(RestoreReserve, restore_reserve)
HPAGEFLAG(Migratable, migratable)
HPAGEFLAG(Temporary, temporary)
HPAGEFLAG(Freed, freed)
HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
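/*
 * For example (an added note spelling out what the macros above generate):
 * HPAGEFLAG(Migratable, migratable) creates folio_test_hugetlb_migratable(),
 * folio_set_hugetlb_migratable() and folio_clear_hugetlb_migratable(), plus
 * the page-based HPageMigratable(), SetHPageMigratable() and
 * ClearHPageMigratable() variants, all operating on bit HPG_migratable of
 * the head page's page.private field.
 */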
#ifdef CONFIG_HUGETLB_PAGE

#define HSTATE_NAME_LEN 32
/* Defines one hugetlb page size */
struct hstate {
	struct mutex resize_lock;
	int next_nid_to_alloc;
	int next_nid_to_free;
	unsigned int order;
	unsigned int demote_order;
	unsigned long mask;
	unsigned long max_huge_pages;
	unsigned long nr_huge_pages;
	unsigned long free_huge_pages;
	unsigned long resv_huge_pages;
	unsigned long surplus_huge_pages;
	unsigned long nr_overcommit_huge_pages;
	struct list_head hugepage_activelist;
	struct list_head hugepage_freelists[MAX_NUMNODES];
	unsigned int max_huge_pages_node[MAX_NUMNODES];
	unsigned int nr_huge_pages_node[MAX_NUMNODES];
	unsigned int free_huge_pages_node[MAX_NUMNODES];
	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
#ifdef CONFIG_CGROUP_HUGETLB
	/* cgroup control files */
	struct cftype cgroup_files_dfl[8];
	struct cftype cgroup_files_legacy[10];
#endif
	char name[HSTATE_NAME_LEN];
};

struct huge_bootmem_page {
	struct list_head list;
	struct hstate *hstate;
};

int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
				  unsigned long addr, int avoid_reserve);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
					   nodemask_t *nmask, gfp_t gfp_mask);
struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
				      unsigned long address);
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
			      pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
			      unsigned long address, struct folio *folio);

/* arch callback */
int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
bool __init hugetlb_node_alloc_supported(void);

void __init hugetlb_add_hstate(unsigned order);
bool __init arch_hugetlb_valid_size(unsigned long size);
struct hstate *size_to_hstate(unsigned long size);

#ifndef HUGE_MAX_HSTATE
#define HUGE_MAX_HSTATE 1
#endif

extern struct hstate hstates[HUGE_MAX_HSTATE];
extern unsigned int default_hstate_idx;

#define default_hstate (hstates[default_hstate_idx])

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return folio->_hugetlb_subpool;
}

static inline void hugetlb_set_folio_subpool(struct folio *folio,
					     struct hugepage_subpool *subpool)
{
	folio->_hugetlb_subpool = subpool;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return hstate_inode(file_inode(f));
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	if (!page_size_log)
		return &default_hstate;

	if (page_size_log < BITS_PER_LONG)
		return size_to_hstate(1UL << page_size_log);

	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return hstate_file(vma->vm_file);
}

static inline unsigned long huge_page_size(const struct hstate *h)
{
	return (unsigned long)PAGE_SIZE << h->order;
}

extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);

extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return h->mask;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return h->order;
}
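/*
 * Example of how the fields above relate (an added note, assuming a 4KiB
 * base page, i.e. PAGE_SHIFT == 12): a 2MiB hstate has order == 9, so
 * huge_page_size() returns 2MiB, huge_page_shift() below returns 21 and
 * pages_per_huge_page() returns 512.
 */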
static inline unsigned huge_page_shift(struct hstate *h)
{
	return h->order + PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return huge_page_order(h) > MAX_ORDER;
}

static inline unsigned int pages_per_huge_page(const struct hstate *h)
{
	return 1 << h->order;
}

static inline unsigned int blocks_per_huge_page(struct hstate *h)
{
	return huge_page_size(h) / 512;
}

#include <asm/hugetlb.h>

#ifndef is_hugepage_only_range
static inline int is_hugepage_only_range(struct mm_struct *mm,
					 unsigned long addr, unsigned long len)
{
	return 0;
}
#define is_hugepage_only_range is_hugepage_only_range
#endif

#ifndef arch_clear_hugepage_flags
static inline void arch_clear_hugepage_flags(struct page *page) { }
#define arch_clear_hugepage_flags arch_clear_hugepage_flags
#endif

#ifndef arch_make_huge_pte
static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
				       vm_flags_t flags)
{
	return pte_mkhuge(entry);
}
#endif

static inline struct hstate *folio_hstate(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	return size_to_hstate(folio_size(folio));
}

static inline struct hstate *page_hstate(struct page *page)
{
	return folio_hstate(page_folio(page));
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return hstates[index].order + PAGE_SHIFT;
}

static inline int hstate_index(struct hstate *h)
{
	return h - hstates;
}

extern int dissolve_free_huge_page(struct page *page);
extern int dissolve_free_huge_pages(unsigned long start_pfn,
				    unsigned long end_pfn);

#ifdef CONFIG_MEMORY_FAILURE
extern void folio_clear_hugetlb_hwpoison(struct folio *folio);
#else
static inline void folio_clear_hugetlb_hwpoison(struct folio *folio)
{
}
#endif

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
#ifndef arch_hugetlb_migration_supported
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	if ((huge_page_shift(h) == PMD_SHIFT) ||
	    (huge_page_shift(h) == PUD_SHIFT) ||
	    (huge_page_shift(h) == PGDIR_SHIFT))
		return true;
	else
		return false;
}
#endif
#else
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	return false;
}
#endif

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return arch_hugetlb_migration_supported(h);
}

/*
 * The movability check is different from the migration check. It determines
 * whether or not a huge page should be placed in the movable zone. Movability
 * of a huge page only matters if its size is supported for migration: there
 * is no reason for the huge page to be movable if it is not migratable to
 * start with. The huge page should also be large enough to be placed under a
 * movable zone and still feasible enough to be migratable; mere presence in
 * the movable zone does not make the migration feasible.
 *
 * So even though large huge page sizes like the gigantic ones are migratable,
 * they should not be movable, because it's not feasible to migrate them from
 * the movable zone.
 */
static inline bool hugepage_movable_supported(struct hstate *h)
{
	if (!hugepage_migration_supported(h))
		return false;

	if (hstate_is_gigantic(h))
		return false;
	return true;
}

/* Movability of hugepages depends on migration support. */
static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	if (hugepage_movable_supported(h))
		return GFP_HIGHUSER_MOVABLE;
	else
		return GFP_HIGHUSER;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	gfp_t modified_mask = htlb_alloc_mask(h);

	/* Some callers might want to enforce node */
	modified_mask |= (gfp_mask & __GFP_THISNODE);

	modified_mask |= (gfp_mask & __GFP_NOWARN);

	return modified_mask;
}
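/*
 * Illustrative use (an added sketch; the node id and the NULL nodemask are
 * assumptions of the example, not requirements of the API): a caller that
 * must stay on one node can pass __GFP_THISNODE and have it preserved on
 * top of the hstate's base mask:
 *
 *	gfp_t gfp = htlb_modify_alloc_mask(h, __GFP_THISNODE);
 *	struct folio *folio = alloc_hugetlb_folio_nodemask(h, nid, NULL, gfp);
 */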
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	if (huge_page_size(h) == PMD_SIZE)
		return pmd_lockptr(mm, (pmd_t *) pte);
	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
	return &mm->page_table_lock;
}

#ifndef hugepages_supported
/*
 * Some platforms decide whether they support huge pages at boot time. Some
 * of them, such as powerpc, set HPAGE_SHIFT to 0 when there is no such
 * support.
 */
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif

void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);

static inline void hugetlb_count_init(struct mm_struct *mm)
{
	atomic_long_set(&mm->hugetlb_usage, 0);
}

static inline void hugetlb_count_add(long l, struct mm_struct *mm)
{
	atomic_long_add(l, &mm->hugetlb_usage);
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
	atomic_long_sub(l, &mm->hugetlb_usage);
}

#ifndef huge_ptep_modify_prot_start
#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep)
{
	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
}
#endif

#ifndef huge_ptep_modify_prot_commit
#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep,
						pte_t old_pte, pte_t pte)
{
	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
#endif
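/*
 * Illustrative start/commit pattern for changing the protection of one huge
 * pte (an added sketch of the caller side; huge_pte_modify() comes from
 * asm-generic/hugetlb.h, and suitable page table locking is assumed):
 *
 *	old_pte = huge_ptep_modify_prot_start(vma, addr, ptep);
 *	pte = huge_pte_modify(old_pte, newprot);
 *	huge_ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
 */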
#ifdef CONFIG_NUMA
void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif

#else	/* CONFIG_HUGETLB_PAGE */
struct hstate {};

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return NULL;
}

static inline int isolate_or_dissolve_huge_page(struct page *page,
						struct list_head *list)
{
	return -ENOMEM;
}

static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
						unsigned long addr,
						int avoid_reserve)
{
	return NULL;
}

static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
			     nodemask_t *nmask, gfp_t gfp_mask)
{
	return NULL;
}

static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
						    struct vm_area_struct *vma,
						    unsigned long address)
{
	return NULL;
}

static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
	return 0;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return NULL;
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return NULL;
}

static inline struct hstate *folio_hstate(struct folio *folio)
{
	return NULL;
}

static inline struct hstate *page_hstate(struct page *page)
{
	return NULL;
}

static inline struct hstate *size_to_hstate(unsigned long size)
{
	return NULL;
}

static inline unsigned long huge_page_size(struct hstate *h)
{
	return PAGE_SIZE;
}

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return PAGE_MASK;
}

static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return 0;
}

static inline unsigned int huge_page_shift(struct hstate *h)
{
	return PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return false;
}

static inline unsigned int pages_per_huge_page(struct hstate *h)
{
	return 1;
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return 0;
}

static inline int hstate_index(struct hstate *h)
{
	return 0;
}

static inline int dissolve_free_huge_page(struct page *page)
{
	return 0;
}

static inline int dissolve_free_huge_pages(unsigned long start_pfn,
					   unsigned long end_pfn)
{
	return 0;
}

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return false;
}

static inline bool hugepage_movable_supported(struct hstate *h)
{
	return false;
}

static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	return 0;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	return 0;
}

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	return &mm->page_table_lock;
}

static inline void hugetlb_count_init(struct mm_struct *mm)
{
}

static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
{
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}

static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep)
{
#ifdef CONFIG_MMU
	return ptep_get(ptep);
#else
	return *ptep;
#endif
}

static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte)
{
}

static inline void hugetlb_register_node(struct node *node)
{
}

static inline void hugetlb_unregister_node(struct node *node)
{
}
#endif	/* CONFIG_HUGETLB_PAGE */

static inline spinlock_t *huge_pte_lock(struct hstate *h,
					struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl;

	ptl = huge_pte_lockptr(h, mm, pte);
	spin_lock(ptl);
	return ptl;
}
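/*
 * Illustrative use (an added sketch of the caller side, assuming a
 * hugetlb-enabled configuration; obtaining ptep and the surrounding vma/mm
 * context is left out):
 *
 *	spinlock_t *ptl = huge_pte_lock(h, mm, ptep);
 *	pte_t pte = huge_ptep_get(ptep);
 *	... operate on the entry ...
 *	spin_unlock(ptl);
 */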
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
extern void __init hugetlb_cma_reserve(int order);
#else
static inline __init void hugetlb_cma_reserve(int order)
{
}
#endif

#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return page_count(virt_to_page(pte)) > 1;
}
#else
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return false;
}
#endif

bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);

#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
/*
 * ARCHes with special requirements for evicting HUGETLB backing TLB entries
 * can implement this.
 */
#define flush_hugetlb_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#endif

static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
}

/*
 * Safe version of huge_pte_offset() to check the locks. See comments
 * above huge_pte_offset().
 */
static inline pte_t *
hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
{
#if defined(CONFIG_HUGETLB_PAGE) && \
	defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP)
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	/*
	 * If pmd sharing is possible, locking is needed to safely walk the
	 * hugetlb pgtables. More information can be found at the comment
	 * above huge_pte_offset() in the same file.
	 *
	 * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
	 */
	if (__vma_shareable_lock(vma))
		WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
			     !lockdep_is_held(
				 &vma->vm_file->f_mapping->i_mmap_rwsem));
#endif
	return huge_pte_offset(vma->vm_mm, addr, sz);
}

#endif /* _LINUX_HUGETLB_H */