/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PGTABLE_H
#define _LINUX_PGTABLE_H

#include <linux/pfn.h>
#include <asm/pgtable.h>

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>
#include <asm-generic/pgtable_uffd.h>

#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE. However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING	0UL
#endif

/*
 * A page table page can be thought of as an array like this:
 * pXd_t[PTRS_PER_PxD]
 *
 * The pXx_index() functions return the index of the entry in the page
 * table page which would control the given virtual address.
 *
 * As these functions may be used by the same code for different levels of
 * the page table folding, they are always available, regardless of the
 * CONFIG_PGTABLE_LEVELS value. For the folded levels they simply return 0,
 * because in such cases PTRS_PER_PxD equals 1.
 */

static inline unsigned long pte_index(unsigned long address)
{
	return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}

#ifndef pmd_index
static inline unsigned long pmd_index(unsigned long address)
{
	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}
#define pmd_index pmd_index
#endif

#ifndef pud_index
static inline unsigned long pud_index(unsigned long address)
{
	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
}
#define pud_index pud_index
#endif

#ifndef pgd_index
/* Must be a compile-time constant, so implement it as a macro */
#define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#endif

#ifndef pte_offset_kernel
static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
{
	return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
}
#define pte_offset_kernel pte_offset_kernel
#endif

#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address)			\
	((pte_t *)kmap_atomic(pmd_page(*(dir))) +	\
	 pte_index((address)))
#define pte_unmap(pte) kunmap_atomic((pte))
#else
#define pte_offset_map(dir, address)	pte_offset_kernel((dir), (address))
#define pte_unmap(pte) ((void)(pte))	/* NOP */
#endif
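
/*
 * Index sketch: assuming 4K pages (PAGE_SHIFT == 12) and 512-entry
 * tables as on x86-64 (both are per-arch values, not guarantees of this
 * header), pte_index() simply extracts bits 12..20 of the address:
 *
 *	pte_index(addr) == (addr >> 12) & 511
 *
 * i.e. the slot within the PTE page that maps addr; each higher-level
 * helper peels off the next, higher bit field the same way.
 */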

/* Find an entry in the second-level page table. */
#ifndef pmd_offset
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
}
#define pmd_offset pmd_offset
#endif

#ifndef pud_offset
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
	return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
}
#define pud_offset pud_offset
#endif

static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
{
	return (pgd + pgd_index(address));
};

/*
 * a shortcut to get a pgd_t in a given mm
 */
#ifndef pgd_offset
#define pgd_offset(mm, address)		pgd_offset_pgd((mm)->pgd, (address))
#endif

/*
 * a shortcut which implies the use of the kernel's pgd, instead
 * of a process's
 */
#ifndef pgd_offset_k
#define pgd_offset_k(address)		pgd_offset(&init_mm, (address))
#endif

/*
 * In many cases it is known that a virtual address is mapped at PMD or PTE
 * level, so instead of traversing all the page table levels, we can get a
 * pointer to the PMD entry in a user or kernel page table, or translate a
 * virtual address to the pointer to the PTE in the kernel page tables, with
 * these simple helpers.
 */
static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va)
{
	return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), va);
}

static inline pmd_t *pmd_off_k(unsigned long va)
{
	return pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va);
}

static inline pte_t *virt_to_kpte(unsigned long vaddr)
{
	pmd_t *pmd = pmd_off_k(vaddr);

	return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
}
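
/*
 * Putting the offset helpers together: a minimal lookup sketch, assuming
 * every level is already populated (real callers must check p?d_none()
 * and p?d_bad() at each step and hold the appropriate locks):
 *
 *	pgd_t *pgd = pgd_offset(mm, addr);
 *	p4d_t *p4d = p4d_offset(pgd, addr);
 *	pud_t *pud = pud_offset(p4d, addr);
 *	pmd_t *pmd = pmd_offset(pud, addr);
 *	pte_t *pte = pte_offset_map(pmd, addr);
 *	...
 *	pte_unmap(pte);
 */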

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
extern int pudp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pud_t *pudp,
				 pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
static inline int pudp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pud_t *pudp,
					pud_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	pte_t pte = *ptep;
	int r = 1;
	if (!pte_young(pte))
		r = 0;
	else
		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
	return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	int r = 1;
	if (!pmd_young(pmd))
		r = 0;
	else
		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
	return r;
}
#else
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
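
/*
 * Usage sketch for the young-bit helpers: reference harvesting in an
 * aging loop (hypothetical caller; the pte lock is held, and a TLB flush
 * may still be needed afterwards, which is what ptep_clear_flush_young()
 * below is for):
 *
 *	if (ptep_test_and_clear_young(vma, addr, ptep))
 *		referenced++;
 */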

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
				  unsigned long address, pmd_t *pmdp);
#else
/*
 * Despite being relevant only to THP, this API is called from generic rmap
 * code under PageTransHuge(), hence it needs a dummy implementation for !THP.
 */
static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pte_t *ptep)
{
	pte_t pte = *ptep;
	pte_clear(mm, address, ptep);
	return pte;
}
#endif

#ifndef __HAVE_ARCH_PTEP_GET
static inline pte_t ptep_get(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}
#endif
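
/*
 * A sketch of why ptep_get() exists: when reading an entry that may
 * change underneath us, prefer it over a plain dereference so the
 * compiler can neither tear nor refetch the load:
 *
 *	pte_t pte = ptep_get(ptep);
 *	if (pte_present(pte))
 *		... every later test sees the same snapshot of pte ...
 */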

#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
/*
 * WARNING: only to be used in the get_user_pages_fast() implementation.
 *
 * With get_user_pages_fast(), we walk down the pagetables without taking any
 * locks. For this we would like to load the pointers atomically, but sometimes
 * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
 * we do have is the guarantee that a PTE will only either go from not present
 * to present, or present to not present or both -- it will not switch to a
 * completely different present page without a TLB flush in between; something
 * that we are blocking by holding interrupts off.
 *
 * Setting ptes from not present to present goes:
 *
 *   ptep->pte_high = h;
 *   smp_wmb();
 *   ptep->pte_low = l;
 *
 * And present to not present goes:
 *
 *   ptep->pte_low = 0;
 *   smp_wmb();
 *   ptep->pte_high = 0;
 *
 * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
 * We load pte_high *after* loading pte_low, which ensures we don't see an older
 * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't
 * picked up a changed pte high. We might have gotten rubbish values from
 * pte_low and pte_high, but we are guaranteed that pte_low will not have the
 * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
 * operates on present ptes we're safe.
 */
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
	pte_t pte;

	do {
		pte.pte_low = ptep->pte_low;
		smp_rmb();
		pte.pte_high = ptep->pte_high;
		smp_rmb();
	} while (unlikely(pte.pte_low != ptep->pte_low));

	return pte;
}
#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
/*
 * We require that the PTE can be read atomically.
 */
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
	return ptep_get(ptep);
}
#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
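
/*
 * Lockless read pattern (sketch of a gup_fast-style caller; interrupts
 * are assumed disabled around the walk, per the WARNING above):
 *
 *	pte_t pte = ptep_get_lockless(ptep);
 *	... speculatively look up the page ...
 *	if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep))))
 *		goto retry;	(the entry changed under us)
 */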

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	pmd_clear(pmdp);
	return pmd;
}
#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pud_t *pudp)
{
	pud_t pud = *pudp;

	pud_clear(pudp);
	return pud;
}
#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
					    unsigned long address, pmd_t *pmdp,
					    int full)
{
	return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
}
#endif

#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pud_t *pudp,
					    int full)
{
	return pudp_huge_get_and_clear(mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pte_t *ptep,
					    int full)
{
	pte_t pte;
	pte = ptep_get_and_clear(mm, address, ptep);
	return pte;
}
#endif

/*
 * If two threads concurrently fault at the same page, the thread that
 * won the race updates the PTE and its local TLB/Cache. The other thread
 * gives up, simply does nothing, and continues; on architectures where
 * software can update the TLB, the local TLB can be updated here to avoid
 * the next page fault. This function updates the TLB only; it does nothing
 * to the cache or anything else, which is what distinguishes it from
 * update_mmu_cache().
 */
#ifndef __HAVE_ARCH_UPDATE_MMU_TLB
static inline void update_mmu_tlb(struct vm_area_struct *vma,
				unsigned long address, pte_t *ptep)
{
}
#define __HAVE_ARCH_UPDATE_MMU_TLB
#endif

/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTE's which are already
 * not present, or in the process of an address space destruction.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
					      unsigned long address,
					      pte_t *ptep,
					      int full)
{
	pte_clear(mm, address, ptep);
}
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pmd_t *pmdp);
extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pud_t *pudp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = *ptep;
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

/*
 * On some architectures the hardware does not set the page access bit when
 * accessing a memory page; it is the responsibility of software to set it.
 * Tracking the access bit this way costs an extra page fault, so as an
 * optimization the access bit can be set during the whole page fault flow
 * on these arches. To differentiate it from the macro pte_mkyoung, this
 * macro is used on platforms where software maintains the page access bit.
 */

#ifndef pte_savedwrite
#define pte_savedwrite pte_write
#endif

#ifndef pte_mk_savedwrite
#define pte_mk_savedwrite pte_mkwrite
#endif

#ifndef pte_clear_savedwrite
#define pte_clear_savedwrite pte_wrprotect
#endif

#ifndef pmd_savedwrite
#define pmd_savedwrite pmd_write
#endif

#ifndef pmd_mk_savedwrite
#define pmd_mk_savedwrite pmd_mkwrite
#endif

#ifndef pmd_clear_savedwrite
#define pmd_clear_savedwrite pmd_wrprotect
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	pud_t old_pud = *pudp;

	set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
}
#else
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	BUILD_BUG();
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif

#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp);
#else
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
					unsigned long address,
					pmd_t *pmdp)
{
	BUILD_BUG();
	return *pmdp;
}
#define pmdp_collapse_flush pmdp_collapse_flush
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
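
/*
 * Write-protect sketch, as in fork()-style COW setup (pte lock held,
 * with the TLB flush deferred to the caller):
 *
 *	ptep_set_wrprotect(src_mm, addr, src_pte);
 *	(a later write faults, and the fault handler copies the page)
 */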

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable);
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * This is an implementation of pmdp_establish() that is only suitable for an
 * architecture that doesn't have hardware dirty/accessed bits. In this case we
 * can't race with the CPU setting these bits, and a non-atomic approach is
 * fine.
 */
static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
	return old_pmd;
}
#endif

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
			     pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}
#endif
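
/*
 * pte_same() is the usual "did the entry change while we slept?" test.
 * Sketch of a fault-handler-style recheck under the pte lock:
 *
 *	spin_lock(ptl);
 *	if (unlikely(!pte_same(*ptep, orig_pte)))
 *		goto out;	(raced with another fault; bail out)
 */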

#ifndef __HAVE_ARCH_PTE_UNUSED
/*
 * Some architectures provide facilities to virtualization guests
 * so that they can flag allocated pages as unused. This allows the
 * host to transparently reclaim unused pages. This function returns
 * whether the pte's page is unused.
 */
static inline int pte_unused(pte_t pte)
{
	return 0;
}
#endif

#ifndef pte_access_permitted
#define pte_access_permitted(pte, write) \
	(pte_present(pte) && (!(write) || pte_write(pte)))
#endif

#ifndef pmd_access_permitted
#define pmd_access_permitted(pmd, write) \
	(pmd_present(pmd) && (!(write) || pmd_write(pmd)))
#endif

#ifndef pud_access_permitted
#define pud_access_permitted(pud, write) \
	(pud_present(pud) && (!(write) || pud_write(pud)))
#endif

#ifndef p4d_access_permitted
#define p4d_access_permitted(p4d, write) \
	(p4d_present(p4d) && (!(write) || p4d_write(p4d)))
#endif

#ifndef pgd_access_permitted
#define pgd_access_permitted(pgd, write) \
	(pgd_present(pgd) && (!(write) || pgd_write(pgd)))
#endif

#ifndef __HAVE_ARCH_PMD_SAME
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return pmd_val(pmd_a) == pmd_val(pmd_b);
}

static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
	return pud_val(pud_a) == pud_val(pud_b);
}
#endif

#ifndef __HAVE_ARCH_P4D_SAME
static inline int p4d_same(p4d_t p4d_a, p4d_t p4d_b)
{
	return p4d_val(p4d_a) == p4d_val(p4d_b);
}
#endif

#ifndef __HAVE_ARCH_PGD_SAME
static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
{
	return pgd_val(pgd_a) == pgd_val(pgd_b);
}
#endif

/*
 * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
 * TLB flush will be required as a result of the "set". For example, use
 * in scenarios where it is known ahead of time that the routine is
 * setting non-present entries, or re-setting an existing entry to the
 * same value. Otherwise, use the typical "set" helpers and flush the
 * TLB.
 */
#define set_pte_safe(ptep, pte) \
({ \
	WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
	set_pte(ptep, pte); \
})

#define set_pmd_safe(pmdp, pmd) \
({ \
	WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
	set_pmd(pmdp, pmd); \
})

#define set_pud_safe(pudp, pud) \
({ \
	WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \
	set_pud(pudp, pud); \
})

#define set_p4d_safe(p4dp, p4d) \
({ \
	WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \
	set_p4d(p4dp, p4d); \
})

#define set_pgd_safe(pgdp, pgd) \
({ \
	WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
	set_pgd(pgdp, pgd); \
})
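
/*
 * Sketch: early boot code creating brand-new (previously non-present)
 * kernel mappings can use the _safe variants and skip the flush; the
 * pfn_pte()/PAGE_KERNEL combination below is one arch-provided way to
 * build the entry, shown for illustration only:
 *
 *	set_pte_safe(ptep, pfn_pte(pfn, PAGE_KERNEL));
 */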

#ifndef __HAVE_ARCH_DO_SWAP_PAGE
/*
 * Some architectures support metadata associated with a page. When a
 * page is being swapped out, this metadata must be saved so it can be
 * restored when the page is swapped back in. SPARC M7 and newer
 * processors support an ADI (Application Data Integrity) tag for the
 * page as metadata for the page. arch_do_swap_page() can restore this
 * metadata when a page is swapped back in.
 */
static inline void arch_do_swap_page(struct mm_struct *mm,
				     struct vm_area_struct *vma,
				     unsigned long addr,
				     pte_t pte, pte_t oldpte)
{

}
#endif

#ifndef __HAVE_ARCH_UNMAP_ONE
/*
 * Some architectures support metadata associated with a page. When a
 * page is being swapped out, this metadata must be saved so it can be
 * restored when the page is swapped back in. SPARC M7 and newer
 * processors support an ADI (Application Data Integrity) tag for the
 * page as metadata for the page. arch_unmap_one() can save this
 * metadata on a swap-out of a page.
 */
static inline int arch_unmap_one(struct mm_struct *mm,
				 struct vm_area_struct *vma,
				 unsigned long addr,
				 pte_t orig_pte)
{
	return 0;
}
#endif

/*
 * Allow architectures to preserve additional metadata associated with
 * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function
 * prototypes must be defined in the arch-specific asm/pgtable.h file.
 */
#ifndef __HAVE_ARCH_PREPARE_TO_SWAP
static inline int arch_prepare_to_swap(struct page *page)
{
	return 0;
}
#endif

#ifndef __HAVE_ARCH_SWAP_INVALIDATE
static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
{
}

static inline void arch_swap_invalidate_area(int type)
{
}
#endif

#ifndef __HAVE_ARCH_SWAP_RESTORE
static inline void arch_swap_restore(swp_entry_t entry, struct page *page)
{
}
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

#ifndef pte_accessible
# define pte_accessible(mm, pte)	((void)(pte), 1)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier. Although no
 * vma end wraps to 0, the rounded-up __boundary may wrap to 0 throughout.
 */

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})

#ifndef p4d_addr_end
#define p4d_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pud_addr_end
#define pud_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);

#ifndef __PAGETABLE_P4D_FOLDED
void p4d_clear_bad(p4d_t *);
#else
#define p4d_clear_bad(p4d)	do { } while (0)
#endif

#ifndef __PAGETABLE_PUD_FOLDED
void pud_clear_bad(pud_t *);
#else
#define pud_clear_bad(pud)	do { } while (0)
#endif

void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
	if (pgd_none(*pgd))
		return 1;
	if (unlikely(pgd_bad(*pgd))) {
		pgd_clear_bad(pgd);
		return 1;
	}
	return 0;
}

static inline int p4d_none_or_clear_bad(p4d_t *p4d)
{
	if (p4d_none(*p4d))
		return 1;
	if (unlikely(p4d_bad(*p4d))) {
		p4d_clear_bad(p4d);
		return 1;
	}
	return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
	if (pud_none(*pud))
		return 1;
	if (unlikely(pud_bad(*pud))) {
		pud_clear_bad(pud);
		return 1;
	}
	return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 1;
	if (unlikely(pmd_bad(*pmd))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}
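
/*
 * Typical walker shape built from the boundary and clear_bad helpers
 * above (sketch):
 *
 *	pmd = pmd_offset(pud, addr);
 *	do {
 *		next = pmd_addr_end(addr, end);
 *		if (pmd_none_or_clear_bad(pmd))
 *			continue;
 *		... operate on [addr, next) ...
 *	} while (pmd++, addr = next, addr != end);
 */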

static inline pte_t __ptep_modify_prot_start(struct vm_area_struct *vma,
					     unsigned long addr,
					     pte_t *ptep)
{
	/*
	 * Get the current pte state, but zero it out to make it
	 * non-present, preventing the hardware from asynchronously
	 * updating it.
	 */
	return ptep_get_and_clear(vma->vm_mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct vm_area_struct *vma,
					     unsigned long addr,
					     pte_t *ptep, pte_t pte)
{
	/*
	 * The pte is non-present, so there's no hardware state to
	 * preserve.
	 */
	set_pte_at(vma->vm_mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time. The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
					   unsigned long addr,
					   pte_t *ptep)
{
	return __ptep_modify_prot_start(vma, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
					   unsigned long addr,
					   pte_t *ptep, pte_t old_pte, pte_t pte)
{
	__ptep_modify_prot_commit(vma, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
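
/*
 * Transaction sketch (pte lock held; making the pte writable is just an
 * example modification):
 *
 *	old_pte = ptep_modify_prot_start(vma, addr, ptep);
 *	new_pte = pte_mkwrite(old_pte);
 *	ptep_modify_prot_commit(vma, addr, ptep, old_pte, new_pte);
 */
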
#endif /* CONFIG_MMU */

/*
 * No-op macros that just return the current protection value. Defined here
 * because these macros can be used even if CONFIG_MMU is not defined.
 */

#ifndef pgprot_nx
#define pgprot_nx(prot)	(prot)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

#ifndef pgprot_writethrough
#define pgprot_writethrough pgprot_noncached
#endif

#ifndef pgprot_device
#define pgprot_device pgprot_noncached
#endif

#ifndef pgprot_mhp
#define pgprot_mhp(prot)	(prot)
#endif

#ifdef CONFIG_MMU
#ifndef pgprot_modify
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
		newprot = pgprot_noncached(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
		newprot = pgprot_writecombine(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
		newprot = pgprot_device(newprot);
	return newprot;
}
#endif
#endif /* CONFIG_MMU */

#ifndef pgprot_encrypted
#define pgprot_encrypted(prot)	(prot)
#endif

#ifndef pgprot_decrypted
#define pgprot_decrypted(prot)	(prot)
#endif

/*
 * A facility to provide lazy MMU batching. This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued. Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window. Note that using this
 * interface requires that read hazards be removed from the code. A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date. This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified. In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif
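
/*
 * Batching sketch (the page table lock is held across the whole window,
 * per the rules above):
 *
 *	arch_enter_lazy_mmu_mode();
 *	for (; addr != end; addr += PAGE_SIZE, ptep++)
 *		set_pte_at(mm, addr, ptep, pte);
 *	arch_leave_lazy_mmu_mode();
 */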

/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests. By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entry and exits should always be
 * paired. This is for sanity of maintaining and reasoning about the
 * kernel code. In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif
#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
static inline int pte_soft_dirty(pte_t pte)
{
	return 0;
}

static inline int pmd_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return 0;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif

#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interfaces that can be used by architecture code to keep track of
 * the memory type of pfn mappings specified by remap_pfn_range() and
 * vmf_insert_pfn().
 */

/*
 * track_pfn_remap is called when a _new_ pfn mapping is being established
 * by remap_pfn_range() for the physical range indicated by pfn and size.
 */
static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
				  unsigned long pfn, unsigned long addr,
				  unsigned long size)
{
	return 0;
}

/*
 * track_pfn_insert is called when a _new_ single pfn is established
 * by vmf_insert_pfn().
 */
static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
				    pfn_t pfn)
{
}

/*
 * track_pfn_copy is called when the vma that is covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_copy(struct vm_area_struct *vma)
{
	return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case pfn and size are zero).
 */
static inline void untrack_pfn(struct vm_area_struct *vma,
			       unsigned long pfn, unsigned long size)
{
}

/*
 * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
 */
static inline void untrack_pfn_moved(struct vm_area_struct *vma)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
			   unsigned long pfn, unsigned long addr,
			   unsigned long size);
extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
			     pfn_t pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
			unsigned long size);
extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif

#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))

#else
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	return pfn == zero_pfn;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	extern unsigned long zero_pfn;
	return zero_pfn;
}
#endif

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return 0;
}
#ifndef pmd_write
static inline int pmd_write(pmd_t pmd)
{
	BUG();
	return 0;
}
#endif /* pmd_write */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pud_write
static inline int pud_write(pud_t pud)
{
	BUG();
	return 0;
}
#endif /* pud_write */
1138ca5999fdSMike Rapoport #ifdef CONFIG_MMU
1139ca5999fdSMike Rapoport 
1140ca5999fdSMike Rapoport #ifndef CONFIG_TRANSPARENT_HUGEPAGE
1141ca5999fdSMike Rapoport static inline int pmd_trans_huge(pmd_t pmd)
1142ca5999fdSMike Rapoport {
1143ca5999fdSMike Rapoport 	return 0;
1144ca5999fdSMike Rapoport }
1145ca5999fdSMike Rapoport #ifndef pmd_write
1146ca5999fdSMike Rapoport static inline int pmd_write(pmd_t pmd)
1147ca5999fdSMike Rapoport {
1148ca5999fdSMike Rapoport 	BUG();
1149ca5999fdSMike Rapoport 	return 0;
1150ca5999fdSMike Rapoport }
1151ca5999fdSMike Rapoport #endif /* pmd_write */
1152ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1153ca5999fdSMike Rapoport 
1154ca5999fdSMike Rapoport #ifndef pud_write
1155ca5999fdSMike Rapoport static inline int pud_write(pud_t pud)
1156ca5999fdSMike Rapoport {
1157ca5999fdSMike Rapoport 	BUG();
1158ca5999fdSMike Rapoport 	return 0;
1159ca5999fdSMike Rapoport }
1160ca5999fdSMike Rapoport #endif /* pud_write */
1161ca5999fdSMike Rapoport 
1162ca5999fdSMike Rapoport #if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
1163ca5999fdSMike Rapoport static inline int pmd_devmap(pmd_t pmd)
1164ca5999fdSMike Rapoport {
1165ca5999fdSMike Rapoport 	return 0;
1166ca5999fdSMike Rapoport }
1167ca5999fdSMike Rapoport static inline int pud_devmap(pud_t pud)
1168ca5999fdSMike Rapoport {
1169ca5999fdSMike Rapoport 	return 0;
1170ca5999fdSMike Rapoport }
1171ca5999fdSMike Rapoport static inline int pgd_devmap(pgd_t pgd)
1172ca5999fdSMike Rapoport {
1173ca5999fdSMike Rapoport 	return 0;
1174ca5999fdSMike Rapoport }
1175ca5999fdSMike Rapoport #endif
1176ca5999fdSMike Rapoport 
1177ca5999fdSMike Rapoport #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
1178ca5999fdSMike Rapoport 	(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
1179ca5999fdSMike Rapoport 	 !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
1180ca5999fdSMike Rapoport static inline int pud_trans_huge(pud_t pud)
1181ca5999fdSMike Rapoport {
1182ca5999fdSMike Rapoport 	return 0;
1183ca5999fdSMike Rapoport }
1184ca5999fdSMike Rapoport #endif
1185ca5999fdSMike Rapoport 
1186ca5999fdSMike Rapoport /* See pmd_none_or_trans_huge_or_clear_bad for discussion. */
1187ca5999fdSMike Rapoport static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud)
1188ca5999fdSMike Rapoport {
1189ca5999fdSMike Rapoport 	pud_t pudval = READ_ONCE(*pud);
1190ca5999fdSMike Rapoport 
1191ca5999fdSMike Rapoport 	if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
1192ca5999fdSMike Rapoport 		return 1;
1193ca5999fdSMike Rapoport 	if (unlikely(pud_bad(pudval))) {
1194ca5999fdSMike Rapoport 		pud_clear_bad(pud);
1195ca5999fdSMike Rapoport 		return 1;
1196ca5999fdSMike Rapoport 	}
1197ca5999fdSMike Rapoport 	return 0;
1198ca5999fdSMike Rapoport }
1199ca5999fdSMike Rapoport 
1200ca5999fdSMike Rapoport /* See pmd_trans_unstable for discussion. */
1201ca5999fdSMike Rapoport static inline int pud_trans_unstable(pud_t *pud)
1202ca5999fdSMike Rapoport {
1203ca5999fdSMike Rapoport #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
1204ca5999fdSMike Rapoport 	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
1205ca5999fdSMike Rapoport 	return pud_none_or_trans_huge_or_dev_or_clear_bad(pud);
1206ca5999fdSMike Rapoport #else
1207ca5999fdSMike Rapoport 	return 0;
1208ca5999fdSMike Rapoport #endif
1209ca5999fdSMike Rapoport }
1210ca5999fdSMike Rapoport 
1211ca5999fdSMike Rapoport #ifndef pmd_read_atomic
1212ca5999fdSMike Rapoport static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
1213ca5999fdSMike Rapoport {
1214ca5999fdSMike Rapoport 	/*
1215ca5999fdSMike Rapoport 	 * Depend on the compiler for an atomic pmd read. NOTE: this is
1216ca5999fdSMike Rapoport 	 * only going to work if pmdval_t isn't larger than
1217ca5999fdSMike Rapoport 	 * an unsigned long.
1218ca5999fdSMike Rapoport 	 */
1219ca5999fdSMike Rapoport 	return *pmdp;
1220ca5999fdSMike Rapoport }
1221ca5999fdSMike Rapoport #endif
1222ca5999fdSMike Rapoport 
1223ca5999fdSMike Rapoport #ifndef arch_needs_pgtable_deposit
1224ca5999fdSMike Rapoport #define arch_needs_pgtable_deposit() (false)
1225ca5999fdSMike Rapoport #endif
1226ca5999fdSMike Rapoport /*
1227ca5999fdSMike Rapoport  * This function is meant to be used by sites walking pagetables with
1228c1e8d7c6SMichel Lespinasse  * the mmap_lock held in read mode to protect against MADV_DONTNEED and
1229ca5999fdSMike Rapoport  * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
1230ca5999fdSMike Rapoport  * into a null pmd and the transhuge page fault can convert a null pmd
1231ca5999fdSMike Rapoport  * into a huge pmd or into a regular pmd (if the hugepage allocation
1232c1e8d7c6SMichel Lespinasse  * fails). While holding the mmap_lock in read mode the pmd becomes
1233ca5999fdSMike Rapoport  * stable and stops changing under us only if it's not null and not a
1234ca5999fdSMike Rapoport  * transhuge pmd. When those races occur and this function makes a
1235ca5999fdSMike Rapoport  * difference vs the standard pmd_none_or_clear_bad, the result is
1236ca5999fdSMike Rapoport  * undefined, so behaving as if the pmd was none is safe (because it
1237ca5999fdSMike Rapoport  * can return none anyway). The compiler-level barrier() is critically
1238ca5999fdSMike Rapoport  * important to compute the two checks atomically on the same pmdval.
1239ca5999fdSMike Rapoport  *
1240ca5999fdSMike Rapoport  * For 32bit kernels with a 64bit large pmd_t this automatically takes
1241ca5999fdSMike Rapoport  * care of reading the pmd atomically to avoid SMP race conditions
1242c1e8d7c6SMichel Lespinasse  * against pmd_populate() when the mmap_lock is held for reading by the
1243ca5999fdSMike Rapoport  * caller (a special atomic read, not done by "gcc" as in the generic
1244ca5999fdSMike Rapoport  * version above, is also needed when THP is disabled, because the page
1245ca5999fdSMike Rapoport  * fault can populate the pmd from under us).
1246ca5999fdSMike Rapoport  */
1247ca5999fdSMike Rapoport static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
1248ca5999fdSMike Rapoport {
1249ca5999fdSMike Rapoport 	pmd_t pmdval = pmd_read_atomic(pmd);
1250ca5999fdSMike Rapoport 	/*
1251ca5999fdSMike Rapoport 	 * The barrier will stabilize the pmdval in a register or on
1252ca5999fdSMike Rapoport 	 * the stack so that it will stop changing under the code.
1253ca5999fdSMike Rapoport 	 *
1254ca5999fdSMike Rapoport 	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
1255ca5999fdSMike Rapoport 	 * pmd_read_atomic is allowed to return a non-atomic pmdval
1256ca5999fdSMike Rapoport 	 * (for example pointing to a hugepage that has never been
1257ca5999fdSMike Rapoport 	 * mapped in the pmd). The below checks will only care about
1258ca5999fdSMike Rapoport 	 * the low part of the pmd with 32bit PAE x86 anyway, with the
1259ca5999fdSMike Rapoport 	 * exception of pmd_none(). So the important thing is that if
1260ca5999fdSMike Rapoport 	 * the low part of the pmd is found null, the high part will
1261ca5999fdSMike Rapoport 	 * also be null, or the pmd_none() check below would be
1262ca5999fdSMike Rapoport 	 * confused.
1263ca5999fdSMike Rapoport 	 */
1264ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1265ca5999fdSMike Rapoport 	barrier();
1266ca5999fdSMike Rapoport #endif
1267ca5999fdSMike Rapoport 	/*
1268ca5999fdSMike Rapoport 	 * !pmd_present() checks for pmd migration entries.
1269ca5999fdSMike Rapoport 	 *
1270ca5999fdSMike Rapoport 	 * The complete check uses is_pmd_migration_entry() in linux/swapops.h,
1271ca5999fdSMike Rapoport 	 * but using that requires moving the current function and pmd_trans_unstable()
1272ca5999fdSMike Rapoport 	 * to linux/swapops.h to resolve the dependency, which is too much code movement.
1273ca5999fdSMike Rapoport 	 *
1274ca5999fdSMike Rapoport 	 * !pmd_present() is equivalent to is_pmd_migration_entry() currently,
1275ca5999fdSMike Rapoport 	 * because !pmd_present() pages can only be under migration, not swapped
1276ca5999fdSMike Rapoport 	 * out.
1277ca5999fdSMike Rapoport 	 *
1278ca5999fdSMike Rapoport 	 * pmd_none() is preserved for future condition checks on pmd migration
1279ca5999fdSMike Rapoport 	 * entries and to avoid confusion with this function's name, although it
1280ca5999fdSMike Rapoport 	 * is redundant with !pmd_present().
1281ca5999fdSMike Rapoport 	 */
1282ca5999fdSMike Rapoport 	if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
1283ca5999fdSMike Rapoport 	    (IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval)))
1284ca5999fdSMike Rapoport 		return 1;
1285ca5999fdSMike Rapoport 	if (unlikely(pmd_bad(pmdval))) {
1286ca5999fdSMike Rapoport 		pmd_clear_bad(pmd);
1287ca5999fdSMike Rapoport 		return 1;
1288ca5999fdSMike Rapoport 	}
1289ca5999fdSMike Rapoport 	return 0;
1290ca5999fdSMike Rapoport }
1291ca5999fdSMike Rapoport 
1292ca5999fdSMike Rapoport /*
1293ca5999fdSMike Rapoport  * This is a noop if Transparent Hugepage Support is not built into
1294ca5999fdSMike Rapoport  * the kernel. Otherwise it is equivalent to
1295ca5999fdSMike Rapoport  * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
1296ca5999fdSMike Rapoport  * places that have already verified the pmd is not none and want to
1297ca5999fdSMike Rapoport  * walk ptes while holding the mmap_lock in read mode (write mode doesn't
1298ca5999fdSMike Rapoport  * need this). If THP is not enabled, the pmd can't go away under the
1299ca5999fdSMike Rapoport  * code even if MADV_DONTNEED runs, but if THP is enabled we need to
1300ca5999fdSMike Rapoport  * run a pmd_trans_unstable before walking the ptes after
1301ca5999fdSMike Rapoport  * split_huge_pmd returns (because it may have run when the pmd became
1302ca5999fdSMike Rapoport  * null, but then a page fault can map in a THP and not a regular page).
1303ca5999fdSMike Rapoport  */
1304ca5999fdSMike Rapoport static inline int pmd_trans_unstable(pmd_t *pmd)
1305ca5999fdSMike Rapoport {
1306ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1307ca5999fdSMike Rapoport 	return pmd_none_or_trans_huge_or_clear_bad(pmd);
1308ca5999fdSMike Rapoport #else
1309ca5999fdSMike Rapoport 	return 0;
1310ca5999fdSMike Rapoport #endif
1311ca5999fdSMike Rapoport }
1312ca5999fdSMike Rapoport 
1313f9ce0be7SKirill A. Shutemov /*
1314f9ce0be7SKirill A. Shutemov  * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
1315f9ce0be7SKirill A. Shutemov  * If we check pmd_trans_unstable() first we will trip the pmd_bad() check
1316f9ce0be7SKirill A. Shutemov  * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
1317f9ce0be7SKirill A. Shutemov  * returning 1, but not before it spams dmesg with the pmd_clear_bad() output.
1318f9ce0be7SKirill A. Shutemov  */
1319f9ce0be7SKirill A. Shutemov static inline int pmd_devmap_trans_unstable(pmd_t *pmd)
1320f9ce0be7SKirill A. Shutemov {
1321f9ce0be7SKirill A. Shutemov 	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
1322f9ce0be7SKirill A. Shutemov }
1323f9ce0be7SKirill A. Shutemov 
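/*
 * Illustrative sketch, not part of this header: the usual read-locked
 * walker pattern built on the helpers above, in the style of the
 * madvise() pte-range walkers in mm/:
 *
 *	if (pmd_trans_unstable(pmd))
 *		return 0;	(treat the pmd as none and move on)
 *
 *	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	for (; addr != end; pte++, addr += PAGE_SIZE) {
 *		pte_t ptent = *pte;
 *		(examine or update ptent here)
 *	}
 *	pte_unmap_unlock(orig_pte, ptl);
 */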
1324ca5999fdSMike Rapoport #ifndef CONFIG_NUMA_BALANCING
1325ca5999fdSMike Rapoport /*
1326ca5999fdSMike Rapoport  * Technically a PTE can be PROTNONE even when not doing NUMA balancing, but
1327ca5999fdSMike Rapoport  * the only case the kernel cares about is NUMA balancing, and the bit is only
1328ca5999fdSMike Rapoport  * ever set when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not
13291067b261SRandy Dunlap  * marked _PAGE_PROTNONE, so by default implement the helper as "always no". It
1330ca5999fdSMike Rapoport  * is the responsibility of the caller to distinguish between PROT_NONE
1331ca5999fdSMike Rapoport  * protections and NUMA hinting fault protections.
1332ca5999fdSMike Rapoport  */
1333ca5999fdSMike Rapoport static inline int pte_protnone(pte_t pte)
1334ca5999fdSMike Rapoport {
1335ca5999fdSMike Rapoport 	return 0;
1336ca5999fdSMike Rapoport }
1337ca5999fdSMike Rapoport 
1338ca5999fdSMike Rapoport static inline int pmd_protnone(pmd_t pmd)
1339ca5999fdSMike Rapoport {
1340ca5999fdSMike Rapoport 	return 0;
1341ca5999fdSMike Rapoport }
1342ca5999fdSMike Rapoport #endif /* CONFIG_NUMA_BALANCING */
1343ca5999fdSMike Rapoport 
1344ca5999fdSMike Rapoport #endif /* CONFIG_MMU */
1345ca5999fdSMike Rapoport 
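/*
 * Illustrative sketch, not part of this header: how the fault path tells
 * a NUMA hinting fault apart from a real PROT_NONE mapping, roughly as
 * handle_pte_fault() in mm/memory.c does. pte_protnone() alone is not
 * enough; the vma accessibility check does the disambiguation:
 *
 *	if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
 *		return do_numa_page(vmf);
 */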
1346ca5999fdSMike Rapoport #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
1347ca5999fdSMike Rapoport 
1348ca5999fdSMike Rapoport #ifndef __PAGETABLE_P4D_FOLDED
1349ca5999fdSMike Rapoport int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
1350ca5999fdSMike Rapoport int p4d_clear_huge(p4d_t *p4d);
1351ca5999fdSMike Rapoport #else
1352ca5999fdSMike Rapoport static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
1353ca5999fdSMike Rapoport {
1354ca5999fdSMike Rapoport 	return 0;
1355ca5999fdSMike Rapoport }
1356ca5999fdSMike Rapoport static inline int p4d_clear_huge(p4d_t *p4d)
1357ca5999fdSMike Rapoport {
1358ca5999fdSMike Rapoport 	return 0;
1359ca5999fdSMike Rapoport }
1360ca5999fdSMike Rapoport #endif /* !__PAGETABLE_P4D_FOLDED */
1361ca5999fdSMike Rapoport 
1362ca5999fdSMike Rapoport int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
1363ca5999fdSMike Rapoport int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
1364ca5999fdSMike Rapoport int pud_clear_huge(pud_t *pud);
1365ca5999fdSMike Rapoport int pmd_clear_huge(pmd_t *pmd);
1366ca5999fdSMike Rapoport int p4d_free_pud_page(p4d_t *p4d, unsigned long addr);
1367ca5999fdSMike Rapoport int pud_free_pmd_page(pud_t *pud, unsigned long addr);
1368ca5999fdSMike Rapoport int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
1369ca5999fdSMike Rapoport #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
1370ca5999fdSMike Rapoport static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
1371ca5999fdSMike Rapoport {
1372ca5999fdSMike Rapoport 	return 0;
1373ca5999fdSMike Rapoport }
1374ca5999fdSMike Rapoport static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
1375ca5999fdSMike Rapoport {
1376ca5999fdSMike Rapoport 	return 0;
1377ca5999fdSMike Rapoport }
1378ca5999fdSMike Rapoport static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
1379ca5999fdSMike Rapoport {
1380ca5999fdSMike Rapoport 	return 0;
1381ca5999fdSMike Rapoport }
1382ca5999fdSMike Rapoport static inline int p4d_clear_huge(p4d_t *p4d)
1383ca5999fdSMike Rapoport {
1384ca5999fdSMike Rapoport 	return 0;
1385ca5999fdSMike Rapoport }
1386ca5999fdSMike Rapoport static inline int pud_clear_huge(pud_t *pud)
1387ca5999fdSMike Rapoport {
1388ca5999fdSMike Rapoport 	return 0;
1389ca5999fdSMike Rapoport }
1390ca5999fdSMike Rapoport static inline int pmd_clear_huge(pmd_t *pmd)
1391ca5999fdSMike Rapoport {
1392ca5999fdSMike Rapoport 	return 0;
1393ca5999fdSMike Rapoport }
1394ca5999fdSMike Rapoport static inline int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
1395ca5999fdSMike Rapoport {
1396ca5999fdSMike Rapoport 	return 0;
1397ca5999fdSMike Rapoport }
1398ca5999fdSMike Rapoport static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
1399ca5999fdSMike Rapoport {
1400ca5999fdSMike Rapoport 	return 0;
1401ca5999fdSMike Rapoport }
1402ca5999fdSMike Rapoport static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
1403ca5999fdSMike Rapoport {
1404ca5999fdSMike Rapoport 	return 0;
1405ca5999fdSMike Rapoport }
1406ca5999fdSMike Rapoport #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
1407ca5999fdSMike Rapoport 
1408ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
1409ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1410ca5999fdSMike Rapoport /*
1411ca5999fdSMike Rapoport  * ARCHes with special requirements for evicting THP backing TLB entries can
1412ca5999fdSMike Rapoport  * implement this. Otherwise it can also help optimize the normal TLB flush in
14131067b261SRandy Dunlap  * the THP regime. Stock flush_tlb_range() typically has an optimization to
14141067b261SRandy Dunlap  * nuke the entire TLB if the flush span is greater than a threshold, which
14151067b261SRandy Dunlap  * will likely be true for a single huge page. Thus a single THP flush will
14161067b261SRandy Dunlap  * invalidate the entire TLB, which is not desirable.
1417ca5999fdSMike Rapoport  * e.g. see arch/arc: flush_pmd_tlb_range
1418ca5999fdSMike Rapoport  */
1419ca5999fdSMike Rapoport #define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
1420ca5999fdSMike Rapoport #define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
1421ca5999fdSMike Rapoport #else
1422ca5999fdSMike Rapoport #define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
1423ca5999fdSMike Rapoport #define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
1424ca5999fdSMike Rapoport #endif
1425ca5999fdSMike Rapoport #endif
1426ca5999fdSMike Rapoport 
1427ca5999fdSMike Rapoport struct file;
1428ca5999fdSMike Rapoport int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
1429ca5999fdSMike Rapoport 				 unsigned long size, pgprot_t *vma_prot);
1430ca5999fdSMike Rapoport 
1431ca5999fdSMike Rapoport #ifndef CONFIG_X86_ESPFIX64
1432ca5999fdSMike Rapoport static inline void init_espfix_bsp(void) { }
1433ca5999fdSMike Rapoport #endif
1434ca5999fdSMike Rapoport 
1435ca5999fdSMike Rapoport extern void __init pgtable_cache_init(void);
1436ca5999fdSMike Rapoport 
1437ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
1438ca5999fdSMike Rapoport static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
1439ca5999fdSMike Rapoport {
1440ca5999fdSMike Rapoport 	return true;
1441ca5999fdSMike Rapoport }
1442ca5999fdSMike Rapoport 
1443ca5999fdSMike Rapoport static inline bool arch_has_pfn_modify_check(void)
1444ca5999fdSMike Rapoport {
1445ca5999fdSMike Rapoport 	return false;
1446ca5999fdSMike Rapoport }
1447ca5999fdSMike Rapoport #endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
1448ca5999fdSMike Rapoport 
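/*
 * Illustrative sketch, not part of this header: mprotect-style code can
 * use these two hooks to let the architecture veto protection changes on
 * certain pfns (x86 overrides them for the L1TF mitigation), along these
 * lines:
 *
 *	if (arch_has_pfn_modify_check() &&
 *	    !pfn_modify_allowed(pte_pfn(pte), newprot))
 *		return -EACCES;
 */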
1449ca5999fdSMike Rapoport /*
1450ca5999fdSMike Rapoport  * Architecture PAGE_KERNEL_* fallbacks
1451ca5999fdSMike Rapoport  *
1452ca5999fdSMike Rapoport  * Some architectures don't define certain PAGE_KERNEL_* flags. This is either
1453ca5999fdSMike Rapoport  * because they really don't support them, or the port needs to be updated to
1454ca5999fdSMike Rapoport  * reflect the required functionality. Below is a set of relatively safe,
1455ca5999fdSMike Rapoport  * best-effort fallbacks that we can count on while the architectures
1456ca5999fdSMike Rapoport  * do not yet define their own.
1457ca5999fdSMike Rapoport  */
1458ca5999fdSMike Rapoport 
1459ca5999fdSMike Rapoport #ifndef PAGE_KERNEL_RO
1460ca5999fdSMike Rapoport # define PAGE_KERNEL_RO PAGE_KERNEL
1461ca5999fdSMike Rapoport #endif
1462ca5999fdSMike Rapoport 
1463ca5999fdSMike Rapoport #ifndef PAGE_KERNEL_EXEC
1464ca5999fdSMike Rapoport # define PAGE_KERNEL_EXEC PAGE_KERNEL
1465ca5999fdSMike Rapoport #endif
1466ca5999fdSMike Rapoport 
1467ca5999fdSMike Rapoport /*
1468ca5999fdSMike Rapoport  * Page Table Modification bits for pgtbl_mod_mask.
1469ca5999fdSMike Rapoport  *
1470ca5999fdSMike Rapoport  * These are used by the p?d_alloc_track*() set of functions and in the generic
1471ca5999fdSMike Rapoport  * vmalloc/ioremap code to track at which page-table levels entries have been
1472ca5999fdSMike Rapoport  * modified. Based on that, the code can better decide when vmalloc and ioremap
1473ca5999fdSMike Rapoport  * mapping changes need to be synchronized with other page tables in the system
1474ca5999fdSMike Rapoport  * (see the usage sketch further below).
 */
1475ca5999fdSMike Rapoport #define		__PGTBL_PGD_MODIFIED	0
1476ca5999fdSMike Rapoport #define		__PGTBL_P4D_MODIFIED	1
1477ca5999fdSMike Rapoport #define		__PGTBL_PUD_MODIFIED	2
1478ca5999fdSMike Rapoport #define		__PGTBL_PMD_MODIFIED	3
1479ca5999fdSMike Rapoport #define		__PGTBL_PTE_MODIFIED	4
1480ca5999fdSMike Rapoport 
1481ca5999fdSMike Rapoport #define PGTBL_PGD_MODIFIED	BIT(__PGTBL_PGD_MODIFIED)
1482ca5999fdSMike Rapoport #define PGTBL_P4D_MODIFIED	BIT(__PGTBL_P4D_MODIFIED)
1483ca5999fdSMike Rapoport #define PGTBL_PUD_MODIFIED	BIT(__PGTBL_PUD_MODIFIED)
1484ca5999fdSMike Rapoport #define PGTBL_PMD_MODIFIED	BIT(__PGTBL_PMD_MODIFIED)
1485ca5999fdSMike Rapoport #define PGTBL_PTE_MODIFIED	BIT(__PGTBL_PTE_MODIFIED)
1486ca5999fdSMike Rapoport 
1487ca5999fdSMike Rapoport /* Page-Table Modification Mask */
1488ca5999fdSMike Rapoport typedef unsigned int pgtbl_mod_mask;
1489ca5999fdSMike Rapoport 
1490ca5999fdSMike Rapoport #endif /* !__ASSEMBLY__ */
1491ca5999fdSMike Rapoport 
1492cef39703SArnd Bergmann #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
1493cef39703SArnd Bergmann #ifdef CONFIG_PHYS_ADDR_T_64BIT
1494cef39703SArnd Bergmann /*
1495cef39703SArnd Bergmann  * ZSMALLOC needs to know the highest PFN on 32-bit architectures
1496cef39703SArnd Bergmann  * with physical address space extension, but falls back to
1497cef39703SArnd Bergmann  * BITS_PER_LONG otherwise.
1498cef39703SArnd Bergmann  */
1499cef39703SArnd Bergmann #error Missing MAX_POSSIBLE_PHYSMEM_BITS definition
1500cef39703SArnd Bergmann #else
1501cef39703SArnd Bergmann #define MAX_POSSIBLE_PHYSMEM_BITS 32
1502cef39703SArnd Bergmann #endif
1503cef39703SArnd Bergmann #endif
1504cef39703SArnd Bergmann 
1505ca5999fdSMike Rapoport #ifndef has_transparent_hugepage
1506ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1507ca5999fdSMike Rapoport #define has_transparent_hugepage() 1
1508ca5999fdSMike Rapoport #else
1509ca5999fdSMike Rapoport #define has_transparent_hugepage() 0
1510ca5999fdSMike Rapoport #endif
1511ca5999fdSMike Rapoport #endif
1512ca5999fdSMike Rapoport 
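/*
 * Illustrative sketch, not part of this header: how the pgtbl_mod_mask
 * bits defined above are consumed. Generic mapping code ORs the
 * PGTBL_*_MODIFIED bits into a mask while it populates the levels, and
 * the caller then synchronizes other page tables when the architecture
 * asks for it, roughly as mm/vmalloc.c does:
 *
 *	pgtbl_mod_mask mask = 0;
 *
 *	(populate pgd/p4d/pud/pmd/pte, ORing PGTBL_*_MODIFIED into mask)
 *
 *	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
 *		arch_sync_kernel_mappings(start, end);
 */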
1513ca5999fdSMike Rapoport /*
1514ca5999fdSMike Rapoport  * On some architectures it depends on the mm whether the p4d/pud or pmd
1515ca5999fdSMike Rapoport  * layer of the page table hierarchy is folded or not.
1516ca5999fdSMike Rapoport  */
1517ca5999fdSMike Rapoport #ifndef mm_p4d_folded
1518ca5999fdSMike Rapoport #define mm_p4d_folded(mm)	__is_defined(__PAGETABLE_P4D_FOLDED)
1519ca5999fdSMike Rapoport #endif
1520ca5999fdSMike Rapoport 
1521ca5999fdSMike Rapoport #ifndef mm_pud_folded
1522ca5999fdSMike Rapoport #define mm_pud_folded(mm)	__is_defined(__PAGETABLE_PUD_FOLDED)
1523ca5999fdSMike Rapoport #endif
1524ca5999fdSMike Rapoport 
1525ca5999fdSMike Rapoport #ifndef mm_pmd_folded
1526ca5999fdSMike Rapoport #define mm_pmd_folded(mm)	__is_defined(__PAGETABLE_PMD_FOLDED)
1527ca5999fdSMike Rapoport #endif
1528ca5999fdSMike Rapoport 
1529d3f7b1bbSVasily Gorbik #ifndef p4d_offset_lockless
1530d3f7b1bbSVasily Gorbik #define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address)
1531d3f7b1bbSVasily Gorbik #endif
1532d3f7b1bbSVasily Gorbik #ifndef pud_offset_lockless
1533d3f7b1bbSVasily Gorbik #define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address)
1534d3f7b1bbSVasily Gorbik #endif
1535d3f7b1bbSVasily Gorbik #ifndef pmd_offset_lockless
1536d3f7b1bbSVasily Gorbik #define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address)
1537d3f7b1bbSVasily Gorbik #endif
1538d3f7b1bbSVasily Gorbik 
1539ca5999fdSMike Rapoport /*
1540ca5999fdSMike Rapoport  * p?d_leaf() - true if this entry is a final mapping to a physical address.
1541ca5999fdSMike Rapoport  * This differs from p?d_huge() in that these helpers are always available (if
1542ca5999fdSMike Rapoport  * the architecture supports large pages at the appropriate level), even
1543ca5999fdSMike Rapoport  * if CONFIG_HUGETLB_PAGE is not defined.
1544ca5999fdSMike Rapoport  * Only meaningful when called on a valid entry.
1545ca5999fdSMike Rapoport  */
1546ca5999fdSMike Rapoport #ifndef pgd_leaf
1547ca5999fdSMike Rapoport #define pgd_leaf(x)	0
1548ca5999fdSMike Rapoport #endif
1549ca5999fdSMike Rapoport #ifndef p4d_leaf
1550ca5999fdSMike Rapoport #define p4d_leaf(x)	0
1551ca5999fdSMike Rapoport #endif
1552ca5999fdSMike Rapoport #ifndef pud_leaf
1553ca5999fdSMike Rapoport #define pud_leaf(x)	0
1554ca5999fdSMike Rapoport #endif
1555ca5999fdSMike Rapoport #ifndef pmd_leaf
1556ca5999fdSMike Rapoport #define pmd_leaf(x)	0
1557ca5999fdSMike Rapoport #endif
1558ca5999fdSMike Rapoport 
1559560dabbdSPeter Zijlstra #ifndef pgd_leaf_size
1560560dabbdSPeter Zijlstra #define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
1561560dabbdSPeter Zijlstra #endif
1562560dabbdSPeter Zijlstra #ifndef p4d_leaf_size
1563560dabbdSPeter Zijlstra #define p4d_leaf_size(x) P4D_SIZE
1564560dabbdSPeter Zijlstra #endif
1565560dabbdSPeter Zijlstra #ifndef pud_leaf_size
1566560dabbdSPeter Zijlstra #define pud_leaf_size(x) PUD_SIZE
1567560dabbdSPeter Zijlstra #endif
1568560dabbdSPeter Zijlstra #ifndef pmd_leaf_size
1569560dabbdSPeter Zijlstra #define pmd_leaf_size(x) PMD_SIZE
1570560dabbdSPeter Zijlstra #endif
1571560dabbdSPeter Zijlstra #ifndef pte_leaf_size
1572560dabbdSPeter Zijlstra #define pte_leaf_size(x) PAGE_SIZE
1573560dabbdSPeter Zijlstra #endif
1574560dabbdSPeter Zijlstra 
1575ca5999fdSMike Rapoport #endif /* _LINUX_PGTABLE_H */
1576