xref: /linux-6.15/include/linux/pgtable.h (revision d15dfd31)
1ca5999fdSMike Rapoport /* SPDX-License-Identifier: GPL-2.0 */
2ca5999fdSMike Rapoport #ifndef _LINUX_PGTABLE_H
3ca5999fdSMike Rapoport #define _LINUX_PGTABLE_H
4ca5999fdSMike Rapoport 
5ca5999fdSMike Rapoport #include <linux/pfn.h>
6ca5999fdSMike Rapoport #include <asm/pgtable.h>
7ca5999fdSMike Rapoport 
8ca5999fdSMike Rapoport #ifndef __ASSEMBLY__
9ca5999fdSMike Rapoport #ifdef CONFIG_MMU
10ca5999fdSMike Rapoport 
11ca5999fdSMike Rapoport #include <linux/mm_types.h>
12ca5999fdSMike Rapoport #include <linux/bug.h>
13ca5999fdSMike Rapoport #include <linux/errno.h>
14ca5999fdSMike Rapoport #include <asm-generic/pgtable_uffd.h>
15ca5999fdSMike Rapoport 
16ca5999fdSMike Rapoport #if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
17ca5999fdSMike Rapoport 	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
18ca5999fdSMike Rapoport #error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
19ca5999fdSMike Rapoport #endif
20ca5999fdSMike Rapoport 
21ca5999fdSMike Rapoport /*
22ca5999fdSMike Rapoport  * On almost all architectures and configurations, 0 can be used as the
23ca5999fdSMike Rapoport  * upper ceiling to free_pgtables(): on many architectures it has the same
24ca5999fdSMike Rapoport  * effect as using TASK_SIZE.  However, there is one configuration which
25ca5999fdSMike Rapoport  * must impose a more careful limit, to avoid freeing kernel pgtables.
26ca5999fdSMike Rapoport  */
27ca5999fdSMike Rapoport #ifndef USER_PGTABLES_CEILING
28ca5999fdSMike Rapoport #define USER_PGTABLES_CEILING	0UL
29ca5999fdSMike Rapoport #endif
30ca5999fdSMike Rapoport 
31e05c7b1fSMike Rapoport /*
32974b9b2cSMike Rapoport  * A page table page can be thought of as an array like this: pXd_t[PTRS_PER_PxD]
33974b9b2cSMike Rapoport  *
34974b9b2cSMike Rapoport  * The pXx_index() functions return the index of the entry in the page
35974b9b2cSMike Rapoport  * table page which would control the given virtual address.
36974b9b2cSMike Rapoport  *
37974b9b2cSMike Rapoport  * As these functions may be used by the same code for different levels of
38974b9b2cSMike Rapoport  * the page table folding, they are always available, regardless of the
39974b9b2cSMike Rapoport  * CONFIG_PGTABLE_LEVELS value. For the folded levels they simply return 0
40974b9b2cSMike Rapoport  * because in such cases PTRS_PER_PxD equals 1.
41974b9b2cSMike Rapoport  */
42974b9b2cSMike Rapoport 
43974b9b2cSMike Rapoport static inline unsigned long pte_index(unsigned long address)
44974b9b2cSMike Rapoport {
45974b9b2cSMike Rapoport 	return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
46974b9b2cSMike Rapoport }
47974b9b2cSMike Rapoport 
48974b9b2cSMike Rapoport #ifndef pmd_index
49974b9b2cSMike Rapoport static inline unsigned long pmd_index(unsigned long address)
50974b9b2cSMike Rapoport {
51974b9b2cSMike Rapoport 	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
52974b9b2cSMike Rapoport }
53974b9b2cSMike Rapoport #define pmd_index pmd_index
54974b9b2cSMike Rapoport #endif
55974b9b2cSMike Rapoport 
56974b9b2cSMike Rapoport #ifndef pud_index
57974b9b2cSMike Rapoport static inline unsigned long pud_index(unsigned long address)
58974b9b2cSMike Rapoport {
59974b9b2cSMike Rapoport 	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
60974b9b2cSMike Rapoport }
61974b9b2cSMike Rapoport #define pud_index pud_index
62974b9b2cSMike Rapoport #endif
63974b9b2cSMike Rapoport 
64974b9b2cSMike Rapoport #ifndef pgd_index
65974b9b2cSMike Rapoport /* Must be a compile-time constant, so implement it as a macro */
66974b9b2cSMike Rapoport #define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
67974b9b2cSMike Rapoport #endif
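
/*
 * Illustrative sketch, not part of the original header: the pXx_index()
 * helpers above simply slice successive bit fields out of a virtual
 * address.  A hypothetical debug helper could print every level's index
 * (pr_info() availability and the helper name are assumptions here):
 */
static inline void dump_pgtable_indices(unsigned long addr)
{
	pr_info("pgd:%lu pud:%lu pmd:%lu pte:%lu\n",
		pgd_index(addr), pud_index(addr),
		pmd_index(addr), pte_index(addr));
}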
68974b9b2cSMike Rapoport 
69974b9b2cSMike Rapoport #ifndef pte_offset_kernel
70974b9b2cSMike Rapoport static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
71974b9b2cSMike Rapoport {
72974b9b2cSMike Rapoport 	return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
73974b9b2cSMike Rapoport }
74974b9b2cSMike Rapoport #define pte_offset_kernel pte_offset_kernel
75974b9b2cSMike Rapoport #endif
76974b9b2cSMike Rapoport 
77974b9b2cSMike Rapoport #if defined(CONFIG_HIGHPTE)
78974b9b2cSMike Rapoport #define pte_offset_map(dir, address)				\
79974b9b2cSMike Rapoport 	((pte_t *)kmap_atomic(pmd_page(*(dir))) +		\
80974b9b2cSMike Rapoport 	 pte_index((address)))
81974b9b2cSMike Rapoport #define pte_unmap(pte) kunmap_atomic((pte))
82974b9b2cSMike Rapoport #else
83974b9b2cSMike Rapoport #define pte_offset_map(dir, address)	pte_offset_kernel((dir), (address))
84974b9b2cSMike Rapoport #define pte_unmap(pte) ((void)(pte))	/* NOP */
85974b9b2cSMike Rapoport #endif
86974b9b2cSMike Rapoport 
87974b9b2cSMike Rapoport /* Find an entry in the second-level page table. */
88974b9b2cSMike Rapoport #ifndef pmd_offset
89974b9b2cSMike Rapoport static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
90974b9b2cSMike Rapoport {
91974b9b2cSMike Rapoport 	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
92974b9b2cSMike Rapoport }
93974b9b2cSMike Rapoport #define pmd_offset pmd_offset
94974b9b2cSMike Rapoport #endif
95974b9b2cSMike Rapoport 
96974b9b2cSMike Rapoport #ifndef pud_offset
97974b9b2cSMike Rapoport static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
98974b9b2cSMike Rapoport {
99974b9b2cSMike Rapoport 	return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
100974b9b2cSMike Rapoport }
101974b9b2cSMike Rapoport #define pud_offset pud_offset
102974b9b2cSMike Rapoport #endif
103974b9b2cSMike Rapoport 
104974b9b2cSMike Rapoport static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
105974b9b2cSMike Rapoport {
106974b9b2cSMike Rapoport 	return (pgd + pgd_index(address));
107974b9b2cSMike Rapoport }
108974b9b2cSMike Rapoport 
109974b9b2cSMike Rapoport /*
110974b9b2cSMike Rapoport  * a shortcut to get a pgd_t in a given mm
111974b9b2cSMike Rapoport  */
112974b9b2cSMike Rapoport #ifndef pgd_offset
113974b9b2cSMike Rapoport #define pgd_offset(mm, address)		pgd_offset_pgd((mm)->pgd, (address))
114974b9b2cSMike Rapoport #endif
115974b9b2cSMike Rapoport 
116974b9b2cSMike Rapoport /*
117974b9b2cSMike Rapoport  * a shortcut which implies the use of the kernel's pgd, instead
118974b9b2cSMike Rapoport  * of a process's
119974b9b2cSMike Rapoport  */
120bd05220cSJessica Clarke #ifndef pgd_offset_k
121974b9b2cSMike Rapoport #define pgd_offset_k(address)		pgd_offset(&init_mm, (address))
122bd05220cSJessica Clarke #endif
123974b9b2cSMike Rapoport 
124974b9b2cSMike Rapoport /*
125e05c7b1fSMike Rapoport  * In many cases it is known that a virtual address is mapped at PMD or PTE
126e05c7b1fSMike Rapoport  * level, so instead of traversing all the page table levels we can use
127e05c7b1fSMike Rapoport  * simple helpers: pmd_off() and pmd_off_k() return a pointer to the PMD
128e05c7b1fSMike Rapoport  * entry in the user or kernel page tables, and virt_to_kpte() translates
129e05c7b1fSMike Rapoport  * a kernel virtual address directly to a pointer to its PTE.
130e05c7b1fSMike Rapoport  */
131e05c7b1fSMike Rapoport static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va)
132e05c7b1fSMike Rapoport {
133e05c7b1fSMike Rapoport 	return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), va);
134e05c7b1fSMike Rapoport }
135e05c7b1fSMike Rapoport 
136e05c7b1fSMike Rapoport static inline pmd_t *pmd_off_k(unsigned long va)
137e05c7b1fSMike Rapoport {
138e05c7b1fSMike Rapoport 	return pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va);
139e05c7b1fSMike Rapoport }
140e05c7b1fSMike Rapoport 
141e05c7b1fSMike Rapoport static inline pte_t *virt_to_kpte(unsigned long vaddr)
142e05c7b1fSMike Rapoport {
143e05c7b1fSMike Rapoport 	pmd_t *pmd = pmd_off_k(vaddr);
144e05c7b1fSMike Rapoport 
145e05c7b1fSMike Rapoport 	return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
146e05c7b1fSMike Rapoport }
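
/*
 * Illustrative sketch, not part of the original header: a typical use of
 * virt_to_kpte() to inspect a kernel mapping.  The caller must know the
 * address is mapped at PTE granularity; a huge (PMD-level) kernel mapping
 * would be misinterpreted here.  The helper name is hypothetical.
 */
static inline bool kernel_pte_is_writable(unsigned long vaddr)
{
	pte_t *ptep = virt_to_kpte(vaddr);

	/* NULL means no PTE table is mapped at this address. */
	return ptep && pte_write(*ptep);
}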
147e05c7b1fSMike Rapoport 
148ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
149ca5999fdSMike Rapoport extern int ptep_set_access_flags(struct vm_area_struct *vma,
150ca5999fdSMike Rapoport 				 unsigned long address, pte_t *ptep,
151ca5999fdSMike Rapoport 				 pte_t entry, int dirty);
152ca5999fdSMike Rapoport #endif
153ca5999fdSMike Rapoport 
154ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
155ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
156ca5999fdSMike Rapoport extern int pmdp_set_access_flags(struct vm_area_struct *vma,
157ca5999fdSMike Rapoport 				 unsigned long address, pmd_t *pmdp,
158ca5999fdSMike Rapoport 				 pmd_t entry, int dirty);
159ca5999fdSMike Rapoport extern int pudp_set_access_flags(struct vm_area_struct *vma,
160ca5999fdSMike Rapoport 				 unsigned long address, pud_t *pudp,
161ca5999fdSMike Rapoport 				 pud_t entry, int dirty);
162ca5999fdSMike Rapoport #else
163ca5999fdSMike Rapoport static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
164ca5999fdSMike Rapoport 					unsigned long address, pmd_t *pmdp,
165ca5999fdSMike Rapoport 					pmd_t entry, int dirty)
166ca5999fdSMike Rapoport {
167ca5999fdSMike Rapoport 	BUILD_BUG();
168ca5999fdSMike Rapoport 	return 0;
169ca5999fdSMike Rapoport }
170ca5999fdSMike Rapoport static inline int pudp_set_access_flags(struct vm_area_struct *vma,
171ca5999fdSMike Rapoport 					unsigned long address, pud_t *pudp,
172ca5999fdSMike Rapoport 					pud_t entry, int dirty)
173ca5999fdSMike Rapoport {
174ca5999fdSMike Rapoport 	BUILD_BUG();
175ca5999fdSMike Rapoport 	return 0;
176ca5999fdSMike Rapoport }
177ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
178ca5999fdSMike Rapoport #endif
179ca5999fdSMike Rapoport 
180ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
181ca5999fdSMike Rapoport static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
182ca5999fdSMike Rapoport 					    unsigned long address,
183ca5999fdSMike Rapoport 					    pte_t *ptep)
184ca5999fdSMike Rapoport {
185ca5999fdSMike Rapoport 	pte_t pte = *ptep;
186ca5999fdSMike Rapoport 	int r = 1;
187ca5999fdSMike Rapoport 	if (!pte_young(pte))
188ca5999fdSMike Rapoport 		r = 0;
189ca5999fdSMike Rapoport 	else
190ca5999fdSMike Rapoport 		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
191ca5999fdSMike Rapoport 	return r;
192ca5999fdSMike Rapoport }
193ca5999fdSMike Rapoport #endif
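
/*
 * Illustrative sketch, not part of the original header: this is the shape
 * of the reference-bit harvesting done by reclaim.  The caller is assumed
 * to hold the PTE lock for @ptep; the wrapper name is hypothetical.
 */
static inline bool pte_was_referenced(struct vm_area_struct *vma,
				      unsigned long addr, pte_t *ptep)
{
	/* Clearing the young bit re-arms it for the next access. */
	return ptep_test_and_clear_young(vma, addr, ptep) != 0;
}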
194ca5999fdSMike Rapoport 
195ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
196ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
197ca5999fdSMike Rapoport static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
198ca5999fdSMike Rapoport 					    unsigned long address,
199ca5999fdSMike Rapoport 					    pmd_t *pmdp)
200ca5999fdSMike Rapoport {
201ca5999fdSMike Rapoport 	pmd_t pmd = *pmdp;
202ca5999fdSMike Rapoport 	int r = 1;
203ca5999fdSMike Rapoport 	if (!pmd_young(pmd))
204ca5999fdSMike Rapoport 		r = 0;
205ca5999fdSMike Rapoport 	else
206ca5999fdSMike Rapoport 		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
207ca5999fdSMike Rapoport 	return r;
208ca5999fdSMike Rapoport }
209ca5999fdSMike Rapoport #else
210ca5999fdSMike Rapoport static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
211ca5999fdSMike Rapoport 					    unsigned long address,
212ca5999fdSMike Rapoport 					    pmd_t *pmdp)
213ca5999fdSMike Rapoport {
214ca5999fdSMike Rapoport 	BUILD_BUG();
215ca5999fdSMike Rapoport 	return 0;
216ca5999fdSMike Rapoport }
217ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
218ca5999fdSMike Rapoport #endif
219ca5999fdSMike Rapoport 
220ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
221ca5999fdSMike Rapoport int ptep_clear_flush_young(struct vm_area_struct *vma,
222ca5999fdSMike Rapoport 			   unsigned long address, pte_t *ptep);
223ca5999fdSMike Rapoport #endif
224ca5999fdSMike Rapoport 
225ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
226ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
227ca5999fdSMike Rapoport extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
228ca5999fdSMike Rapoport 				  unsigned long address, pmd_t *pmdp);
229ca5999fdSMike Rapoport #else
230ca5999fdSMike Rapoport /*
231ca5999fdSMike Rapoport  * Despite being relevant only to THP, this API is called from generic rmap
232ca5999fdSMike Rapoport  * code under PageTransHuge(), hence it needs a dummy implementation for !THP.
233ca5999fdSMike Rapoport  */
234ca5999fdSMike Rapoport static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
235ca5999fdSMike Rapoport 					 unsigned long address, pmd_t *pmdp)
236ca5999fdSMike Rapoport {
237ca5999fdSMike Rapoport 	BUILD_BUG();
238ca5999fdSMike Rapoport 	return 0;
239ca5999fdSMike Rapoport }
240ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
241ca5999fdSMike Rapoport #endif
242ca5999fdSMike Rapoport 
243ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
244ca5999fdSMike Rapoport static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
245ca5999fdSMike Rapoport 				       unsigned long address,
246ca5999fdSMike Rapoport 				       pte_t *ptep)
247ca5999fdSMike Rapoport {
248ca5999fdSMike Rapoport 	pte_t pte = *ptep;
249ca5999fdSMike Rapoport 	pte_clear(mm, address, ptep);
250ca5999fdSMike Rapoport 	return pte;
251ca5999fdSMike Rapoport }
252ca5999fdSMike Rapoport #endif
253ca5999fdSMike Rapoport 
254481e980aSChristophe Leroy #ifndef __HAVE_ARCH_PTEP_GET
255481e980aSChristophe Leroy static inline pte_t ptep_get(pte_t *ptep)
256481e980aSChristophe Leroy {
257481e980aSChristophe Leroy 	return READ_ONCE(*ptep);
258481e980aSChristophe Leroy }
259481e980aSChristophe Leroy #endif
260481e980aSChristophe Leroy 
2612a4a06daSPeter Zijlstra #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
2622a4a06daSPeter Zijlstra /*
2632a4a06daSPeter Zijlstra  * WARNING: only to be used in the get_user_pages_fast() implementation.
2642a4a06daSPeter Zijlstra  *
2652a4a06daSPeter Zijlstra  * With get_user_pages_fast(), we walk down the pagetables without taking any
2662a4a06daSPeter Zijlstra  * locks.  For this we would like to load the pointers atomically, but sometimes
2672a4a06daSPeter Zijlstra  * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE).  What
2682a4a06daSPeter Zijlstra  * we do have is the guarantee that a PTE will only either go from not present
2692a4a06daSPeter Zijlstra  * to present, or present to not present or both -- it will not switch to a
2702a4a06daSPeter Zijlstra  * completely different present page without a TLB flush in between; something
2712a4a06daSPeter Zijlstra  * that we are blocking by holding interrupts off.
2722a4a06daSPeter Zijlstra  *
2732a4a06daSPeter Zijlstra  * Setting ptes from not present to present goes:
2742a4a06daSPeter Zijlstra  *
2752a4a06daSPeter Zijlstra  *   ptep->pte_high = h;
2762a4a06daSPeter Zijlstra  *   smp_wmb();
2772a4a06daSPeter Zijlstra  *   ptep->pte_low = l;
2782a4a06daSPeter Zijlstra  *
2792a4a06daSPeter Zijlstra  * And present to not present goes:
2802a4a06daSPeter Zijlstra  *
2812a4a06daSPeter Zijlstra  *   ptep->pte_low = 0;
2822a4a06daSPeter Zijlstra  *   smp_wmb();
2832a4a06daSPeter Zijlstra  *   ptep->pte_high = 0;
2842a4a06daSPeter Zijlstra  *
2852a4a06daSPeter Zijlstra  * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
2862a4a06daSPeter Zijlstra  * We load pte_high *after* loading pte_low, which ensures we don't see an older
2872a4a06daSPeter Zijlstra  * value of pte_high.  *Then* we recheck pte_low, which ensures that we haven't
2882a4a06daSPeter Zijlstra  * picked up a changed pte high. We might have gotten rubbish values from
2892a4a06daSPeter Zijlstra  * pte_low and pte_high, but we are guaranteed that pte_low will not have the
2902a4a06daSPeter Zijlstra  * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
2912a4a06daSPeter Zijlstra  * operates on present ptes we're safe.
2922a4a06daSPeter Zijlstra  */
2932a4a06daSPeter Zijlstra static inline pte_t ptep_get_lockless(pte_t *ptep)
2942a4a06daSPeter Zijlstra {
2952a4a06daSPeter Zijlstra 	pte_t pte;
2962a4a06daSPeter Zijlstra 
2972a4a06daSPeter Zijlstra 	do {
2982a4a06daSPeter Zijlstra 		pte.pte_low = ptep->pte_low;
2992a4a06daSPeter Zijlstra 		smp_rmb();
3002a4a06daSPeter Zijlstra 		pte.pte_high = ptep->pte_high;
3012a4a06daSPeter Zijlstra 		smp_rmb();
3022a4a06daSPeter Zijlstra 	} while (unlikely(pte.pte_low != ptep->pte_low));
3032a4a06daSPeter Zijlstra 
3042a4a06daSPeter Zijlstra 	return pte;
3052a4a06daSPeter Zijlstra }
3062a4a06daSPeter Zijlstra #else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
3072a4a06daSPeter Zijlstra /*
3082a4a06daSPeter Zijlstra  * We require that the PTE can be read atomically.
3092a4a06daSPeter Zijlstra  */
3102a4a06daSPeter Zijlstra static inline pte_t ptep_get_lockless(pte_t *ptep)
3112a4a06daSPeter Zijlstra {
3122a4a06daSPeter Zijlstra 	return ptep_get(ptep);
3132a4a06daSPeter Zijlstra }
3142a4a06daSPeter Zijlstra #endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
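
/*
 * Illustrative sketch, not part of the original header: the gup_fast-style
 * pattern built on ptep_get_lockless().  A snapshot is taken with
 * interrupts off, used, and then re-validated, since it can go stale at
 * any moment.  The helper name is hypothetical.
 */
static inline bool pte_snapshot_still_valid(pte_t *ptep, pte_t snapshot)
{
	/* Raw-value compare; equivalent to the pte_same() defined below. */
	return pte_val(ptep_get(ptep)) == pte_val(snapshot);
}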
3152a4a06daSPeter Zijlstra 
316ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
317ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
318ca5999fdSMike Rapoport static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
319ca5999fdSMike Rapoport 					    unsigned long address,
320ca5999fdSMike Rapoport 					    pmd_t *pmdp)
321ca5999fdSMike Rapoport {
322ca5999fdSMike Rapoport 	pmd_t pmd = *pmdp;
323ca5999fdSMike Rapoport 	pmd_clear(pmdp);
324ca5999fdSMike Rapoport 	return pmd;
325ca5999fdSMike Rapoport }
326ca5999fdSMike Rapoport #endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
327ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
328ca5999fdSMike Rapoport static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
329ca5999fdSMike Rapoport 					    unsigned long address,
330ca5999fdSMike Rapoport 					    pud_t *pudp)
331ca5999fdSMike Rapoport {
332ca5999fdSMike Rapoport 	pud_t pud = *pudp;
333ca5999fdSMike Rapoport 
334ca5999fdSMike Rapoport 	pud_clear(pudp);
335ca5999fdSMike Rapoport 	return pud;
336ca5999fdSMike Rapoport }
337ca5999fdSMike Rapoport #endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
338ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
339ca5999fdSMike Rapoport 
340ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
341ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
342ca5999fdSMike Rapoport static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
343ca5999fdSMike Rapoport 					    unsigned long address, pmd_t *pmdp,
344ca5999fdSMike Rapoport 					    int full)
345ca5999fdSMike Rapoport {
346ca5999fdSMike Rapoport 	return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
347ca5999fdSMike Rapoport }
348ca5999fdSMike Rapoport #endif
349ca5999fdSMike Rapoport 
350ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
351ca5999fdSMike Rapoport static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
352ca5999fdSMike Rapoport 					    unsigned long address, pud_t *pudp,
353ca5999fdSMike Rapoport 					    int full)
354ca5999fdSMike Rapoport {
355ca5999fdSMike Rapoport 	return pudp_huge_get_and_clear(mm, address, pudp);
356ca5999fdSMike Rapoport }
357ca5999fdSMike Rapoport #endif
358ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
359ca5999fdSMike Rapoport 
360ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
361ca5999fdSMike Rapoport static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
362ca5999fdSMike Rapoport 					    unsigned long address, pte_t *ptep,
363ca5999fdSMike Rapoport 					    int full)
364ca5999fdSMike Rapoport {
365ca5999fdSMike Rapoport 	pte_t pte;
366ca5999fdSMike Rapoport 	pte = ptep_get_and_clear(mm, address, ptep);
367ca5999fdSMike Rapoport 	return pte;
368ca5999fdSMike Rapoport }
369ca5999fdSMike Rapoport #endif
370ca5999fdSMike Rapoport 
371ca5999fdSMike Rapoport 
372ca5999fdSMike Rapoport /*
373ca5999fdSMike Rapoport  * If two threads concurrently fault at the same page, the thread that
374ca5999fdSMike Rapoport  * won the race updates the PTE and its local TLB/cache. The other thread
375ca5999fdSMike Rapoport  * gives up, simply does nothing, and continues; on architectures where
376ca5999fdSMike Rapoport  * software can update the TLB, the local TLB can be updated here to avoid
377ca5999fdSMike Rapoport  * the next page fault. Unlike update_mmu_cache(), this function updates
378ca5999fdSMike Rapoport  * the TLB only; it does nothing with the cache or anything else.
379ca5999fdSMike Rapoport  */
380ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_UPDATE_MMU_TLB
381ca5999fdSMike Rapoport static inline void update_mmu_tlb(struct vm_area_struct *vma,
382ca5999fdSMike Rapoport 				unsigned long address, pte_t *ptep)
383ca5999fdSMike Rapoport {
384ca5999fdSMike Rapoport }
385ca5999fdSMike Rapoport #define __HAVE_ARCH_UPDATE_MMU_TLB
386ca5999fdSMike Rapoport #endif
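
/*
 * Illustrative sketch, not part of the original header: how the losing
 * thread in the fault race described above typically bails out.  The
 * helper name is hypothetical.
 */
static inline bool pte_changed_under_us(struct vm_area_struct *vma,
					unsigned long addr, pte_t *ptep,
					pte_t orig_pte)
{
	if (pte_val(ptep_get(ptep)) != pte_val(orig_pte)) {
		/* Another thread installed the PTE; refresh the TLB only. */
		update_mmu_tlb(vma, addr, ptep);
		return true;
	}
	return false;
}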
387ca5999fdSMike Rapoport 
388ca5999fdSMike Rapoport /*
389ca5999fdSMike Rapoport  * Some architectures may be able to avoid expensive synchronization
390ca5999fdSMike Rapoport  * primitives when modifications are made to PTEs which are already
391ca5999fdSMike Rapoport  * not present, or during destruction of the address space.
392ca5999fdSMike Rapoport  */
393ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
394ca5999fdSMike Rapoport static inline void pte_clear_not_present_full(struct mm_struct *mm,
395ca5999fdSMike Rapoport 					      unsigned long address,
396ca5999fdSMike Rapoport 					      pte_t *ptep,
397ca5999fdSMike Rapoport 					      int full)
398ca5999fdSMike Rapoport {
399ca5999fdSMike Rapoport 	pte_clear(mm, address, ptep);
400ca5999fdSMike Rapoport }
401ca5999fdSMike Rapoport #endif
402ca5999fdSMike Rapoport 
403ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
404ca5999fdSMike Rapoport extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
405ca5999fdSMike Rapoport 			      unsigned long address,
406ca5999fdSMike Rapoport 			      pte_t *ptep);
407ca5999fdSMike Rapoport #endif
408ca5999fdSMike Rapoport 
409ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
410ca5999fdSMike Rapoport extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
411ca5999fdSMike Rapoport 			      unsigned long address,
412ca5999fdSMike Rapoport 			      pmd_t *pmdp);
413ca5999fdSMike Rapoport extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
414ca5999fdSMike Rapoport 			      unsigned long address,
415ca5999fdSMike Rapoport 			      pud_t *pudp);
416ca5999fdSMike Rapoport #endif
417ca5999fdSMike Rapoport 
418ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
419ca5999fdSMike Rapoport struct mm_struct;
420ca5999fdSMike Rapoport static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
421ca5999fdSMike Rapoport {
422ca5999fdSMike Rapoport 	pte_t old_pte = *ptep;
423ca5999fdSMike Rapoport 	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
424ca5999fdSMike Rapoport }
425ca5999fdSMike Rapoport #endif
426ca5999fdSMike Rapoport 
427ca5999fdSMike Rapoport /*
428ca5999fdSMike Rapoport  * On some architectures the hardware does not set the page access bit when
429ca5999fdSMike Rapoport  * accessing a memory page; it is the responsibility of software to set it.
430ca5999fdSMike Rapoport  * Tracking the access bit this way incurs an extra page fault, so as an
431ca5999fdSMike Rapoport  * optimization the access bit can be set during the whole page fault flow
432ca5999fdSMike Rapoport  * on these arches. To differentiate it from the pte_mkyoung macro, this
433ca5999fdSMike Rapoport  * macro is used on platforms where software maintains the page access bit.
434ca5999fdSMike Rapoport  */
#ifndef pte_sw_mkyoung
#define pte_sw_mkyoung	pte_mkyoung
#endif

435ca5999fdSMike Rapoport #ifndef pte_savedwrite
436ca5999fdSMike Rapoport #define pte_savedwrite pte_write
437ca5999fdSMike Rapoport #endif
438ca5999fdSMike Rapoport 
439ca5999fdSMike Rapoport #ifndef pte_mk_savedwrite
440ca5999fdSMike Rapoport #define pte_mk_savedwrite pte_mkwrite
441ca5999fdSMike Rapoport #endif
442ca5999fdSMike Rapoport 
443ca5999fdSMike Rapoport #ifndef pte_clear_savedwrite
444ca5999fdSMike Rapoport #define pte_clear_savedwrite pte_wrprotect
445ca5999fdSMike Rapoport #endif
446ca5999fdSMike Rapoport 
447ca5999fdSMike Rapoport #ifndef pmd_savedwrite
448ca5999fdSMike Rapoport #define pmd_savedwrite pmd_write
449ca5999fdSMike Rapoport #endif
450ca5999fdSMike Rapoport 
451ca5999fdSMike Rapoport #ifndef pmd_mk_savedwrite
452ca5999fdSMike Rapoport #define pmd_mk_savedwrite pmd_mkwrite
453ca5999fdSMike Rapoport #endif
454ca5999fdSMike Rapoport 
455ca5999fdSMike Rapoport #ifndef pmd_clear_savedwrite
456ca5999fdSMike Rapoport #define pmd_clear_savedwrite pmd_wrprotect
457ca5999fdSMike Rapoport #endif
458ca5999fdSMike Rapoport 
459ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
460ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
461ca5999fdSMike Rapoport static inline void pmdp_set_wrprotect(struct mm_struct *mm,
462ca5999fdSMike Rapoport 				      unsigned long address, pmd_t *pmdp)
463ca5999fdSMike Rapoport {
464ca5999fdSMike Rapoport 	pmd_t old_pmd = *pmdp;
465ca5999fdSMike Rapoport 	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
466ca5999fdSMike Rapoport }
467ca5999fdSMike Rapoport #else
468ca5999fdSMike Rapoport static inline void pmdp_set_wrprotect(struct mm_struct *mm,
469ca5999fdSMike Rapoport 				      unsigned long address, pmd_t *pmdp)
470ca5999fdSMike Rapoport {
471ca5999fdSMike Rapoport 	BUILD_BUG();
472ca5999fdSMike Rapoport }
473ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
474ca5999fdSMike Rapoport #endif
475ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
476ca5999fdSMike Rapoport #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
477ca5999fdSMike Rapoport static inline void pudp_set_wrprotect(struct mm_struct *mm,
478ca5999fdSMike Rapoport 				      unsigned long address, pud_t *pudp)
479ca5999fdSMike Rapoport {
480ca5999fdSMike Rapoport 	pud_t old_pud = *pudp;
481ca5999fdSMike Rapoport 
482ca5999fdSMike Rapoport 	set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
483ca5999fdSMike Rapoport }
484ca5999fdSMike Rapoport #else
485ca5999fdSMike Rapoport static inline void pudp_set_wrprotect(struct mm_struct *mm,
486ca5999fdSMike Rapoport 				      unsigned long address, pud_t *pudp)
487ca5999fdSMike Rapoport {
488ca5999fdSMike Rapoport 	BUILD_BUG();
489ca5999fdSMike Rapoport }
490ca5999fdSMike Rapoport #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
491ca5999fdSMike Rapoport #endif
492ca5999fdSMike Rapoport 
493ca5999fdSMike Rapoport #ifndef pmdp_collapse_flush
494ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
495ca5999fdSMike Rapoport extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
496ca5999fdSMike Rapoport 				 unsigned long address, pmd_t *pmdp);
497ca5999fdSMike Rapoport #else
498ca5999fdSMike Rapoport static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
499ca5999fdSMike Rapoport 					unsigned long address,
500ca5999fdSMike Rapoport 					pmd_t *pmdp)
501ca5999fdSMike Rapoport {
502ca5999fdSMike Rapoport 	BUILD_BUG();
503ca5999fdSMike Rapoport 	return *pmdp;
504ca5999fdSMike Rapoport }
505ca5999fdSMike Rapoport #define pmdp_collapse_flush pmdp_collapse_flush
506ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
507ca5999fdSMike Rapoport #endif
508ca5999fdSMike Rapoport 
509ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
510ca5999fdSMike Rapoport extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
511ca5999fdSMike Rapoport 				       pgtable_t pgtable);
512ca5999fdSMike Rapoport #endif
513ca5999fdSMike Rapoport 
514ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
515ca5999fdSMike Rapoport extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
516ca5999fdSMike Rapoport #endif
517ca5999fdSMike Rapoport 
518ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
519ca5999fdSMike Rapoport /*
520ca5999fdSMike Rapoport  * This is an implementation of pmdp_establish() that is only suitable for an
521ca5999fdSMike Rapoport  * architecture that doesn't have hardware dirty/accessed bits. In this case we
522ca5999fdSMike Rapoport  * can't race with the CPU setting these bits and a non-atomic approach is fine.
523ca5999fdSMike Rapoport  */
524ca5999fdSMike Rapoport static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
525ca5999fdSMike Rapoport 		unsigned long address, pmd_t *pmdp, pmd_t pmd)
526ca5999fdSMike Rapoport {
527ca5999fdSMike Rapoport 	pmd_t old_pmd = *pmdp;
528ca5999fdSMike Rapoport 	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
529ca5999fdSMike Rapoport 	return old_pmd;
530ca5999fdSMike Rapoport }
531ca5999fdSMike Rapoport #endif
532ca5999fdSMike Rapoport 
533ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMDP_INVALIDATE
534ca5999fdSMike Rapoport extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
535ca5999fdSMike Rapoport 			    pmd_t *pmdp);
536ca5999fdSMike Rapoport #endif
537ca5999fdSMike Rapoport 
538ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTE_SAME
539ca5999fdSMike Rapoport static inline int pte_same(pte_t pte_a, pte_t pte_b)
540ca5999fdSMike Rapoport {
541ca5999fdSMike Rapoport 	return pte_val(pte_a) == pte_val(pte_b);
542ca5999fdSMike Rapoport }
543ca5999fdSMike Rapoport #endif
544ca5999fdSMike Rapoport 
545ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTE_UNUSED
546ca5999fdSMike Rapoport /*
547ca5999fdSMike Rapoport  * Some architectures provide facilities to virtualization guests
548ca5999fdSMike Rapoport  * so that they can flag allocated pages as unused. This allows the
549ca5999fdSMike Rapoport  * host to transparently reclaim unused pages. This function returns
550ca5999fdSMike Rapoport  * whether the pte's page is unused.
551ca5999fdSMike Rapoport  */
552ca5999fdSMike Rapoport static inline int pte_unused(pte_t pte)
553ca5999fdSMike Rapoport {
554ca5999fdSMike Rapoport 	return 0;
555ca5999fdSMike Rapoport }
556ca5999fdSMike Rapoport #endif
557ca5999fdSMike Rapoport 
558ca5999fdSMike Rapoport #ifndef pte_access_permitted
559ca5999fdSMike Rapoport #define pte_access_permitted(pte, write) \
560ca5999fdSMike Rapoport 	(pte_present(pte) && (!(write) || pte_write(pte)))
561ca5999fdSMike Rapoport #endif
562ca5999fdSMike Rapoport 
563ca5999fdSMike Rapoport #ifndef pmd_access_permitted
564ca5999fdSMike Rapoport #define pmd_access_permitted(pmd, write) \
565ca5999fdSMike Rapoport 	(pmd_present(pmd) && (!(write) || pmd_write(pmd)))
566ca5999fdSMike Rapoport #endif
567ca5999fdSMike Rapoport 
568ca5999fdSMike Rapoport #ifndef pud_access_permitted
569ca5999fdSMike Rapoport #define pud_access_permitted(pud, write) \
570ca5999fdSMike Rapoport 	(pud_present(pud) && (!(write) || pud_write(pud)))
571ca5999fdSMike Rapoport #endif
572ca5999fdSMike Rapoport 
573ca5999fdSMike Rapoport #ifndef p4d_access_permitted
574ca5999fdSMike Rapoport #define p4d_access_permitted(p4d, write) \
575ca5999fdSMike Rapoport 	(p4d_present(p4d) && (!(write) || p4d_write(p4d)))
576ca5999fdSMike Rapoport #endif
577ca5999fdSMike Rapoport 
578ca5999fdSMike Rapoport #ifndef pgd_access_permitted
579ca5999fdSMike Rapoport #define pgd_access_permitted(pgd, write) \
580ca5999fdSMike Rapoport 	(pgd_present(pgd) && (!(write) || pgd_write(pgd)))
581ca5999fdSMike Rapoport #endif
582ca5999fdSMike Rapoport 
583ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMD_SAME
584ca5999fdSMike Rapoport static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
585ca5999fdSMike Rapoport {
586ca5999fdSMike Rapoport 	return pmd_val(pmd_a) == pmd_val(pmd_b);
587ca5999fdSMike Rapoport }
588ca5999fdSMike Rapoport 
589ca5999fdSMike Rapoport static inline int pud_same(pud_t pud_a, pud_t pud_b)
590ca5999fdSMike Rapoport {
591ca5999fdSMike Rapoport 	return pud_val(pud_a) == pud_val(pud_b);
592ca5999fdSMike Rapoport }
593ca5999fdSMike Rapoport #endif
594ca5999fdSMike Rapoport 
595ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_P4D_SAME
596ca5999fdSMike Rapoport static inline int p4d_same(p4d_t p4d_a, p4d_t p4d_b)
597ca5999fdSMike Rapoport {
598ca5999fdSMike Rapoport 	return p4d_val(p4d_a) == p4d_val(p4d_b);
599ca5999fdSMike Rapoport }
600ca5999fdSMike Rapoport #endif
601ca5999fdSMike Rapoport 
602ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PGD_SAME
603ca5999fdSMike Rapoport static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
604ca5999fdSMike Rapoport {
605ca5999fdSMike Rapoport 	return pgd_val(pgd_a) == pgd_val(pgd_b);
606ca5999fdSMike Rapoport }
607ca5999fdSMike Rapoport #endif
608ca5999fdSMike Rapoport 
609ca5999fdSMike Rapoport /*
610ca5999fdSMike Rapoport  * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
611ca5999fdSMike Rapoport  * TLB flush will be required as a result of the "set". For example, use
612ca5999fdSMike Rapoport  * in scenarios where it is known ahead of time that the routine is
613ca5999fdSMike Rapoport  * setting non-present entries, or re-setting an existing entry to the
614ca5999fdSMike Rapoport  * same value. Otherwise, use the typical "set" helpers and flush the
615ca5999fdSMike Rapoport  * TLB.
616ca5999fdSMike Rapoport  */
617ca5999fdSMike Rapoport #define set_pte_safe(ptep, pte) \
618ca5999fdSMike Rapoport ({ \
619ca5999fdSMike Rapoport 	WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
620ca5999fdSMike Rapoport 	set_pte(ptep, pte); \
621ca5999fdSMike Rapoport })
622ca5999fdSMike Rapoport 
623ca5999fdSMike Rapoport #define set_pmd_safe(pmdp, pmd) \
624ca5999fdSMike Rapoport ({ \
625ca5999fdSMike Rapoport 	WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
626ca5999fdSMike Rapoport 	set_pmd(pmdp, pmd); \
627ca5999fdSMike Rapoport })
628ca5999fdSMike Rapoport 
629ca5999fdSMike Rapoport #define set_pud_safe(pudp, pud) \
630ca5999fdSMike Rapoport ({ \
631ca5999fdSMike Rapoport 	WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \
632ca5999fdSMike Rapoport 	set_pud(pudp, pud); \
633ca5999fdSMike Rapoport })
634ca5999fdSMike Rapoport 
635ca5999fdSMike Rapoport #define set_p4d_safe(p4dp, p4d) \
636ca5999fdSMike Rapoport ({ \
637ca5999fdSMike Rapoport 	WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \
638ca5999fdSMike Rapoport 	set_p4d(p4dp, p4d); \
639ca5999fdSMike Rapoport })
640ca5999fdSMike Rapoport 
641ca5999fdSMike Rapoport #define set_pgd_safe(pgdp, pgd) \
642ca5999fdSMike Rapoport ({ \
643ca5999fdSMike Rapoport 	WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
644ca5999fdSMike Rapoport 	set_pgd(pgdp, pgd); \
645ca5999fdSMike Rapoport })
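
/*
 * Illustrative sketch, not part of the original header: populating a
 * freshly allocated, not-yet-visible page table during early init, where
 * no TLB flush can possibly be needed.  pfn_pte() is the usual
 * arch-provided constructor; the helper name is hypothetical.
 */
static inline void init_set_pte(pte_t *ptep, unsigned long pfn, pgprot_t prot)
{
	/* Warns once if this would silently change a live, different PTE. */
	set_pte_safe(ptep, pfn_pte(pfn, prot));
}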
646ca5999fdSMike Rapoport 
647ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_DO_SWAP_PAGE
648ca5999fdSMike Rapoport /*
649ca5999fdSMike Rapoport  * Some architectures support metadata associated with a page. When a
650ca5999fdSMike Rapoport  * page is being swapped out, this metadata must be saved so it can be
651ca5999fdSMike Rapoport  * restored when the page is swapped back in. SPARC M7 and newer
652ca5999fdSMike Rapoport  * processors support an ADI (Application Data Integrity) tag for the
653ca5999fdSMike Rapoport  * page as metadata for the page. arch_do_swap_page() can restore this
654ca5999fdSMike Rapoport  * metadata when a page is swapped back in.
655ca5999fdSMike Rapoport  */
656ca5999fdSMike Rapoport static inline void arch_do_swap_page(struct mm_struct *mm,
657ca5999fdSMike Rapoport 				     struct vm_area_struct *vma,
658ca5999fdSMike Rapoport 				     unsigned long addr,
659ca5999fdSMike Rapoport 				     pte_t pte, pte_t oldpte)
660ca5999fdSMike Rapoport {
661ca5999fdSMike Rapoport 
662ca5999fdSMike Rapoport }
663ca5999fdSMike Rapoport #endif
664ca5999fdSMike Rapoport 
665ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_UNMAP_ONE
666ca5999fdSMike Rapoport /*
667ca5999fdSMike Rapoport  * Some architectures support metadata associated with a page. When a
668ca5999fdSMike Rapoport  * page is being swapped out, this metadata must be saved so it can be
669ca5999fdSMike Rapoport  * restored when the page is swapped back in. SPARC M7 and newer
670ca5999fdSMike Rapoport  * processors support an ADI (Application Data Integrity) tag for the
671ca5999fdSMike Rapoport  * page as metadata for the page. arch_unmap_one() can save this
672ca5999fdSMike Rapoport  * metadata on a swap-out of a page.
673ca5999fdSMike Rapoport  */
674ca5999fdSMike Rapoport static inline int arch_unmap_one(struct mm_struct *mm,
675ca5999fdSMike Rapoport 				  struct vm_area_struct *vma,
676ca5999fdSMike Rapoport 				  unsigned long addr,
677ca5999fdSMike Rapoport 				  pte_t orig_pte)
678ca5999fdSMike Rapoport {
679ca5999fdSMike Rapoport 	return 0;
680ca5999fdSMike Rapoport }
681ca5999fdSMike Rapoport #endif
682ca5999fdSMike Rapoport 
6838a84802eSSteven Price /*
6848a84802eSSteven Price  * Allow architectures to preserve additional metadata associated with
6858a84802eSSteven Price  * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function
6868a84802eSSteven Price  * prototypes must be defined in the arch-specific asm/pgtable.h file.
6878a84802eSSteven Price  */
6888a84802eSSteven Price #ifndef __HAVE_ARCH_PREPARE_TO_SWAP
6898a84802eSSteven Price static inline int arch_prepare_to_swap(struct page *page)
6908a84802eSSteven Price {
6918a84802eSSteven Price 	return 0;
6928a84802eSSteven Price }
6938a84802eSSteven Price #endif
6948a84802eSSteven Price 
6958a84802eSSteven Price #ifndef __HAVE_ARCH_SWAP_INVALIDATE
6968a84802eSSteven Price static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
6978a84802eSSteven Price {
6988a84802eSSteven Price }
6998a84802eSSteven Price 
7008a84802eSSteven Price static inline void arch_swap_invalidate_area(int type)
7018a84802eSSteven Price {
7028a84802eSSteven Price }
7038a84802eSSteven Price #endif
7048a84802eSSteven Price 
7058a84802eSSteven Price #ifndef __HAVE_ARCH_SWAP_RESTORE
7068a84802eSSteven Price static inline void arch_swap_restore(swp_entry_t entry, struct page *page)
7078a84802eSSteven Price {
7088a84802eSSteven Price }
7098a84802eSSteven Price #endif
7108a84802eSSteven Price 
711ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
712ca5999fdSMike Rapoport #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
713ca5999fdSMike Rapoport #endif
714ca5999fdSMike Rapoport 
715ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_MOVE_PTE
716ca5999fdSMike Rapoport #define move_pte(pte, prot, old_addr, new_addr)	(pte)
717ca5999fdSMike Rapoport #endif
718ca5999fdSMike Rapoport 
719ca5999fdSMike Rapoport #ifndef pte_accessible
720ca5999fdSMike Rapoport # define pte_accessible(mm, pte)	((void)(pte), 1)
721ca5999fdSMike Rapoport #endif
722ca5999fdSMike Rapoport 
723ca5999fdSMike Rapoport #ifndef flush_tlb_fix_spurious_fault
724ca5999fdSMike Rapoport #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
725ca5999fdSMike Rapoport #endif
726ca5999fdSMike Rapoport 
727ca5999fdSMike Rapoport /*
728ca5999fdSMike Rapoport  * When walking page tables, get the address of the next boundary,
729ca5999fdSMike Rapoport  * or the end address of the range if that comes earlier.  Although no
730ca5999fdSMike Rapoport  * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
731ca5999fdSMike Rapoport  */
732ca5999fdSMike Rapoport 
733ca5999fdSMike Rapoport #define pgd_addr_end(addr, end)						\
734ca5999fdSMike Rapoport ({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
735ca5999fdSMike Rapoport 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
736ca5999fdSMike Rapoport })
737ca5999fdSMike Rapoport 
738ca5999fdSMike Rapoport #ifndef p4d_addr_end
739ca5999fdSMike Rapoport #define p4d_addr_end(addr, end)						\
740ca5999fdSMike Rapoport ({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
741ca5999fdSMike Rapoport 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
742ca5999fdSMike Rapoport })
743ca5999fdSMike Rapoport #endif
744ca5999fdSMike Rapoport 
745ca5999fdSMike Rapoport #ifndef pud_addr_end
746ca5999fdSMike Rapoport #define pud_addr_end(addr, end)						\
747ca5999fdSMike Rapoport ({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
748ca5999fdSMike Rapoport 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
749ca5999fdSMike Rapoport })
750ca5999fdSMike Rapoport #endif
751ca5999fdSMike Rapoport 
752ca5999fdSMike Rapoport #ifndef pmd_addr_end
753ca5999fdSMike Rapoport #define pmd_addr_end(addr, end)						\
754ca5999fdSMike Rapoport ({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
755ca5999fdSMike Rapoport 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
756ca5999fdSMike Rapoport })
757ca5999fdSMike Rapoport #endif
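
/*
 * Illustrative sketch, not part of the original header: the iteration
 * idiom the *_addr_end() macros support.  Each pass covers the largest
 * chunk of [addr, end) that does not cross a PMD boundary.
 */
static inline void for_each_pmd_chunk(unsigned long addr, unsigned long end)
{
	unsigned long next;

	do {
		next = pmd_addr_end(addr, end);
		/* ... operate on [addr, next), all within one PMD ... */
	} while (addr = next, addr != end);
}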
758ca5999fdSMike Rapoport 
759ca5999fdSMike Rapoport /*
760ca5999fdSMike Rapoport  * When walking page tables, we usually want to skip any p?d_none entries;
761ca5999fdSMike Rapoport  * and any p?d_bad entries - reporting the error before resetting to none.
762ca5999fdSMike Rapoport  * Do the tests inline, but report and clear the bad entry in mm/memory.c.
763ca5999fdSMike Rapoport  */
764ca5999fdSMike Rapoport void pgd_clear_bad(pgd_t *);
765ca5999fdSMike Rapoport 
766ca5999fdSMike Rapoport #ifndef __PAGETABLE_P4D_FOLDED
767ca5999fdSMike Rapoport void p4d_clear_bad(p4d_t *);
768ca5999fdSMike Rapoport #else
769ca5999fdSMike Rapoport #define p4d_clear_bad(p4d)        do { } while (0)
770ca5999fdSMike Rapoport #endif
771ca5999fdSMike Rapoport 
772ca5999fdSMike Rapoport #ifndef __PAGETABLE_PUD_FOLDED
773ca5999fdSMike Rapoport void pud_clear_bad(pud_t *);
774ca5999fdSMike Rapoport #else
775ca5999fdSMike Rapoport #define pud_clear_bad(pud)        do { } while (0)
776ca5999fdSMike Rapoport #endif
777ca5999fdSMike Rapoport 
778ca5999fdSMike Rapoport void pmd_clear_bad(pmd_t *);
779ca5999fdSMike Rapoport 
780ca5999fdSMike Rapoport static inline int pgd_none_or_clear_bad(pgd_t *pgd)
781ca5999fdSMike Rapoport {
782ca5999fdSMike Rapoport 	if (pgd_none(*pgd))
783ca5999fdSMike Rapoport 		return 1;
784ca5999fdSMike Rapoport 	if (unlikely(pgd_bad(*pgd))) {
785ca5999fdSMike Rapoport 		pgd_clear_bad(pgd);
786ca5999fdSMike Rapoport 		return 1;
787ca5999fdSMike Rapoport 	}
788ca5999fdSMike Rapoport 	return 0;
789ca5999fdSMike Rapoport }
790ca5999fdSMike Rapoport 
791ca5999fdSMike Rapoport static inline int p4d_none_or_clear_bad(p4d_t *p4d)
792ca5999fdSMike Rapoport {
793ca5999fdSMike Rapoport 	if (p4d_none(*p4d))
794ca5999fdSMike Rapoport 		return 1;
795ca5999fdSMike Rapoport 	if (unlikely(p4d_bad(*p4d))) {
796ca5999fdSMike Rapoport 		p4d_clear_bad(p4d);
797ca5999fdSMike Rapoport 		return 1;
798ca5999fdSMike Rapoport 	}
799ca5999fdSMike Rapoport 	return 0;
800ca5999fdSMike Rapoport }
801ca5999fdSMike Rapoport 
802ca5999fdSMike Rapoport static inline int pud_none_or_clear_bad(pud_t *pud)
803ca5999fdSMike Rapoport {
804ca5999fdSMike Rapoport 	if (pud_none(*pud))
805ca5999fdSMike Rapoport 		return 1;
806ca5999fdSMike Rapoport 	if (unlikely(pud_bad(*pud))) {
807ca5999fdSMike Rapoport 		pud_clear_bad(pud);
808ca5999fdSMike Rapoport 		return 1;
809ca5999fdSMike Rapoport 	}
810ca5999fdSMike Rapoport 	return 0;
811ca5999fdSMike Rapoport }
812ca5999fdSMike Rapoport 
813ca5999fdSMike Rapoport static inline int pmd_none_or_clear_bad(pmd_t *pmd)
814ca5999fdSMike Rapoport {
815ca5999fdSMike Rapoport 	if (pmd_none(*pmd))
816ca5999fdSMike Rapoport 		return 1;
817ca5999fdSMike Rapoport 	if (unlikely(pmd_bad(*pmd))) {
818ca5999fdSMike Rapoport 		pmd_clear_bad(pmd);
819ca5999fdSMike Rapoport 		return 1;
820ca5999fdSMike Rapoport 	}
821ca5999fdSMike Rapoport 	return 0;
822ca5999fdSMike Rapoport }
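
/*
 * Illustrative sketch, not part of the original header: the canonical walk
 * skeleton used throughout mm/, combining *_addr_end() clamping with the
 * none-or-bad filtering above.  The descent into lower levels is elided
 * and the helper name is hypothetical.
 */
static inline void walk_user_pgds(struct mm_struct *mm, unsigned long addr,
				  unsigned long end)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	unsigned long next;

	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		/* ... p4d_offset()/pud_offset()/pmd_offset() descent ... */
	} while (pgd++, addr = next, addr != end);
}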
823ca5999fdSMike Rapoport 
824ca5999fdSMike Rapoport static inline pte_t __ptep_modify_prot_start(struct vm_area_struct *vma,
825ca5999fdSMike Rapoport 					     unsigned long addr,
826ca5999fdSMike Rapoport 					     pte_t *ptep)
827ca5999fdSMike Rapoport {
828ca5999fdSMike Rapoport 	/*
829ca5999fdSMike Rapoport 	 * Get the current pte state, but zero it out to make it
830ca5999fdSMike Rapoport 	 * non-present, preventing the hardware from asynchronously
831ca5999fdSMike Rapoport 	 * updating it.
832ca5999fdSMike Rapoport 	 */
833ca5999fdSMike Rapoport 	return ptep_get_and_clear(vma->vm_mm, addr, ptep);
834ca5999fdSMike Rapoport }
835ca5999fdSMike Rapoport 
836ca5999fdSMike Rapoport static inline void __ptep_modify_prot_commit(struct vm_area_struct *vma,
837ca5999fdSMike Rapoport 					     unsigned long addr,
838ca5999fdSMike Rapoport 					     pte_t *ptep, pte_t pte)
839ca5999fdSMike Rapoport {
840ca5999fdSMike Rapoport 	/*
841ca5999fdSMike Rapoport 	 * The pte is non-present, so there's no hardware state to
842ca5999fdSMike Rapoport 	 * preserve.
843ca5999fdSMike Rapoport 	 */
844ca5999fdSMike Rapoport 	set_pte_at(vma->vm_mm, addr, ptep, pte);
845ca5999fdSMike Rapoport }
846ca5999fdSMike Rapoport 
847ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
848ca5999fdSMike Rapoport /*
849ca5999fdSMike Rapoport  * Start a pte protection read-modify-write transaction, which
850ca5999fdSMike Rapoport  * protects against asynchronous hardware modifications to the pte.
851ca5999fdSMike Rapoport  * The intention is not to prevent the hardware from making pte
852ca5999fdSMike Rapoport  * updates, but to prevent any updates it may make from being lost.
853ca5999fdSMike Rapoport  *
854ca5999fdSMike Rapoport  * This does not protect against other software modifications of the
855ca5999fdSMike Rapoport  * pte; the appropriate pte lock must be held over the transaction.
856ca5999fdSMike Rapoport  *
857ca5999fdSMike Rapoport  * Note that this interface is intended to be batchable, meaning that
858ca5999fdSMike Rapoport  * ptep_modify_prot_commit may not actually update the pte, but merely
859ca5999fdSMike Rapoport  * queue the update to be done at some later time.  The update must be
860ca5999fdSMike Rapoport  * actually committed before the pte lock is released, however.
861ca5999fdSMike Rapoport  */
862ca5999fdSMike Rapoport static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
863ca5999fdSMike Rapoport 					   unsigned long addr,
864ca5999fdSMike Rapoport 					   pte_t *ptep)
865ca5999fdSMike Rapoport {
866ca5999fdSMike Rapoport 	return __ptep_modify_prot_start(vma, addr, ptep);
867ca5999fdSMike Rapoport }
868ca5999fdSMike Rapoport 
869ca5999fdSMike Rapoport /*
870ca5999fdSMike Rapoport  * Commit an update to a pte, leaving any hardware-controlled bits in
871ca5999fdSMike Rapoport  * the PTE unmodified.
872ca5999fdSMike Rapoport  */
873ca5999fdSMike Rapoport static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
874ca5999fdSMike Rapoport 					   unsigned long addr,
875ca5999fdSMike Rapoport 					   pte_t *ptep, pte_t old_pte, pte_t pte)
876ca5999fdSMike Rapoport {
877ca5999fdSMike Rapoport 	__ptep_modify_prot_commit(vma, addr, ptep, pte);
878ca5999fdSMike Rapoport }
879ca5999fdSMike Rapoport #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
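
/*
 * Illustrative sketch, not part of the original header: a
 * change_protection()-style read-modify-write done under the PTE lock.
 * Accessed/dirty updates made before _start are carried in old_pte, so
 * they cannot be lost.  The helper name is hypothetical.
 */
static inline void pte_make_readonly(struct vm_area_struct *vma,
				     unsigned long addr, pte_t *ptep)
{
	pte_t old_pte, pte;

	old_pte = ptep_modify_prot_start(vma, addr, ptep);
	pte = pte_wrprotect(old_pte);
	ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
}
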
880ca5999fdSMike Rapoport #endif /* CONFIG_MMU */
881ca5999fdSMike Rapoport 
882ca5999fdSMike Rapoport /*
883ca5999fdSMike Rapoport  * No-op macros that just return the current protection value. Defined here
8841067b261SRandy Dunlap  * because these macros can be used even if CONFIG_MMU is not defined.
885ca5999fdSMike Rapoport  */
88663bb76deSPekka Enberg 
88763bb76deSPekka Enberg #ifndef pgprot_nx
88863bb76deSPekka Enberg #define pgprot_nx(prot)	(prot)
88963bb76deSPekka Enberg #endif
89063bb76deSPekka Enberg 
89163bb76deSPekka Enberg #ifndef pgprot_noncached
89263bb76deSPekka Enberg #define pgprot_noncached(prot)	(prot)
89363bb76deSPekka Enberg #endif
89463bb76deSPekka Enberg 
89563bb76deSPekka Enberg #ifndef pgprot_writecombine
89663bb76deSPekka Enberg #define pgprot_writecombine pgprot_noncached
89763bb76deSPekka Enberg #endif
89863bb76deSPekka Enberg 
89963bb76deSPekka Enberg #ifndef pgprot_writethrough
90063bb76deSPekka Enberg #define pgprot_writethrough pgprot_noncached
90163bb76deSPekka Enberg #endif
90263bb76deSPekka Enberg 
90363bb76deSPekka Enberg #ifndef pgprot_device
90463bb76deSPekka Enberg #define pgprot_device pgprot_noncached
90563bb76deSPekka Enberg #endif
90663bb76deSPekka Enberg 
907*d15dfd31SCatalin Marinas #ifndef pgprot_mhp
908*d15dfd31SCatalin Marinas #define pgprot_mhp(prot)	(prot)
909*d15dfd31SCatalin Marinas #endif
910*d15dfd31SCatalin Marinas 
91163bb76deSPekka Enberg #ifdef CONFIG_MMU
91263bb76deSPekka Enberg #ifndef pgprot_modify
91363bb76deSPekka Enberg #define pgprot_modify pgprot_modify
91463bb76deSPekka Enberg static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
91563bb76deSPekka Enberg {
91663bb76deSPekka Enberg 	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
91763bb76deSPekka Enberg 		newprot = pgprot_noncached(newprot);
91863bb76deSPekka Enberg 	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
91963bb76deSPekka Enberg 		newprot = pgprot_writecombine(newprot);
92063bb76deSPekka Enberg 	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
92163bb76deSPekka Enberg 		newprot = pgprot_device(newprot);
92263bb76deSPekka Enberg 	return newprot;
92363bb76deSPekka Enberg }
92463bb76deSPekka Enberg #endif
92563bb76deSPekka Enberg #endif /* CONFIG_MMU */
92663bb76deSPekka Enberg 
927ca5999fdSMike Rapoport #ifndef pgprot_encrypted
928ca5999fdSMike Rapoport #define pgprot_encrypted(prot)	(prot)
929ca5999fdSMike Rapoport #endif
930ca5999fdSMike Rapoport 
931ca5999fdSMike Rapoport #ifndef pgprot_decrypted
932ca5999fdSMike Rapoport #define pgprot_decrypted(prot)	(prot)
933ca5999fdSMike Rapoport #endif
934ca5999fdSMike Rapoport 
935ca5999fdSMike Rapoport /*
936ca5999fdSMike Rapoport  * A facility to provide lazy MMU batching.  This allows PTE updates and
937ca5999fdSMike Rapoport  * page invalidations to be delayed until a call to leave lazy MMU mode
938ca5999fdSMike Rapoport  * is issued.  Some architectures may benefit from doing this, and it is
939ca5999fdSMike Rapoport  * beneficial for both shadow and direct mode hypervisors, which may batch
940ca5999fdSMike Rapoport  * the PTE updates which happen during this window.  Note that using this
941ca5999fdSMike Rapoport  * interface requires that read hazards be removed from the code.  A read
942ca5999fdSMike Rapoport  * hazard could result in the direct mode hypervisor case, since the actual
943ca5999fdSMike Rapoport  * write to the page tables may not yet have taken place, so reads through
944ca5999fdSMike Rapoport  * a raw PTE pointer after it has been modified are not guaranteed to be
945ca5999fdSMike Rapoport  * up to date.  This mode can only be entered and left under the protection of
946ca5999fdSMike Rapoport  * the page table locks for all page tables which may be modified.  In the UP
947ca5999fdSMike Rapoport  * case, this is required so that preemption is disabled, and in the SMP case,
948ca5999fdSMike Rapoport  * it must synchronize the delayed page table writes properly on other CPUs.
949ca5999fdSMike Rapoport  */
950ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
951ca5999fdSMike Rapoport #define arch_enter_lazy_mmu_mode()	do {} while (0)
952ca5999fdSMike Rapoport #define arch_leave_lazy_mmu_mode()	do {} while (0)
953ca5999fdSMike Rapoport #define arch_flush_lazy_mmu_mode()	do {} while (0)
954ca5999fdSMike Rapoport #endif
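
/*
 * Illustrative sketch, not part of the original header: batching a run of
 * PTE writes under a held PTE lock so a paravirt backend can coalesce
 * them.  The helper name is hypothetical.
 */
static inline void set_pte_run(struct mm_struct *mm, unsigned long addr,
			       pte_t *ptep, unsigned long pfn,
			       pgprot_t prot, unsigned int nr)
{
	arch_enter_lazy_mmu_mode();
	for (; nr--; addr += PAGE_SIZE, ptep++, pfn++)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));
	/* Queued updates, if any, are forced out here. */
	arch_leave_lazy_mmu_mode();
}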
955ca5999fdSMike Rapoport 
956ca5999fdSMike Rapoport /*
957ca5999fdSMike Rapoport  * A facility to provide batching of the reload of page tables and
958ca5999fdSMike Rapoport  * other process state with the actual context switch code for
959ca5999fdSMike Rapoport  * paravirtualized guests.  By convention, only one of the batched
960ca5999fdSMike Rapoport  * update (lazy) modes (CPU, MMU) should be active at any given time,
961ca5999fdSMike Rapoport  * entry should never be nested, and entry and exits should always be
962ca5999fdSMike Rapoport  * paired.  This is for sanity of maintaining and reasoning about the
963ca5999fdSMike Rapoport  * kernel code.  In this case, the exit (end of the context switch) is
964ca5999fdSMike Rapoport  * in architecture-specific code, and so doesn't need a generic
965ca5999fdSMike Rapoport  * definition.
966ca5999fdSMike Rapoport  */
967ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
968ca5999fdSMike Rapoport #define arch_start_context_switch(prev)	do {} while (0)
969ca5999fdSMike Rapoport #endif
970ca5999fdSMike Rapoport 
971ca5999fdSMike Rapoport #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
972ca5999fdSMike Rapoport #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
973ca5999fdSMike Rapoport static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
974ca5999fdSMike Rapoport {
975ca5999fdSMike Rapoport 	return pmd;
976ca5999fdSMike Rapoport }
977ca5999fdSMike Rapoport 
978ca5999fdSMike Rapoport static inline int pmd_swp_soft_dirty(pmd_t pmd)
979ca5999fdSMike Rapoport {
980ca5999fdSMike Rapoport 	return 0;
981ca5999fdSMike Rapoport }
982ca5999fdSMike Rapoport 
983ca5999fdSMike Rapoport static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
984ca5999fdSMike Rapoport {
985ca5999fdSMike Rapoport 	return pmd;
986ca5999fdSMike Rapoport }
987ca5999fdSMike Rapoport #endif
988ca5999fdSMike Rapoport #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
989ca5999fdSMike Rapoport static inline int pte_soft_dirty(pte_t pte)
990ca5999fdSMike Rapoport {
991ca5999fdSMike Rapoport 	return 0;
992ca5999fdSMike Rapoport }
993ca5999fdSMike Rapoport 
994ca5999fdSMike Rapoport static inline int pmd_soft_dirty(pmd_t pmd)
995ca5999fdSMike Rapoport {
996ca5999fdSMike Rapoport 	return 0;
997ca5999fdSMike Rapoport }
998ca5999fdSMike Rapoport 
999ca5999fdSMike Rapoport static inline pte_t pte_mksoft_dirty(pte_t pte)
1000ca5999fdSMike Rapoport {
1001ca5999fdSMike Rapoport 	return pte;
1002ca5999fdSMike Rapoport }
1003ca5999fdSMike Rapoport 
1004ca5999fdSMike Rapoport static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
1005ca5999fdSMike Rapoport {
1006ca5999fdSMike Rapoport 	return pmd;
1007ca5999fdSMike Rapoport }
1008ca5999fdSMike Rapoport 
1009ca5999fdSMike Rapoport static inline pte_t pte_clear_soft_dirty(pte_t pte)
1010ca5999fdSMike Rapoport {
1011ca5999fdSMike Rapoport 	return pte;
1012ca5999fdSMike Rapoport }
1013ca5999fdSMike Rapoport 
1014ca5999fdSMike Rapoport static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
1015ca5999fdSMike Rapoport {
1016ca5999fdSMike Rapoport 	return pmd;
1017ca5999fdSMike Rapoport }
1018ca5999fdSMike Rapoport 
1019ca5999fdSMike Rapoport static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
1020ca5999fdSMike Rapoport {
1021ca5999fdSMike Rapoport 	return pte;
1022ca5999fdSMike Rapoport }
1023ca5999fdSMike Rapoport 
1024ca5999fdSMike Rapoport static inline int pte_swp_soft_dirty(pte_t pte)
1025ca5999fdSMike Rapoport {
1026ca5999fdSMike Rapoport 	return 0;
1027ca5999fdSMike Rapoport }
1028ca5999fdSMike Rapoport 
1029ca5999fdSMike Rapoport static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
1030ca5999fdSMike Rapoport {
1031ca5999fdSMike Rapoport 	return pte;
1032ca5999fdSMike Rapoport }
1033ca5999fdSMike Rapoport 
1034ca5999fdSMike Rapoport static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
1035ca5999fdSMike Rapoport {
1036ca5999fdSMike Rapoport 	return pmd;
1037ca5999fdSMike Rapoport }
1038ca5999fdSMike Rapoport 
1039ca5999fdSMike Rapoport static inline int pmd_swp_soft_dirty(pmd_t pmd)
1040ca5999fdSMike Rapoport {
1041ca5999fdSMike Rapoport 	return 0;
1042ca5999fdSMike Rapoport }
1043ca5999fdSMike Rapoport 
1044ca5999fdSMike Rapoport static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
1045ca5999fdSMike Rapoport {
1046ca5999fdSMike Rapoport 	return pmd;
1047ca5999fdSMike Rapoport }
1048ca5999fdSMike Rapoport #endif
1049ca5999fdSMike Rapoport 
1050ca5999fdSMike Rapoport #ifndef __HAVE_PFNMAP_TRACKING
1051ca5999fdSMike Rapoport /*
1052ca5999fdSMike Rapoport  * Interfaces that can be used by architecture code to keep track of
1053ca5999fdSMike Rapoport  * the memory type of pfn mappings specified by remap_pfn_range() and
1054ca5999fdSMike Rapoport  * vmf_insert_pfn().
1055ca5999fdSMike Rapoport  */
1056ca5999fdSMike Rapoport 
1057ca5999fdSMike Rapoport /*
1058ca5999fdSMike Rapoport  * track_pfn_remap is called when a _new_ pfn mapping is being established
1059ca5999fdSMike Rapoport  * by remap_pfn_range() for the physical range indicated by pfn and size.
1060ca5999fdSMike Rapoport  */
1061ca5999fdSMike Rapoport static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
1062ca5999fdSMike Rapoport 				  unsigned long pfn, unsigned long addr,
1063ca5999fdSMike Rapoport 				  unsigned long size)
1064ca5999fdSMike Rapoport {
1065ca5999fdSMike Rapoport 	return 0;
1066ca5999fdSMike Rapoport }
1067ca5999fdSMike Rapoport 
1068ca5999fdSMike Rapoport /*
1069ca5999fdSMike Rapoport  * track_pfn_insert is called when a _new_ single pfn is established
1070ca5999fdSMike Rapoport  * by vmf_insert_pfn().
1071ca5999fdSMike Rapoport  */
1072ca5999fdSMike Rapoport static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
1073ca5999fdSMike Rapoport 				    pfn_t pfn)
1074ca5999fdSMike Rapoport {
1075ca5999fdSMike Rapoport }
1076ca5999fdSMike Rapoport 
1077ca5999fdSMike Rapoport /*
1078ca5999fdSMike Rapoport  * track_pfn_copy is called when a vma covering a pfnmap gets
1079ca5999fdSMike Rapoport  * copied through copy_page_range().
1080ca5999fdSMike Rapoport  */
1081ca5999fdSMike Rapoport static inline int track_pfn_copy(struct vm_area_struct *vma)
1082ca5999fdSMike Rapoport {
1083ca5999fdSMike Rapoport 	return 0;
1084ca5999fdSMike Rapoport }
1085ca5999fdSMike Rapoport 
1086ca5999fdSMike Rapoport /*
1087ca5999fdSMike Rapoport  * untrack_pfn is called while unmapping a pfnmap for a region.
1088ca5999fdSMike Rapoport  * untrack can be called for a specific region indicated by pfn and size,
1089ca5999fdSMike Rapoport  * or for the entire vma (in which case pfn and size are zero).
1090ca5999fdSMike Rapoport  */
1091ca5999fdSMike Rapoport static inline void untrack_pfn(struct vm_area_struct *vma,
1092ca5999fdSMike Rapoport 			       unsigned long pfn, unsigned long size)
1093ca5999fdSMike Rapoport {
1094ca5999fdSMike Rapoport }
1095ca5999fdSMike Rapoport 
1096ca5999fdSMike Rapoport /*
1097ca5999fdSMike Rapoport  * untrack_pfn_moved is called when mremap() moves a pfnmap to a new region.
1098ca5999fdSMike Rapoport  */
1099ca5999fdSMike Rapoport static inline void untrack_pfn_moved(struct vm_area_struct *vma)
1100ca5999fdSMike Rapoport {
1101ca5999fdSMike Rapoport }
1102ca5999fdSMike Rapoport #else
1103ca5999fdSMike Rapoport extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
1104ca5999fdSMike Rapoport 			   unsigned long pfn, unsigned long addr,
1105ca5999fdSMike Rapoport 			   unsigned long size);
1106ca5999fdSMike Rapoport extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
1107ca5999fdSMike Rapoport 			     pfn_t pfn);
1108ca5999fdSMike Rapoport extern int track_pfn_copy(struct vm_area_struct *vma);
1109ca5999fdSMike Rapoport extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
1110ca5999fdSMike Rapoport 			unsigned long size);
1111ca5999fdSMike Rapoport extern void untrack_pfn_moved(struct vm_area_struct *vma);
1112ca5999fdSMike Rapoport #endif
1113ca5999fdSMike Rapoport 
1114ca5999fdSMike Rapoport #ifdef __HAVE_COLOR_ZERO_PAGE
1115ca5999fdSMike Rapoport static inline int is_zero_pfn(unsigned long pfn)
1116ca5999fdSMike Rapoport {
1117ca5999fdSMike Rapoport 	extern unsigned long zero_pfn;
1118ca5999fdSMike Rapoport 	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
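	/* unsigned wraparound: a pfn below zero_pfn yields a huge offset, failing the test below */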
1119ca5999fdSMike Rapoport 	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
1120ca5999fdSMike Rapoport }
1121ca5999fdSMike Rapoport 
1122ca5999fdSMike Rapoport #define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
1123ca5999fdSMike Rapoport 
1124ca5999fdSMike Rapoport #else
1125ca5999fdSMike Rapoport static inline int is_zero_pfn(unsigned long pfn)
1126ca5999fdSMike Rapoport {
1127ca5999fdSMike Rapoport 	extern unsigned long zero_pfn;
1128ca5999fdSMike Rapoport 	return pfn == zero_pfn;
1129ca5999fdSMike Rapoport }
1130ca5999fdSMike Rapoport 
1131ca5999fdSMike Rapoport static inline unsigned long my_zero_pfn(unsigned long addr)
1132ca5999fdSMike Rapoport {
1133ca5999fdSMike Rapoport 	extern unsigned long zero_pfn;
1134ca5999fdSMike Rapoport 	return zero_pfn;
1135ca5999fdSMike Rapoport }
1136ca5999fdSMike Rapoport #endif
1137ca5999fdSMike Rapoport 
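/*
 * Illustrative sketch: vm_normal_page() in mm/memory.c uses this helper to
 * avoid treating the shared zero page as a normal, refcounted page, roughly:
 *
 *	if (is_zero_pfn(pte_pfn(pte)))
 *		return NULL;
 */
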
1138ca5999fdSMike Rapoport #ifdef CONFIG_MMU
1139ca5999fdSMike Rapoport 
1140ca5999fdSMike Rapoport #ifndef CONFIG_TRANSPARENT_HUGEPAGE
1141ca5999fdSMike Rapoport static inline int pmd_trans_huge(pmd_t pmd)
1142ca5999fdSMike Rapoport {
1143ca5999fdSMike Rapoport 	return 0;
1144ca5999fdSMike Rapoport }
1145ca5999fdSMike Rapoport #ifndef pmd_write
1146ca5999fdSMike Rapoport static inline int pmd_write(pmd_t pmd)
1147ca5999fdSMike Rapoport {
1148ca5999fdSMike Rapoport 	BUG();
1149ca5999fdSMike Rapoport 	return 0;
1150ca5999fdSMike Rapoport }
1151ca5999fdSMike Rapoport #endif /* pmd_write */
1152ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1153ca5999fdSMike Rapoport 
1154ca5999fdSMike Rapoport #ifndef pud_write
1155ca5999fdSMike Rapoport static inline int pud_write(pud_t pud)
1156ca5999fdSMike Rapoport {
1157ca5999fdSMike Rapoport 	BUG();
1158ca5999fdSMike Rapoport 	return 0;
1159ca5999fdSMike Rapoport }
1160ca5999fdSMike Rapoport #endif /* pud_write */
1161ca5999fdSMike Rapoport 
1162ca5999fdSMike Rapoport #if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
1163ca5999fdSMike Rapoport static inline int pmd_devmap(pmd_t pmd)
1164ca5999fdSMike Rapoport {
1165ca5999fdSMike Rapoport 	return 0;
1166ca5999fdSMike Rapoport }
1167ca5999fdSMike Rapoport static inline int pud_devmap(pud_t pud)
1168ca5999fdSMike Rapoport {
1169ca5999fdSMike Rapoport 	return 0;
1170ca5999fdSMike Rapoport }
1171ca5999fdSMike Rapoport static inline int pgd_devmap(pgd_t pgd)
1172ca5999fdSMike Rapoport {
1173ca5999fdSMike Rapoport 	return 0;
1174ca5999fdSMike Rapoport }
1175ca5999fdSMike Rapoport #endif
1176ca5999fdSMike Rapoport 
1177ca5999fdSMike Rapoport #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
1178ca5999fdSMike Rapoport 	(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
1179ca5999fdSMike Rapoport 	 !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
1180ca5999fdSMike Rapoport static inline int pud_trans_huge(pud_t pud)
1181ca5999fdSMike Rapoport {
1182ca5999fdSMike Rapoport 	return 0;
1183ca5999fdSMike Rapoport }
1184ca5999fdSMike Rapoport #endif
1185ca5999fdSMike Rapoport 
1186ca5999fdSMike Rapoport /* See pmd_none_or_trans_huge_or_clear_bad for discussion. */
1187ca5999fdSMike Rapoport static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud)
1188ca5999fdSMike Rapoport {
1189ca5999fdSMike Rapoport 	pud_t pudval = READ_ONCE(*pud);
1190ca5999fdSMike Rapoport 
1191ca5999fdSMike Rapoport 	if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
1192ca5999fdSMike Rapoport 		return 1;
1193ca5999fdSMike Rapoport 	if (unlikely(pud_bad(pudval))) {
1194ca5999fdSMike Rapoport 		pud_clear_bad(pud);
1195ca5999fdSMike Rapoport 		return 1;
1196ca5999fdSMike Rapoport 	}
1197ca5999fdSMike Rapoport 	return 0;
1198ca5999fdSMike Rapoport }
1199ca5999fdSMike Rapoport 
1200ca5999fdSMike Rapoport /* See pmd_trans_unstable for discussion. */
1201ca5999fdSMike Rapoport static inline int pud_trans_unstable(pud_t *pud)
1202ca5999fdSMike Rapoport {
1203ca5999fdSMike Rapoport #if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&			\
1204ca5999fdSMike Rapoport 	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
1205ca5999fdSMike Rapoport 	return pud_none_or_trans_huge_or_dev_or_clear_bad(pud);
1206ca5999fdSMike Rapoport #else
1207ca5999fdSMike Rapoport 	return 0;
1208ca5999fdSMike Rapoport #endif
1209ca5999fdSMike Rapoport }
1210ca5999fdSMike Rapoport 
1211ca5999fdSMike Rapoport #ifndef pmd_read_atomic
1212ca5999fdSMike Rapoport static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
1213ca5999fdSMike Rapoport {
1214ca5999fdSMike Rapoport 	/*
1215ca5999fdSMike Rapoport 	 * Depend on the compiler for an atomic pmd read. NOTE: this is
1216ca5999fdSMike Rapoport 	 * only going to work if pmdval_t is no larger than
1217ca5999fdSMike Rapoport 	 * an unsigned long.
1218ca5999fdSMike Rapoport 	 */
1219ca5999fdSMike Rapoport 	return *pmdp;
1220ca5999fdSMike Rapoport }
1221ca5999fdSMike Rapoport #endif
1222ca5999fdSMike Rapoport 
1223ca5999fdSMike Rapoport #ifndef arch_needs_pgtable_deposit
1224ca5999fdSMike Rapoport #define arch_needs_pgtable_deposit() (false)
1225ca5999fdSMike Rapoport #endif
1226ca5999fdSMike Rapoport /*
1227ca5999fdSMike Rapoport  * This function is meant to be used by sites walking pagetables with
1228c1e8d7c6SMichel Lespinasse  * the mmap_lock held in read mode to protect against MADV_DONTNEED and
1229ca5999fdSMike Rapoport  * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
1230ca5999fdSMike Rapoport  * into a null pmd and the transhuge page fault can convert a null pmd
1231ca5999fdSMike Rapoport  * into a huge pmd or into a regular pmd (if the hugepage allocation
1232c1e8d7c6SMichel Lespinasse  * fails). While holding the mmap_lock in read mode, the pmd becomes
1233ca5999fdSMike Rapoport  * stable and stops changing under us only if it's not null and not a
1234ca5999fdSMike Rapoport  * transhuge pmd. When those races occur and this function makes a
1235ca5999fdSMike Rapoport  * difference vs the standard pmd_none_or_clear_bad(), the result is
1236ca5999fdSMike Rapoport  * undefined, so behaving as if the pmd were none is safe (because it
1237ca5999fdSMike Rapoport  * can return none anyway). The compiler-level barrier() is critically
1238ca5999fdSMike Rapoport  * important to compute the two checks atomically on the same pmdval.
1239ca5999fdSMike Rapoport  *
1240ca5999fdSMike Rapoport  * For 32bit kernels with a 64bit large pmd_t this automatically takes
1241ca5999fdSMike Rapoport  * care of reading the pmd atomically to avoid SMP race conditions
1242c1e8d7c6SMichel Lespinasse  * against pmd_populate() when the mmap_lock is held for reading by the
1243ca5999fdSMike Rapoport  * caller (a special atomic read, not done by "gcc" as in the generic
1244ca5999fdSMike Rapoport  * version above, is also needed when THP is disabled, because the page
1245ca5999fdSMike Rapoport  * fault can populate the pmd from under us).
1246ca5999fdSMike Rapoport  */
1247ca5999fdSMike Rapoport static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
1248ca5999fdSMike Rapoport {
1249ca5999fdSMike Rapoport 	pmd_t pmdval = pmd_read_atomic(pmd);
1250ca5999fdSMike Rapoport 	/*
1251ca5999fdSMike Rapoport 	 * The barrier will stabilize the pmdval in a register or on
1252ca5999fdSMike Rapoport 	 * the stack so that it will stop changing under the code.
1253ca5999fdSMike Rapoport 	 *
1254ca5999fdSMike Rapoport 	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
1255ca5999fdSMike Rapoport 	 * pmd_read_atomic is allowed to return a non-atomic pmdval
1256ca5999fdSMike Rapoport 	 * (for example pointing to a hugepage that has never been
1257ca5999fdSMike Rapoport 	 * mapped in the pmd). The checks below only care about
1258ca5999fdSMike Rapoport 	 * the low part of the pmd on 32bit PAE x86 anyway, with the
1259ca5999fdSMike Rapoport 	 * exception of pmd_none(). So the important thing is that if
1260ca5999fdSMike Rapoport 	 * the low part of the pmd is found null, the high part must
1261ca5999fdSMike Rapoport 	 * also be null or the pmd_none() check below would be
1262ca5999fdSMike Rapoport 	 * confused.
1263ca5999fdSMike Rapoport 	 */
1264ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1265ca5999fdSMike Rapoport 	barrier();
1266ca5999fdSMike Rapoport #endif
1267ca5999fdSMike Rapoport 	/*
1268ca5999fdSMike Rapoport 	 * !pmd_present() checks for pmd migration entries
1269ca5999fdSMike Rapoport 	 *
1270ca5999fdSMike Rapoport 	 * The complete check uses is_pmd_migration_entry() in linux/swapops.h.
1271ca5999fdSMike Rapoport 	 * But using that would require moving this function and pmd_trans_unstable()
1272ca5999fdSMike Rapoport 	 * to linux/swapops.h to resolve the dependency, which is too much code to move.
1273ca5999fdSMike Rapoport 	 *
1274ca5999fdSMike Rapoport 	 * !pmd_present() is currently equivalent to is_pmd_migration_entry(),
1275ca5999fdSMike Rapoport 	 * because !pmd_present() pages can only be under migration, not swapped
1276ca5999fdSMike Rapoport 	 * out.
1277ca5999fdSMike Rapoport 	 *
1278ca5999fdSMike Rapoport 	 * pmd_none() is preserved for future condition checks on pmd migration
1279ca5999fdSMike Rapoport 	 * entries and to avoid confusion with this function's name, although it
1280ca5999fdSMike Rapoport 	 * is redundant with !pmd_present().
1281ca5999fdSMike Rapoport 	 */
1282ca5999fdSMike Rapoport 	if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
1283ca5999fdSMike Rapoport 		(IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval)))
1284ca5999fdSMike Rapoport 		return 1;
1285ca5999fdSMike Rapoport 	if (unlikely(pmd_bad(pmdval))) {
1286ca5999fdSMike Rapoport 		pmd_clear_bad(pmd);
1287ca5999fdSMike Rapoport 		return 1;
1288ca5999fdSMike Rapoport 	}
1289ca5999fdSMike Rapoport 	return 0;
1290ca5999fdSMike Rapoport }
1291ca5999fdSMike Rapoport 
1292ca5999fdSMike Rapoport /*
1293ca5999fdSMike Rapoport  * This is a noop if Transparent Hugepage Support is not built into
1294ca5999fdSMike Rapoport  * the kernel. Otherwise it is equivalent to
1295ca5999fdSMike Rapoport  * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
1296ca5999fdSMike Rapoport  * places that have already verified the pmd is not none and want to
1297ca5999fdSMike Rapoport  * walk ptes while holding the mmap_lock in read mode (write mode doesn't
1298ca5999fdSMike Rapoport  * need this). If THP is not enabled, the pmd can't go away under the
1299ca5999fdSMike Rapoport  * code even if MADV_DONTNEED runs, but if THP is enabled we need to
1300ca5999fdSMike Rapoport  * run pmd_trans_unstable() before walking the ptes after
1301ca5999fdSMike Rapoport  * split_huge_pmd() returns (because it may have run while the pmd became
1302ca5999fdSMike Rapoport  * null, but then a page fault can map in a THP and not a regular page).
1303ca5999fdSMike Rapoport  */
1304ca5999fdSMike Rapoport static inline int pmd_trans_unstable(pmd_t *pmd)
1305ca5999fdSMike Rapoport {
1306ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1307ca5999fdSMike Rapoport 	return pmd_none_or_trans_huge_or_clear_bad(pmd);
1308ca5999fdSMike Rapoport #else
1309ca5999fdSMike Rapoport 	return 0;
1310ca5999fdSMike Rapoport #endif
1311ca5999fdSMike Rapoport }
1312ca5999fdSMike Rapoport 
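/*
 * Illustrative sketch: the usual pattern in a pte walker that holds the
 * mmap_lock for reading (mm, pmd, addr and ptl stand for the walker's own
 * variables):
 *
 *	if (pmd_trans_unstable(pmd))
 *		return 0;	// pmd was none, huge or bad: skip the range
 *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	... walk the ptes ...
 *	pte_unmap_unlock(pte, ptl);
 */
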
1313f9ce0be7SKirill A. Shutemov /*
1314f9ce0be7SKirill A. Shutemov  * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
1315f9ce0be7SKirill A. Shutemov  * If we check pmd_trans_unstable() first we will trip the pmd_bad() check
1316f9ce0be7SKirill A. Shutemov  * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
1317f9ce0be7SKirill A. Shutemov  * returning 1, but not before it spams dmesg with the pmd_clear_bad() output.
1318f9ce0be7SKirill A. Shutemov  */
1319f9ce0be7SKirill A. Shutemov static inline int pmd_devmap_trans_unstable(pmd_t *pmd)
1320f9ce0be7SKirill A. Shutemov {
1321f9ce0be7SKirill A. Shutemov 	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
1322f9ce0be7SKirill A. Shutemov }
1323f9ce0be7SKirill A. Shutemov 
1324ca5999fdSMike Rapoport #ifndef CONFIG_NUMA_BALANCING
1325ca5999fdSMike Rapoport /*
1326ca5999fdSMike Rapoport  * Technically a PTE can be PROTNONE even when not doing NUMA balancing, but
1327ca5999fdSMike Rapoport  * the only case the kernel cares about is NUMA balancing, and the bit is
1328ca5999fdSMike Rapoport  * only ever set when the VMA is accessible. For PROT_NONE VMAs, the PTEs
13291067b261SRandy Dunlap  * are not marked _PAGE_PROTNONE, so by default implement the helper as
1330ca5999fdSMike Rapoport  * "always no". It is the responsibility of the caller to distinguish
1331ca5999fdSMike Rapoport  * between PROT_NONE protections and NUMA hinting fault protections.
1332ca5999fdSMike Rapoport  */
1333ca5999fdSMike Rapoport static inline int pte_protnone(pte_t pte)
1334ca5999fdSMike Rapoport {
1335ca5999fdSMike Rapoport 	return 0;
1336ca5999fdSMike Rapoport }
1337ca5999fdSMike Rapoport 
1338ca5999fdSMike Rapoport static inline int pmd_protnone(pmd_t pmd)
1339ca5999fdSMike Rapoport {
1340ca5999fdSMike Rapoport 	return 0;
1341ca5999fdSMike Rapoport }
1342ca5999fdSMike Rapoport #endif /* CONFIG_NUMA_BALANCING */
1343ca5999fdSMike Rapoport 
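/*
 * Illustrative sketch: with CONFIG_NUMA_BALANCING enabled, the fault path
 * distinguishes NUMA hinting faults roughly the way handle_pte_fault()
 * does in mm/memory.c:
 *
 *	if (pte_protnone(entry) && vma_is_accessible(vma))
 *		return do_numa_page(vmf);
 */
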
1344ca5999fdSMike Rapoport #endif /* CONFIG_MMU */
1345ca5999fdSMike Rapoport 
1346ca5999fdSMike Rapoport #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
1347ca5999fdSMike Rapoport 
1348ca5999fdSMike Rapoport #ifndef __PAGETABLE_P4D_FOLDED
1349ca5999fdSMike Rapoport int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
1350ca5999fdSMike Rapoport int p4d_clear_huge(p4d_t *p4d);
1351ca5999fdSMike Rapoport #else
1352ca5999fdSMike Rapoport static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
1353ca5999fdSMike Rapoport {
1354ca5999fdSMike Rapoport 	return 0;
1355ca5999fdSMike Rapoport }
1356ca5999fdSMike Rapoport static inline int p4d_clear_huge(p4d_t *p4d)
1357ca5999fdSMike Rapoport {
1358ca5999fdSMike Rapoport 	return 0;
1359ca5999fdSMike Rapoport }
1360ca5999fdSMike Rapoport #endif /* !__PAGETABLE_P4D_FOLDED */
1361ca5999fdSMike Rapoport 
1362ca5999fdSMike Rapoport int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
1363ca5999fdSMike Rapoport int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
1364ca5999fdSMike Rapoport int pud_clear_huge(pud_t *pud);
1365ca5999fdSMike Rapoport int pmd_clear_huge(pmd_t *pmd);
1366ca5999fdSMike Rapoport int p4d_free_pud_page(p4d_t *p4d, unsigned long addr);
1367ca5999fdSMike Rapoport int pud_free_pmd_page(pud_t *pud, unsigned long addr);
1368ca5999fdSMike Rapoport int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
1369ca5999fdSMike Rapoport #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
1370ca5999fdSMike Rapoport static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
1371ca5999fdSMike Rapoport {
1372ca5999fdSMike Rapoport 	return 0;
1373ca5999fdSMike Rapoport }
1374ca5999fdSMike Rapoport static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
1375ca5999fdSMike Rapoport {
1376ca5999fdSMike Rapoport 	return 0;
1377ca5999fdSMike Rapoport }
1378ca5999fdSMike Rapoport static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
1379ca5999fdSMike Rapoport {
1380ca5999fdSMike Rapoport 	return 0;
1381ca5999fdSMike Rapoport }
1382ca5999fdSMike Rapoport static inline int p4d_clear_huge(p4d_t *p4d)
1383ca5999fdSMike Rapoport {
1384ca5999fdSMike Rapoport 	return 0;
1385ca5999fdSMike Rapoport }
1386ca5999fdSMike Rapoport static inline int pud_clear_huge(pud_t *pud)
1387ca5999fdSMike Rapoport {
1388ca5999fdSMike Rapoport 	return 0;
1389ca5999fdSMike Rapoport }
1390ca5999fdSMike Rapoport static inline int pmd_clear_huge(pmd_t *pmd)
1391ca5999fdSMike Rapoport {
1392ca5999fdSMike Rapoport 	return 0;
1393ca5999fdSMike Rapoport }
1394ca5999fdSMike Rapoport static inline int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
1395ca5999fdSMike Rapoport {
1396ca5999fdSMike Rapoport 	return 0;
1397ca5999fdSMike Rapoport }
1398ca5999fdSMike Rapoport static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
1399ca5999fdSMike Rapoport {
1400ca5999fdSMike Rapoport 	return 0;
1401ca5999fdSMike Rapoport }
1402ca5999fdSMike Rapoport static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
1403ca5999fdSMike Rapoport {
1404ca5999fdSMike Rapoport 	return 0;
1405ca5999fdSMike Rapoport }
1406ca5999fdSMike Rapoport #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
1407ca5999fdSMike Rapoport 
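/*
 * Illustrative sketch: the generic vmap/ioremap code tries the largest
 * suitable level first and falls back to smaller mappings, roughly:
 *
 *	if (size >= PMD_SIZE && pmd_set_huge(pmd, phys_addr, prot))
 *		continue;	// covered PMD_SIZE with a single entry
 *	// otherwise populate the range with individual ptes
 */
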
1408ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
1409ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1410ca5999fdSMike Rapoport /*
1411ca5999fdSMike Rapoport  * ARCHes with special requirements for evicting THP-backing TLB entries can
1412ca5999fdSMike Rapoport  * implement this. Even otherwise, it can help optimize the normal TLB flush
14131067b261SRandy Dunlap  * in the THP regime. A stock flush_tlb_range() typically has an optimization
14141067b261SRandy Dunlap  * to nuke the entire TLB if the flush span is greater than a threshold,
14151067b261SRandy Dunlap  * which will likely be true for a single huge page. Thus a single THP flush
14161067b261SRandy Dunlap  * will invalidate the entire TLB, which is not desirable.
1417ca5999fdSMike Rapoport  * e.g. see arch/arc: flush_pmd_tlb_range
1418ca5999fdSMike Rapoport  */
1419ca5999fdSMike Rapoport #define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
1420ca5999fdSMike Rapoport #define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
1421ca5999fdSMike Rapoport #else
1422ca5999fdSMike Rapoport #define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
1423ca5999fdSMike Rapoport #define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
1424ca5999fdSMike Rapoport #endif
1425ca5999fdSMike Rapoport #endif
1426ca5999fdSMike Rapoport 
1427ca5999fdSMike Rapoport struct file;
1428ca5999fdSMike Rapoport int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
1429ca5999fdSMike Rapoport 			unsigned long size, pgprot_t *vma_prot);
1430ca5999fdSMike Rapoport 
1431ca5999fdSMike Rapoport #ifndef CONFIG_X86_ESPFIX64
1432ca5999fdSMike Rapoport static inline void init_espfix_bsp(void) { }
1433ca5999fdSMike Rapoport #endif
1434ca5999fdSMike Rapoport 
1435ca5999fdSMike Rapoport extern void __init pgtable_cache_init(void);
1436ca5999fdSMike Rapoport 
1437ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
1438ca5999fdSMike Rapoport static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
1439ca5999fdSMike Rapoport {
1440ca5999fdSMike Rapoport 	return true;
1441ca5999fdSMike Rapoport }
1442ca5999fdSMike Rapoport 
1443ca5999fdSMike Rapoport static inline bool arch_has_pfn_modify_check(void)
1444ca5999fdSMike Rapoport {
1445ca5999fdSMike Rapoport 	return false;
1446ca5999fdSMike Rapoport }
1447ca5999fdSMike Rapoport #endif /* !__HAVE_ARCH_PFN_MODIFY_ALLOWED */
1448ca5999fdSMike Rapoport 
1449ca5999fdSMike Rapoport /*
1450ca5999fdSMike Rapoport  * Architecture PAGE_KERNEL_* fallbacks
1451ca5999fdSMike Rapoport  *
1452ca5999fdSMike Rapoport  * Some architectures don't define certain PAGE_KERNEL_* flags. This is either
1453ca5999fdSMike Rapoport  * because they really don't support them, or the port needs to be updated to
1454ca5999fdSMike Rapoport  * reflect the required functionality. Below is a set of relatively safe,
1455ca5999fdSMike Rapoport  * best-effort fallbacks that we can count on until the architectures
1456ca5999fdSMike Rapoport  * define these on their own.
1457ca5999fdSMike Rapoport  */
1458ca5999fdSMike Rapoport 
1459ca5999fdSMike Rapoport #ifndef PAGE_KERNEL_RO
1460ca5999fdSMike Rapoport # define PAGE_KERNEL_RO PAGE_KERNEL
1461ca5999fdSMike Rapoport #endif
1462ca5999fdSMike Rapoport 
1463ca5999fdSMike Rapoport #ifndef PAGE_KERNEL_EXEC
1464ca5999fdSMike Rapoport # define PAGE_KERNEL_EXEC PAGE_KERNEL
1465ca5999fdSMike Rapoport #endif
1466ca5999fdSMike Rapoport 
1467ca5999fdSMike Rapoport /*
1468ca5999fdSMike Rapoport  * Page Table Modification bits for pgtbl_mod_mask.
1469ca5999fdSMike Rapoport  *
1470ca5999fdSMike Rapoport  * These are used by the p?d_alloc_track*() set of functions and in the generic
1471ca5999fdSMike Rapoport  * vmalloc/ioremap code to track at which page-table levels entries have been
1472ca5999fdSMike Rapoport  * modified. Based on that the code can better decide when vmalloc and ioremap
1473ca5999fdSMike Rapoport  * mapping changes need to be synchronized to other page-tables in the system.
1474ca5999fdSMike Rapoport  */
1475ca5999fdSMike Rapoport #define		__PGTBL_PGD_MODIFIED	0
1476ca5999fdSMike Rapoport #define		__PGTBL_P4D_MODIFIED	1
1477ca5999fdSMike Rapoport #define		__PGTBL_PUD_MODIFIED	2
1478ca5999fdSMike Rapoport #define		__PGTBL_PMD_MODIFIED	3
1479ca5999fdSMike Rapoport #define		__PGTBL_PTE_MODIFIED	4
1480ca5999fdSMike Rapoport 
1481ca5999fdSMike Rapoport #define		PGTBL_PGD_MODIFIED	BIT(__PGTBL_PGD_MODIFIED)
1482ca5999fdSMike Rapoport #define		PGTBL_P4D_MODIFIED	BIT(__PGTBL_P4D_MODIFIED)
1483ca5999fdSMike Rapoport #define		PGTBL_PUD_MODIFIED	BIT(__PGTBL_PUD_MODIFIED)
1484ca5999fdSMike Rapoport #define		PGTBL_PMD_MODIFIED	BIT(__PGTBL_PMD_MODIFIED)
1485ca5999fdSMike Rapoport #define		PGTBL_PTE_MODIFIED	BIT(__PGTBL_PTE_MODIFIED)
1486ca5999fdSMike Rapoport 
1487ca5999fdSMike Rapoport /* Page-Table Modification Mask */
1488ca5999fdSMike Rapoport typedef unsigned int pgtbl_mod_mask;
1489ca5999fdSMike Rapoport 
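/*
 * Illustrative sketch: the generic vmalloc code accumulates such a mask
 * while populating the tables and then lets the architecture synchronize
 * other page-tables if needed (ARCH_PAGE_TABLE_SYNC_MASK and
 * arch_sync_kernel_mappings() come from linux/vmalloc.h):
 *
 *	pgtbl_mod_mask mask = 0;
 *
 *	// each level that had to allocate a new table sets its bit, e.g.:
 *	mask |= PGTBL_PMD_MODIFIED;
 *
 *	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
 *		arch_sync_kernel_mappings(start, end);
 */
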
1490ca5999fdSMike Rapoport #endif /* !__ASSEMBLY__ */
1491ca5999fdSMike Rapoport 
1492cef39703SArnd Bergmann #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
1493cef39703SArnd Bergmann #ifdef CONFIG_PHYS_ADDR_T_64BIT
1494cef39703SArnd Bergmann /*
1495cef39703SArnd Bergmann  * ZSMALLOC needs to know the highest PFN on 32-bit architectures
1496cef39703SArnd Bergmann  * with physical address space extension, but falls back to
1497cef39703SArnd Bergmann  * BITS_PER_LONG otherwise.
1498cef39703SArnd Bergmann  */
1499cef39703SArnd Bergmann #error Missing MAX_POSSIBLE_PHYSMEM_BITS definition
1500cef39703SArnd Bergmann #else
1501cef39703SArnd Bergmann #define MAX_POSSIBLE_PHYSMEM_BITS 32
1502cef39703SArnd Bergmann #endif
1503cef39703SArnd Bergmann #endif
1504cef39703SArnd Bergmann 
1505ca5999fdSMike Rapoport #ifndef has_transparent_hugepage
1506ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1507ca5999fdSMike Rapoport #define has_transparent_hugepage() 1
1508ca5999fdSMike Rapoport #else
1509ca5999fdSMike Rapoport #define has_transparent_hugepage() 0
1510ca5999fdSMike Rapoport #endif
1511ca5999fdSMike Rapoport #endif
1512ca5999fdSMike Rapoport 
1513ca5999fdSMike Rapoport /*
1514ca5999fdSMike Rapoport  * On some architectures it depends on the mm whether the p4d/pud or pmd
1515ca5999fdSMike Rapoport  * layer of the page table hierarchy is folded or not.
1516ca5999fdSMike Rapoport  */
1517ca5999fdSMike Rapoport #ifndef mm_p4d_folded
1518ca5999fdSMike Rapoport #define mm_p4d_folded(mm)	__is_defined(__PAGETABLE_P4D_FOLDED)
1519ca5999fdSMike Rapoport #endif
1520ca5999fdSMike Rapoport 
1521ca5999fdSMike Rapoport #ifndef mm_pud_folded
1522ca5999fdSMike Rapoport #define mm_pud_folded(mm)	__is_defined(__PAGETABLE_PUD_FOLDED)
1523ca5999fdSMike Rapoport #endif
1524ca5999fdSMike Rapoport 
1525ca5999fdSMike Rapoport #ifndef mm_pmd_folded
1526ca5999fdSMike Rapoport #define mm_pmd_folded(mm)	__is_defined(__PAGETABLE_PMD_FOLDED)
1527ca5999fdSMike Rapoport #endif
1528ca5999fdSMike Rapoport 
1529d3f7b1bbSVasily Gorbik #ifndef p4d_offset_lockless
1530d3f7b1bbSVasily Gorbik #define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address)
1531d3f7b1bbSVasily Gorbik #endif
1532d3f7b1bbSVasily Gorbik #ifndef pud_offset_lockless
1533d3f7b1bbSVasily Gorbik #define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address)
1534d3f7b1bbSVasily Gorbik #endif
1535d3f7b1bbSVasily Gorbik #ifndef pmd_offset_lockless
1536d3f7b1bbSVasily Gorbik #define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address)
1537d3f7b1bbSVasily Gorbik #endif
1538d3f7b1bbSVasily Gorbik 
1539ca5999fdSMike Rapoport /*
1540ca5999fdSMike Rapoport  * p?d_leaf() - true if this entry is a final mapping to a physical address.
1541ca5999fdSMike Rapoport  * This differs from p?d_huge() in that p?d_leaf() is always available (if
1542ca5999fdSMike Rapoport  * the architecture supports large pages at the appropriate level) even
1543ca5999fdSMike Rapoport  * if CONFIG_HUGETLB_PAGE is not defined.
1544ca5999fdSMike Rapoport  * Only meaningful when called on a valid entry.
1545ca5999fdSMike Rapoport  */
1546ca5999fdSMike Rapoport #ifndef pgd_leaf
1547ca5999fdSMike Rapoport #define pgd_leaf(x)	0
1548ca5999fdSMike Rapoport #endif
1549ca5999fdSMike Rapoport #ifndef p4d_leaf
1550ca5999fdSMike Rapoport #define p4d_leaf(x)	0
1551ca5999fdSMike Rapoport #endif
1552ca5999fdSMike Rapoport #ifndef pud_leaf
1553ca5999fdSMike Rapoport #define pud_leaf(x)	0
1554ca5999fdSMike Rapoport #endif
1555ca5999fdSMike Rapoport #ifndef pmd_leaf
1556ca5999fdSMike Rapoport #define pmd_leaf(x)	0
1557ca5999fdSMike Rapoport #endif
1558ca5999fdSMike Rapoport 
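/*
 * Illustrative sketch: a software walker can stop descending once it hits
 * a leaf entry and compute the target pfn directly, e.g. at the pmd level
 * (pmd_pfn() being the per-arch helper for huge pmds):
 *
 *	pmd_t pmd = READ_ONCE(*pmdp);
 *
 *	if (pmd_leaf(pmd))
 *		return pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
 *	// otherwise continue to the pte level
 */
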
1559560dabbdSPeter Zijlstra #ifndef pgd_leaf_size
1560560dabbdSPeter Zijlstra #define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
1561560dabbdSPeter Zijlstra #endif
1562560dabbdSPeter Zijlstra #ifndef p4d_leaf_size
1563560dabbdSPeter Zijlstra #define p4d_leaf_size(x) P4D_SIZE
1564560dabbdSPeter Zijlstra #endif
1565560dabbdSPeter Zijlstra #ifndef pud_leaf_size
1566560dabbdSPeter Zijlstra #define pud_leaf_size(x) PUD_SIZE
1567560dabbdSPeter Zijlstra #endif
1568560dabbdSPeter Zijlstra #ifndef pmd_leaf_size
1569560dabbdSPeter Zijlstra #define pmd_leaf_size(x) PMD_SIZE
1570560dabbdSPeter Zijlstra #endif
1571560dabbdSPeter Zijlstra #ifndef pte_leaf_size
1572560dabbdSPeter Zijlstra #define pte_leaf_size(x) PAGE_SIZE
1573560dabbdSPeter Zijlstra #endif
1574560dabbdSPeter Zijlstra 
1575ca5999fdSMike Rapoport #endif /* _LINUX_PGTABLE_H */
1576