xref: /linux-6.15/include/linux/pgtable.h (revision 8c56c5db)
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PGTABLE_H
#define _LINUX_PGTABLE_H

#include <linux/pfn.h>
#include <asm/pgtable.h>

#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)
#define PUD_ORDER	(PUD_SHIFT - PAGE_SHIFT)

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>
#include <asm-generic/pgtable_uffd.h>
#include <linux/page_table_check.h>

#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE.  However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING	0UL
#endif

/*
 * This defines the first usable user address. Platforms
 * can override its value with custom FIRST_USER_ADDRESS
 * defined in their respective <asm/pgtable.h>.
 */
#ifndef FIRST_USER_ADDRESS
#define FIRST_USER_ADDRESS	0UL
#endif

/*
 * This defines the generic helper for accessing the PMD page
 * table page. Platforms can still override it via their
 * respective <asm/pgtable.h>.
 */
#ifndef pmd_pgtable
#define pmd_pgtable(pmd) pmd_page(pmd)
#endif

#define pmd_folio(pmd) page_folio(pmd_page(pmd))

/*
 * A page table page can be thought of as an array like this: pXd_t[PTRS_PER_PxD]
 *
 * The pXx_index() functions return the index of the entry in the page
 * table page which would control the given virtual address.
 *
 * As these functions may be used by the same code for different levels of
 * the page table folding, they are always available, regardless of the
 * CONFIG_PGTABLE_LEVELS value. For the folded levels they simply return 0
 * because in such cases PTRS_PER_PxD equals 1.
 */

static inline unsigned long pte_index(unsigned long address)
{
	return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}

#ifndef pmd_index
static inline unsigned long pmd_index(unsigned long address)
{
	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}
#define pmd_index pmd_index
#endif

#ifndef pud_index
static inline unsigned long pud_index(unsigned long address)
{
	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
}
#define pud_index pud_index
#endif

#ifndef pgd_index
/* Must be a compile-time constant, so implement it as a macro */
#define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#endif
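
/*
 * Illustrative sketch (not part of the API): how the index helpers above
 * decompose a virtual address. The bit positions assume a hypothetical
 * 4-level, 4K-page layout similar to x86-64 and are given for the example
 * only; the helpers themselves only rely on the PxD_SHIFT/PTRS_PER_PxD
 * values of the configuration.
 *
 *	unsigned long addr = 0x00007f1234567000UL;
 *	unsigned long i_pgd = pgd_index(addr);	// e.g. bits [47:39]
 *	unsigned long i_pud = pud_index(addr);	// e.g. bits [38:30]
 *	unsigned long i_pmd = pmd_index(addr);	// e.g. bits [29:21]
 *	unsigned long i_pte = pte_index(addr);	// e.g. bits [20:12]
 *
 * Each helper masks with (PTRS_PER_PxD - 1), so on a configuration where a
 * level is folded (PTRS_PER_PxD == 1) the corresponding index is always 0.
 */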

#ifndef kernel_pte_init
static inline void kernel_pte_init(void *addr)
{
}
#define kernel_pte_init kernel_pte_init
#endif

#ifndef pmd_init
static inline void pmd_init(void *addr)
{
}
#define pmd_init pmd_init
#endif

#ifndef pud_init
static inline void pud_init(void *addr)
{
}
#define pud_init pud_init
#endif

#ifndef pte_offset_kernel
static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
{
	return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
}
#define pte_offset_kernel pte_offset_kernel
#endif

#ifdef CONFIG_HIGHPTE
#define __pte_map(pmd, address) \
	((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address)))
#define pte_unmap(pte)	do {	\
	kunmap_local((pte));	\
	rcu_read_unlock();	\
} while (0)
#else
static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}
static inline void pte_unmap(pte_t *pte)
{
	rcu_read_unlock();
}
#endif

void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);

/* Find an entry in the second-level page table. */
#ifndef pmd_offset
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
	return pud_pgtable(*pud) + pmd_index(address);
}
#define pmd_offset pmd_offset
#endif

#ifndef pud_offset
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
	return p4d_pgtable(*p4d) + pud_index(address);
}
#define pud_offset pud_offset
#endif

static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
{
	return (pgd + pgd_index(address));
};

/*
 * a shortcut to get a pgd_t in a given mm
 */
#ifndef pgd_offset
#define pgd_offset(mm, address)		pgd_offset_pgd((mm)->pgd, (address))
#endif

/*
 * a shortcut which implies the use of the kernel's pgd, instead
 * of a process's
 */
#define pgd_offset_k(address)		pgd_offset(&init_mm, (address))

/*
 * In many cases it is known that a virtual address is mapped at PMD or PTE
 * level, so instead of traversing all the page table levels, simple helpers
 * can return a pointer to the PMD entry in the user or kernel page tables,
 * or translate a kernel virtual address to a pointer to its PTE.
 */
static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va)
{
	return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), va);
}

static inline pmd_t *pmd_off_k(unsigned long va)
{
	return pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va);
}

static inline pte_t *virt_to_kpte(unsigned long vaddr)
{
	pmd_t *pmd = pmd_off_k(vaddr);

	return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
}
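
/*
 * Illustrative sketch (not part of this header): looking up the kernel PTE
 * that maps a kernel virtual address which is assumed to be mapped at PTE
 * level.
 *
 *	pte_t *ptep = virt_to_kpte(vaddr);
 *
 *	if (ptep && pte_present(ptep_get(ptep)))
 *		;	// the address is mapped; inspect or modify the entry
 *
 * virt_to_kpte() returns NULL when the PMD entry is none, so callers must
 * check the result before dereferencing it.
 */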

#ifndef pmd_young
static inline int pmd_young(pmd_t pmd)
{
	return 0;
}
#endif

#ifndef pmd_dirty
static inline int pmd_dirty(pmd_t pmd)
{
	return 0;
}
#endif

/*
 * A facility to provide lazy MMU batching.  This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued.  Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window.  Note that using this
 * interface requires that read hazards be removed from the code.  A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date.
 *
 * In the general case, no lock is guaranteed to be held between entry and exit
 * of the lazy mode. So the implementation must assume preemption may be enabled
 * and cpu migration is possible; it must take steps to be robust against this.
 * (In practice, for user PTE updates, the appropriate page table lock(s) are
 * held, but for kernel PTE updates, no lock is held). Nesting is not permitted
 * and the mode cannot be used in interrupt context.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif
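
/*
 * Illustrative sketch (not a definition from this header): the intended call
 * pattern for the lazy MMU hooks when performing a batch of PTE updates,
 * e.g. while holding the page table lock for user mappings.
 *
 *	arch_enter_lazy_mmu_mode();
 *	... a batch of set_pte_at()/pte_clear() style updates ...
 *	arch_leave_lazy_mmu_mode();
 *
 * Between enter and leave, reads through raw PTE pointers may be stale on
 * architectures that batch the updates, so the code must avoid such read
 * hazards. Nesting and use from interrupt context are not permitted.
 */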

#ifndef pte_batch_hint
/**
 * pte_batch_hint - Number of pages that can be added to batch without scanning.
 * @ptep: Page table pointer for the entry.
 * @pte: Page table entry.
 *
 * Some architectures know that a set of contiguous ptes all map the same
 * contiguous memory with the same permissions. In this case, it can provide a
 * hint to aid pte batching without the core code needing to scan every pte.
 *
 * An architecture implementation may ignore the PTE accessed state. Further,
 * the dirty state must apply atomically to all the PTEs described by the hint.
 *
 * May be overridden by the architecture, else pte_batch_hint is always 1.
 */
static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
{
	return 1;
}
#endif
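
/*
 * Illustrative sketch (an assumption, not the core-mm implementation): how a
 * caller scanning a PTE range might consume pte_batch_hint() to skip ahead
 * when the architecture maps a contiguous block with identical entries.
 * 'nr' and 'ptep' are hypothetical caller variables.
 *
 *	unsigned int i = 0, step;
 *
 *	while (i < nr) {
 *		step = pte_batch_hint(ptep + i, ptep_get(ptep + i));
 *		// all 'step' entries share the same properties; treat as one
 *		i += step;
 *	}
 *
 * With the generic stub above, step is always 1 and this degenerates to a
 * plain per-PTE loop.
 */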

#ifndef pte_advance_pfn
static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
{
	return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
}
#endif

#define pte_next_pfn(pte) pte_advance_pfn(pte, 1)

#ifndef set_ptes
/**
 * set_ptes - Map consecutive pages to a contiguous range of addresses.
 * @mm: Address space to map the pages into.
 * @addr: Address to map the first page at.
 * @ptep: Page table pointer for the first entry.
 * @pte: Page table entry for the first page.
 * @nr: Number of pages to map.
 *
 * When nr==1, the initial state of the pte may be present or not present, and
 * the new state may be present or not present. When nr>1, the initial state of
 * all ptes must be not present, and the new state must be present.
 *
 * May be overridden by the architecture, or the architecture can define
 * set_pte() and PFN_PTE_SHIFT.
 *
 * Context: The caller holds the page table lock.  The pages all belong
 * to the same folio.  The PTEs are all in the same PMD.
 */
static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
		pte_t *ptep, pte_t pte, unsigned int nr)
{
	page_table_check_ptes_set(mm, ptep, pte, nr);

	for (;;) {
		set_pte(ptep, pte);
		if (--nr == 0)
			break;
		ptep++;
		pte = pte_next_pfn(pte);
	}
}
#endif
#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
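
/*
 * Illustrative sketch (an assumption, not taken from core mm): mapping the
 * pages of a folio with a single set_ptes() call, under the page table lock,
 * where all entries were previously not present. 'prot', 'addr' and 'ptep'
 * are hypothetical caller variables.
 *
 *	pte_t pte = mk_pte(&folio->page, prot);
 *
 *	set_ptes(vma->vm_mm, addr, ptep, pte, folio_nr_pages(folio));
 *
 * The generic implementation writes the first entry and then advances the
 * PFN with pte_next_pfn() for each subsequent entry, so the folio's pages
 * must be physically contiguous and all PTEs must sit in the same PMD.
 */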

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
extern int pudp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pud_t *pudp,
				 pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
static inline int pudp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pud_t *pudp,
					pud_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef ptep_get
static inline pte_t ptep_get(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}
#endif

#ifndef pmdp_get
static inline pmd_t pmdp_get(pmd_t *pmdp)
{
	return READ_ONCE(*pmdp);
}
#endif

#ifndef pudp_get
static inline pud_t pudp_get(pud_t *pudp)
{
	return READ_ONCE(*pudp);
}
#endif

#ifndef p4dp_get
static inline p4d_t p4dp_get(p4d_t *p4dp)
{
	return READ_ONCE(*p4dp);
}
#endif

#ifndef pgdp_get
static inline pgd_t pgdp_get(pgd_t *pgdp)
{
	return READ_ONCE(*pgdp);
}
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	pte_t pte = ptep_get(ptep);
	int r = 1;
	if (!pte_young(pte))
		r = 0;
	else
		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
	return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	int r = 1;
	if (!pmd_young(pmd))
		r = 0;
	else
		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
	return r;
}
#else
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
				  unsigned long address, pmd_t *pmdp);
#else
/*
 * Although relevant only to THP, this API is called from generic rmap code
 * under PageTransHuge(), hence it needs a dummy implementation for !THP.
 */
static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef arch_has_hw_nonleaf_pmd_young
/*
 * Return whether the accessed bit in non-leaf PMD entries is supported on the
 * local CPU.
 */
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
	return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
}
#endif

#ifndef arch_has_hw_pte_young
/*
 * Return whether the accessed bit is supported on the local CPU.
 *
 * This stub assumes accessing through an old PTE triggers a page fault.
 * Architectures that automatically set the access bit should override it.
 */
static inline bool arch_has_hw_pte_young(void)
{
	return IS_ENABLED(CONFIG_ARCH_HAS_HW_PTE_YOUNG);
}
#endif

#ifndef arch_check_zapped_pte
static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
					 pte_t pte)
{
}
#endif

#ifndef arch_check_zapped_pmd
static inline void arch_check_zapped_pmd(struct vm_area_struct *vma,
					 pmd_t pmd)
{
}
#endif

#ifndef arch_check_zapped_pud
static inline void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
{
}
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pte_t *ptep)
{
	pte_t pte = ptep_get(ptep);
	pte_clear(mm, address, ptep);
	page_table_check_pte_clear(mm, pte);
	return pte;
}
#endif

#ifndef clear_young_dirty_ptes
/**
 * clear_young_dirty_ptes - Mark PTEs that map consecutive pages of the
 *		same folio as old/clean.
 * @vma: VMA the pages are mapped into.
 * @addr: Address the first page is mapped at.
 * @ptep: Page table pointer for the first entry.
 * @nr: Number of entries to mark old/clean.
 * @flags: Flags to modify the PTE batch semantics.
 *
 * May be overridden by the architecture; otherwise, implemented by
 * get_and_clear/modify/set for each pte in the range.
 *
 * Note that PTE bits in the PTE range besides the PFN can differ. For example,
 * some PTEs might be write-protected.
 *
 * Context: The caller holds the page table lock.  The PTEs map consecutive
 * pages that belong to the same folio.  The PTEs are all in the same PMD.
 */
static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep,
					  unsigned int nr, cydp_t flags)
{
	pte_t pte;

	for (;;) {
		if (flags == CYDP_CLEAR_YOUNG)
			ptep_test_and_clear_young(vma, addr, ptep);
		else {
			pte = ptep_get_and_clear(vma->vm_mm, addr, ptep);
			if (flags & CYDP_CLEAR_YOUNG)
				pte = pte_mkold(pte);
			if (flags & CYDP_CLEAR_DIRTY)
				pte = pte_mkclean(pte);
			set_pte_at(vma->vm_mm, addr, ptep, pte);
		}
		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}
#endif

static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	pte_t pte = ptep_get(ptep);

	pte_clear(mm, addr, ptep);
	/*
	 * No need for ptep_get_and_clear(): page table check doesn't care about
	 * any bits that could have been set by HW concurrently.
	 */
	page_table_check_pte_clear(mm, pte);
}

#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH
/*
 * For walking the pagetables without holding any locks.  Some architectures
 * (eg x86-32 PAE) cannot load the entries atomically without using expensive
 * instructions.  We are guaranteed that a PTE will only either go from not
 * present to present, or present to not present -- it will not switch to a
 * completely different present page without a TLB flush in between; which we
 * are blocking by holding interrupts off.
 *
 * Setting ptes from not present to present goes:
 *
 *   ptep->pte_high = h;
 *   smp_wmb();
 *   ptep->pte_low = l;
 *
 * And present to not present goes:
 *
 *   ptep->pte_low = 0;
 *   smp_wmb();
 *   ptep->pte_high = 0;
 *
 * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
 * We load pte_high *after* loading pte_low, which ensures we don't see an older
 * value of pte_high.  *Then* we recheck pte_low, which ensures that we haven't
 * picked up a changed pte high. We might have gotten rubbish values from
 * pte_low and pte_high, but we are guaranteed that pte_low will not have the
 * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
 * operates on present ptes we're safe.
 */
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
	pte_t pte;

	do {
		pte.pte_low = ptep->pte_low;
		smp_rmb();
		pte.pte_high = ptep->pte_high;
		smp_rmb();
	} while (unlikely(pte.pte_low != ptep->pte_low));

	return pte;
}
#define ptep_get_lockless ptep_get_lockless

#if CONFIG_PGTABLE_LEVELS > 2
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
{
	pmd_t pmd;

	do {
		pmd.pmd_low = pmdp->pmd_low;
		smp_rmb();
		pmd.pmd_high = pmdp->pmd_high;
		smp_rmb();
	} while (unlikely(pmd.pmd_low != pmdp->pmd_low));

	return pmd;
}
#define pmdp_get_lockless pmdp_get_lockless
#define pmdp_get_lockless_sync() tlb_remove_table_sync_one()
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */

/*
 * We require that the PTE can be read atomically.
 */
#ifndef ptep_get_lockless
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
	return ptep_get(ptep);
}
#endif

#ifndef pmdp_get_lockless
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
{
	return pmdp_get(pmdp);
}
static inline void pmdp_get_lockless_sync(void)
{
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;

	pmd_clear(pmdp);
	page_table_check_pmd_clear(mm, pmd);

	return pmd;
}
#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pud_t *pudp)
{
	pud_t pud = *pudp;

	pud_clear(pudp);
	page_table_check_pud_clear(mm, pud);

	return pud;
}
#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
					    unsigned long address, pmd_t *pmdp,
					    int full)
{
	return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
}
#endif

#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
					    unsigned long address, pud_t *pudp,
					    int full)
{
	return pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pte_t *ptep,
					    int full)
{
	return ptep_get_and_clear(mm, address, ptep);
}
#endif

#ifndef get_and_clear_full_ptes
/**
 * get_and_clear_full_ptes - Clear present PTEs that map consecutive pages of
 *			     the same folio, collecting dirty/accessed bits.
 * @mm: Address space the pages are mapped into.
 * @addr: Address the first page is mapped at.
 * @ptep: Page table pointer for the first entry.
 * @nr: Number of entries to clear.
 * @full: Whether we are clearing a full mm.
 *
 * May be overridden by the architecture; otherwise, implemented as a simple
 * loop over ptep_get_and_clear_full(), merging dirty/accessed bits into the
 * returned PTE.
 *
 * Note that PTE bits in the PTE range besides the PFN can differ. For example,
 * some PTEs might be write-protected.
 *
 * Context: The caller holds the page table lock.  The PTEs map consecutive
 * pages that belong to the same folio.  The PTEs are all in the same PMD.
 */
static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
		unsigned long addr, pte_t *ptep, unsigned int nr, int full)
{
	pte_t pte, tmp_pte;

	pte = ptep_get_and_clear_full(mm, addr, ptep, full);
	while (--nr) {
		ptep++;
		addr += PAGE_SIZE;
		tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full);
		if (pte_dirty(tmp_pte))
			pte = pte_mkdirty(pte);
		if (pte_young(tmp_pte))
			pte = pte_mkyoung(pte);
	}
	return pte;
}
#endif
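
/*
 * Illustrative sketch (an assumption, not the actual zap implementation): a
 * caller tearing down a batch of PTEs for one folio and propagating the
 * collected dirty/accessed state to the folio. 'folio', 'addr', 'ptep', 'nr'
 * and 'full' are hypothetical caller variables.
 *
 *	pte_t pte = get_and_clear_full_ptes(mm, addr, ptep, nr, full);
 *
 *	if (pte_dirty(pte))
 *		folio_mark_dirty(folio);
 *	if (pte_young(pte))
 *		folio_mark_accessed(folio);
 *
 * Because the helper merges the dirty/accessed bits of every cleared entry
 * into the returned PTE, the caller only needs to inspect that one value.
 */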

#ifndef clear_full_ptes
/**
 * clear_full_ptes - Clear present PTEs that map consecutive pages of the same
 *		     folio.
 * @mm: Address space the pages are mapped into.
 * @addr: Address the first page is mapped at.
 * @ptep: Page table pointer for the first entry.
 * @nr: Number of entries to clear.
 * @full: Whether we are clearing a full mm.
 *
 * May be overridden by the architecture; otherwise, implemented as a simple
 * loop over ptep_get_and_clear_full().
 *
 * Note that PTE bits in the PTE range besides the PFN can differ. For example,
 * some PTEs might be write-protected.
 *
 * Context: The caller holds the page table lock.  The PTEs map consecutive
 * pages that belong to the same folio.  The PTEs are all in the same PMD.
 */
static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
		pte_t *ptep, unsigned int nr, int full)
{
	for (;;) {
		ptep_get_and_clear_full(mm, addr, ptep, full);
		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}
#endif

/*
 * If two threads concurrently fault at the same page, the thread that
 * won the race updates the PTE and its local TLB/Cache. The other thread
 * gives up, simply does nothing, and continues; on architectures where
 * software can update the TLB, the local TLB can be updated here to avoid
 * the next page fault. This function updates only the TLB; it does nothing
 * with the cache or anything else, which is what distinguishes it from
 * update_mmu_cache().
 */
#ifndef update_mmu_tlb_range
static inline void update_mmu_tlb_range(struct vm_area_struct *vma,
				unsigned long address, pte_t *ptep, unsigned int nr)
{
}
#endif

static inline void update_mmu_tlb(struct vm_area_struct *vma,
				unsigned long address, pte_t *ptep)
{
	update_mmu_tlb_range(vma, address, ptep, 1);
}

/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTEs which are already
 * not present, or during destruction of the address space.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
					      unsigned long address,
					      pte_t *ptep,
					      int full)
{
	pte_clear(mm, address, ptep);
}
#endif

#ifndef clear_not_present_full_ptes
/**
 * clear_not_present_full_ptes - Clear multiple not present PTEs which are
 *				 consecutive in the pgtable.
 * @mm: Address space the ptes represent.
 * @addr: Address of the first pte.
 * @ptep: Page table pointer for the first entry.
 * @nr: Number of entries to clear.
 * @full: Whether we are clearing a full mm.
 *
 * May be overridden by the architecture; otherwise, implemented as a simple
 * loop over pte_clear_not_present_full().
 *
 * Context: The caller holds the page table lock.  The PTEs are all not present.
 * The PTEs are all in the same PMD.
 */
static inline void clear_not_present_full_ptes(struct mm_struct *mm,
		unsigned long addr, pte_t *ptep, unsigned int nr, int full)
{
	for (;;) {
		pte_clear_not_present_full(mm, addr, ptep, full);
		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pmd_t *pmdp);
extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pud_t *pudp);
#endif

#ifndef pte_mkwrite
static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
	return pte_mkwrite_novma(pte);
}
#endif

#if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite)
static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
	return pmd_mkwrite_novma(pmd);
}
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = ptep_get(ptep);
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef wrprotect_ptes
/**
 * wrprotect_ptes - Write-protect PTEs that map consecutive pages of the same
 *		    folio.
 * @mm: Address space the pages are mapped into.
 * @addr: Address the first page is mapped at.
 * @ptep: Page table pointer for the first entry.
 * @nr: Number of entries to write-protect.
 *
 * May be overridden by the architecture; otherwise, implemented as a simple
 * loop over ptep_set_wrprotect().
 *
 * Note that PTE bits in the PTE range besides the PFN can differ. For example,
 * some PTEs might be write-protected.
 *
 * Context: The caller holds the page table lock.  The PTEs map consecutive
 * pages that belong to the same folio.  The PTEs are all in the same PMD.
 */
static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
		pte_t *ptep, unsigned int nr)
{
	for (;;) {
		ptep_set_wrprotect(mm, addr, ptep);
		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}
#endif
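
/*
 * Illustrative sketch (an assumption, not copied from fork/CoW code):
 * write-protecting a batch of PTEs that map one folio, e.g. so that later
 * writes fault and can be handled copy-on-write style. 'addr', 'ptep' and
 * 'nr' are hypothetical caller variables.
 *
 *	wrprotect_ptes(mm, addr, ptep, nr);
 *
 * The generic loop above simply applies ptep_set_wrprotect() to each entry;
 * architectures with contiguous-mapping hints may fold the whole range into
 * fewer hardware updates.
 */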

/*
 * On some architectures the hardware does not set the page access bit when a
 * memory page is accessed; it is the responsibility of software to set it.
 * Tracking the access bit then incurs an extra page fault penalty. As an
 * optimization, the access bit can be set during every page fault on these
 * architectures. To differentiate it from the macro pte_mkyoung, this macro
 * is used on platforms where software maintains the page access bit.
 */
#ifndef pte_sw_mkyoung
static inline pte_t pte_sw_mkyoung(pte_t pte)
{
	return pte;
}
#define pte_sw_mkyoung	pte_sw_mkyoung
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	pud_t old_pud = *pudp;

	set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
}
#else
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	BUILD_BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif

#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp);
#else
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
					unsigned long address,
					pmd_t *pmdp)
{
	BUILD_BUG();
	return *pmdp;
}
#define pmdp_collapse_flush pmdp_collapse_flush
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable);
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif

#ifndef arch_needs_pgtable_deposit
#define arch_needs_pgtable_deposit() (false)
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
976ca5999fdSMike Rapoport /*
977ca5999fdSMike Rapoport  * This is an implementation of pmdp_establish() that is only suitable for an
978ca5999fdSMike Rapoport  * architecture that doesn't have hardware dirty/accessed bits. In this case we
9792eb70aabSBhaskar Chowdhury  * can't race with the CPU which sets these bits, so a non-atomic approach is fine.
980ca5999fdSMike Rapoport  */
981ca5999fdSMike Rapoport static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
982ca5999fdSMike Rapoport 		unsigned long address, pmd_t *pmdp, pmd_t pmd)
983ca5999fdSMike Rapoport {
984ca5999fdSMike Rapoport 	pmd_t old_pmd = *pmdp;
985ca5999fdSMike Rapoport 	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
986ca5999fdSMike Rapoport 	return old_pmd;
987ca5999fdSMike Rapoport }
988ca5999fdSMike Rapoport #endif
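/*
 * A minimal sketch of how an architecture without hardware dirty/accessed
 * bits might wire this helper up in its <asm/pgtable.h>; illustrative only,
 * not something this generic header defines itself:
 */
#if 0
#define pmdp_establish generic_pmdp_establish
#endif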
989ca5999fdSMike Rapoport 
990ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMDP_INVALIDATE
991ca5999fdSMike Rapoport extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
992ca5999fdSMike Rapoport 			    pmd_t *pmdp);
993ca5999fdSMike Rapoport #endif
994ca5999fdSMike Rapoport 
9954f831457SNadav Amit #ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD
9964f831457SNadav Amit 
9974f831457SNadav Amit /*
9984f831457SNadav Amit  * pmdp_invalidate_ad() invalidates the PMD while changing a transparent
9994f831457SNadav Amit  * hugepage mapping in the page tables. This function is similar to
10004f831457SNadav Amit  * pmdp_invalidate(), but should only be used if the access and dirty bits would
10014f831457SNadav Amit  * not be cleared by the software in the new PMD value. The function ensures
10024f831457SNadav Amit  * that hardware updates of the access and dirty bits are not lost.
10034f831457SNadav Amit  *
10044f831457SNadav Amit  * On certain architectures this allows a TLB flush to be avoided in most
10054f831457SNadav Amit  * cases. Another TLB flush might still be necessary later if the PMD update
10064f831457SNadav Amit  * itself requires such a flush (e.g., if protection was made stricter). Even
10074f831457SNadav Amit  * when a TLB flush is needed because of the update, the caller may be able
10084f831457SNadav Amit  * to batch these TLB flushing operations, so fewer TLB flush operations are
10094f831457SNadav Amit  * needed.
10104f831457SNadav Amit  */
10114f831457SNadav Amit extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
10124f831457SNadav Amit 				unsigned long address, pmd_t *pmdp);
10134f831457SNadav Amit #endif
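/*
 * A minimal sketch of a protection-change path using pmdp_invalidate_ad().
 * The helper name example_change_pmd_prot() is hypothetical; the pmd lock is
 * assumed to be held, and pmd_modify() is assumed to be available (as it is
 * on THP-capable architectures):
 */
#if 0
static void example_change_pmd_prot(struct vm_area_struct *vma,
				    unsigned long addr, pmd_t *pmdp,
				    pgprot_t newprot)
{
	/* Invalidate the old entry without losing hardware A/D updates. */
	pmd_t old_pmd = pmdp_invalidate_ad(vma, addr, pmdp);
	/* Rebuild the entry with the new protection and reinstall it. */
	pmd_t new_pmd = pmd_modify(old_pmd, newprot);

	set_pmd_at(vma->vm_mm, addr, pmdp, new_pmd);
}
#endif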
10144f831457SNadav Amit 
1015ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTE_SAME
1016ca5999fdSMike Rapoport static inline int pte_same(pte_t pte_a, pte_t pte_b)
1017ca5999fdSMike Rapoport {
1018ca5999fdSMike Rapoport 	return pte_val(pte_a) == pte_val(pte_b);
1019ca5999fdSMike Rapoport }
1020ca5999fdSMike Rapoport #endif
1021ca5999fdSMike Rapoport 
1022ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTE_UNUSED
1023ca5999fdSMike Rapoport /*
1024ca5999fdSMike Rapoport  * Some architectures provide facilities to virtualization guests
1025ca5999fdSMike Rapoport  * so that they can flag allocated pages as unused. This allows the
1026ca5999fdSMike Rapoport  * host to transparently reclaim unused pages. This function returns
1027ca5999fdSMike Rapoport  * whether the pte's page is unused.
1028ca5999fdSMike Rapoport  */
1029ca5999fdSMike Rapoport static inline int pte_unused(pte_t pte)
1030ca5999fdSMike Rapoport {
1031ca5999fdSMike Rapoport 	return 0;
1032ca5999fdSMike Rapoport }
1033ca5999fdSMike Rapoport #endif
1034ca5999fdSMike Rapoport 
1035ca5999fdSMike Rapoport #ifndef pte_access_permitted
1036ca5999fdSMike Rapoport #define pte_access_permitted(pte, write) \
1037ca5999fdSMike Rapoport 	(pte_present(pte) && (!(write) || pte_write(pte)))
1038ca5999fdSMike Rapoport #endif
1039ca5999fdSMike Rapoport 
1040ca5999fdSMike Rapoport #ifndef pmd_access_permitted
1041ca5999fdSMike Rapoport #define pmd_access_permitted(pmd, write) \
1042ca5999fdSMike Rapoport 	(pmd_present(pmd) && (!(write) || pmd_write(pmd)))
1043ca5999fdSMike Rapoport #endif
1044ca5999fdSMike Rapoport 
1045ca5999fdSMike Rapoport #ifndef pud_access_permitted
1046ca5999fdSMike Rapoport #define pud_access_permitted(pud, write) \
1047ca5999fdSMike Rapoport 	(pud_present(pud) && (!(write) || pud_write(pud)))
1048ca5999fdSMike Rapoport #endif
1049ca5999fdSMike Rapoport 
1050ca5999fdSMike Rapoport #ifndef p4d_access_permitted
1051ca5999fdSMike Rapoport #define p4d_access_permitted(p4d, write) \
1052ca5999fdSMike Rapoport 	(p4d_present(p4d) && (!(write) || p4d_write(p4d)))
1053ca5999fdSMike Rapoport #endif
1054ca5999fdSMike Rapoport 
1055ca5999fdSMike Rapoport #ifndef pgd_access_permitted
1056ca5999fdSMike Rapoport #define pgd_access_permitted(pgd, write) \
1057ca5999fdSMike Rapoport 	(pgd_present(pgd) && (!(write) || pgd_write(pgd)))
1058ca5999fdSMike Rapoport #endif
1059ca5999fdSMike Rapoport 
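/*
 * A small illustrative check in the style of the fast GUP path: an entry may
 * only be followed if it is present and, for a write, writable. The helper
 * name is hypothetical:
 */
#if 0
static bool example_may_follow_pte(pte_t pte, bool write)
{
	return pte_access_permitted(pte, write);
}
#endif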
1060ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PMD_SAME
1061ca5999fdSMike Rapoport static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
1062ca5999fdSMike Rapoport {
1063ca5999fdSMike Rapoport 	return pmd_val(pmd_a) == pmd_val(pmd_b);
1064ca5999fdSMike Rapoport }
1065973bf680SAneesh Kumar K.V #endif
1066ca5999fdSMike Rapoport 
1067973bf680SAneesh Kumar K.V #ifndef pud_same
1068ca5999fdSMike Rapoport static inline int pud_same(pud_t pud_a, pud_t pud_b)
1069ca5999fdSMike Rapoport {
1070ca5999fdSMike Rapoport 	return pud_val(pud_a) == pud_val(pud_b);
1071ca5999fdSMike Rapoport }
1072973bf680SAneesh Kumar K.V #define pud_same pud_same
1073ca5999fdSMike Rapoport #endif
1074ca5999fdSMike Rapoport 
1075ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_P4D_SAME
1076ca5999fdSMike Rapoport static inline int p4d_same(p4d_t p4d_a, p4d_t p4d_b)
1077ca5999fdSMike Rapoport {
1078ca5999fdSMike Rapoport 	return p4d_val(p4d_a) == p4d_val(p4d_b);
1079ca5999fdSMike Rapoport }
1080ca5999fdSMike Rapoport #endif
1081ca5999fdSMike Rapoport 
1082ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PGD_SAME
1083ca5999fdSMike Rapoport static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
1084ca5999fdSMike Rapoport {
1085ca5999fdSMike Rapoport 	return pgd_val(pgd_a) == pgd_val(pgd_b);
1086ca5999fdSMike Rapoport }
1087ca5999fdSMike Rapoport #endif
1088ca5999fdSMike Rapoport 
1089ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_DO_SWAP_PAGE
109029f252cdSBarry Song static inline void arch_do_swap_page_nr(struct mm_struct *mm,
109129f252cdSBarry Song 				     struct vm_area_struct *vma,
109229f252cdSBarry Song 				     unsigned long addr,
109329f252cdSBarry Song 				     pte_t pte, pte_t oldpte,
109429f252cdSBarry Song 				     int nr)
109529f252cdSBarry Song {
109629f252cdSBarry Song 
109729f252cdSBarry Song }
109829f252cdSBarry Song #else
1099ca5999fdSMike Rapoport /*
1100ca5999fdSMike Rapoport  * Some architectures support metadata associated with a page. When a
1101ca5999fdSMike Rapoport  * page is being swapped out, this metadata must be saved so it can be
1102ca5999fdSMike Rapoport  * restored when the page is swapped back in. SPARC M7 and newer
1103ca5999fdSMike Rapoport  * processors support an ADI (Application Data Integrity) tag for the
1104ca5999fdSMike Rapoport  * page as metadata for the page. arch_do_swap_page() can restore this
1105ca5999fdSMike Rapoport  * metadata when a page is swapped back in.
1106ca5999fdSMike Rapoport  */
110729f252cdSBarry Song static inline void arch_do_swap_page_nr(struct mm_struct *mm,
1108ca5999fdSMike Rapoport 					struct vm_area_struct *vma,
1109ca5999fdSMike Rapoport 					unsigned long addr,
111029f252cdSBarry Song 					pte_t pte, pte_t oldpte,
111129f252cdSBarry Song 					int nr)
1112ca5999fdSMike Rapoport {
111329f252cdSBarry Song 	for (int i = 0; i < nr; i++) {
111429f252cdSBarry Song 		arch_do_swap_page(vma->vm_mm, vma, addr + i * PAGE_SIZE,
111529f252cdSBarry Song 				pte_advance_pfn(pte, i),
111629f252cdSBarry Song 				pte_advance_pfn(oldpte, i));
111729f252cdSBarry Song 	}
1118ca5999fdSMike Rapoport }
1119ca5999fdSMike Rapoport #endif
1120ca5999fdSMike Rapoport 
1121ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_UNMAP_ONE
1122ca5999fdSMike Rapoport /*
1123ca5999fdSMike Rapoport  * Some architectures support metadata associated with a page. When a
1124ca5999fdSMike Rapoport  * page is being swapped out, this metadata must be saved so it can be
1125ca5999fdSMike Rapoport  * restored when the page is swapped back in. SPARC M7 and newer
1126ca5999fdSMike Rapoport  * processors support an ADI (Application Data Integrity) tag for the
1127ca5999fdSMike Rapoport  * page as metadata for the page. arch_unmap_one() can save this
1128ca5999fdSMike Rapoport  * metadata on a swap-out of a page.
1129ca5999fdSMike Rapoport  */
1130ca5999fdSMike Rapoport static inline int arch_unmap_one(struct mm_struct *mm,
1131ca5999fdSMike Rapoport 				  struct vm_area_struct *vma,
1132ca5999fdSMike Rapoport 				  unsigned long addr,
1133ca5999fdSMike Rapoport 				  pte_t orig_pte)
1134ca5999fdSMike Rapoport {
1135ca5999fdSMike Rapoport 	return 0;
1136ca5999fdSMike Rapoport }
1137ca5999fdSMike Rapoport #endif
1138ca5999fdSMike Rapoport 
11398a84802eSSteven Price /*
11408a84802eSSteven Price  * Allow architectures to preserve additional metadata associated with
11418a84802eSSteven Price  * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function
11428a84802eSSteven Price  * prototypes must be defined in the arch-specific asm/pgtable.h file.
11438a84802eSSteven Price  */
11448a84802eSSteven Price #ifndef __HAVE_ARCH_PREPARE_TO_SWAP
1145f238b8c3SBarry Song static inline int arch_prepare_to_swap(struct folio *folio)
11468a84802eSSteven Price {
11478a84802eSSteven Price 	return 0;
11488a84802eSSteven Price }
11498a84802eSSteven Price #endif
11508a84802eSSteven Price 
11518a84802eSSteven Price #ifndef __HAVE_ARCH_SWAP_INVALIDATE
11528a84802eSSteven Price static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
11538a84802eSSteven Price {
11548a84802eSSteven Price }
11558a84802eSSteven Price 
11568a84802eSSteven Price static inline void arch_swap_invalidate_area(int type)
11578a84802eSSteven Price {
11588a84802eSSteven Price }
11598a84802eSSteven Price #endif
11608a84802eSSteven Price 
11618a84802eSSteven Price #ifndef __HAVE_ARCH_SWAP_RESTORE
1162da08e9b7SMatthew Wilcox (Oracle) static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
11638a84802eSSteven Price {
11648a84802eSSteven Price }
11658a84802eSSteven Price #endif
11668a84802eSSteven Price 
1167ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
1168ca5999fdSMike Rapoport #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
1169ca5999fdSMike Rapoport #endif
1170ca5999fdSMike Rapoport 
1171ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_MOVE_PTE
117282a616d0SDavid Hildenbrand #define move_pte(pte, old_addr, new_addr)	(pte)
1173ca5999fdSMike Rapoport #endif
1174ca5999fdSMike Rapoport 
1175ca5999fdSMike Rapoport #ifndef pte_accessible
1176ca5999fdSMike Rapoport # define pte_accessible(mm, pte)	((void)(pte), 1)
1177ca5999fdSMike Rapoport #endif
1178ca5999fdSMike Rapoport 
1179ca5999fdSMike Rapoport #ifndef flush_tlb_fix_spurious_fault
118099c29133SGerald Schaefer #define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address)
1181ca5999fdSMike Rapoport #endif
1182ca5999fdSMike Rapoport 
1183ca5999fdSMike Rapoport /*
1184ca5999fdSMike Rapoport  * When walking page tables, get the address of the next boundary,
1185ca5999fdSMike Rapoport  * or the end address of the range if that comes earlier.  Although no
1186ca5999fdSMike Rapoport  * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
1187ca5999fdSMike Rapoport  */
1188ca5999fdSMike Rapoport 
1189ca5999fdSMike Rapoport #define pgd_addr_end(addr, end)						\
1190ca5999fdSMike Rapoport ({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
1191ca5999fdSMike Rapoport 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
1192ca5999fdSMike Rapoport })
1193ca5999fdSMike Rapoport 
1194ca5999fdSMike Rapoport #ifndef p4d_addr_end
1195ca5999fdSMike Rapoport #define p4d_addr_end(addr, end)						\
1196ca5999fdSMike Rapoport ({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
1197ca5999fdSMike Rapoport 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
1198ca5999fdSMike Rapoport })
1199ca5999fdSMike Rapoport #endif
1200ca5999fdSMike Rapoport 
1201ca5999fdSMike Rapoport #ifndef pud_addr_end
1202ca5999fdSMike Rapoport #define pud_addr_end(addr, end)						\
1203ca5999fdSMike Rapoport ({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
1204ca5999fdSMike Rapoport 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
1205ca5999fdSMike Rapoport })
1206ca5999fdSMike Rapoport #endif
1207ca5999fdSMike Rapoport 
1208ca5999fdSMike Rapoport #ifndef pmd_addr_end
1209ca5999fdSMike Rapoport #define pmd_addr_end(addr, end)						\
1210ca5999fdSMike Rapoport ({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
1211ca5999fdSMike Rapoport 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
1212ca5999fdSMike Rapoport })
1213ca5999fdSMike Rapoport #endif
1214ca5999fdSMike Rapoport 
1215ca5999fdSMike Rapoport /*
1216ca5999fdSMike Rapoport  * When walking page tables, we usually want to skip any p?d_none entries;
1217ca5999fdSMike Rapoport  * and any p?d_bad entries - reporting the error before resetting to none.
1218ca5999fdSMike Rapoport  * Do the tests inline, but report and clear the bad entry in mm/memory.c.
1219ca5999fdSMike Rapoport  */
1220ca5999fdSMike Rapoport void pgd_clear_bad(pgd_t *);
1221ca5999fdSMike Rapoport 
1222ca5999fdSMike Rapoport #ifndef __PAGETABLE_P4D_FOLDED
1223ca5999fdSMike Rapoport void p4d_clear_bad(p4d_t *);
1224ca5999fdSMike Rapoport #else
1225ca5999fdSMike Rapoport #define p4d_clear_bad(p4d)        do { } while (0)
1226ca5999fdSMike Rapoport #endif
1227ca5999fdSMike Rapoport 
1228ca5999fdSMike Rapoport #ifndef __PAGETABLE_PUD_FOLDED
1229ca5999fdSMike Rapoport void pud_clear_bad(pud_t *);
1230ca5999fdSMike Rapoport #else
1231ca5999fdSMike Rapoport #define pud_clear_bad(p4d)        do { } while (0)
1232ca5999fdSMike Rapoport #endif
1233ca5999fdSMike Rapoport 
1234ca5999fdSMike Rapoport void pmd_clear_bad(pmd_t *);
1235ca5999fdSMike Rapoport 
1236ca5999fdSMike Rapoport static inline int pgd_none_or_clear_bad(pgd_t *pgd)
1237ca5999fdSMike Rapoport {
1238ca5999fdSMike Rapoport 	if (pgd_none(*pgd))
1239ca5999fdSMike Rapoport 		return 1;
1240ca5999fdSMike Rapoport 	if (unlikely(pgd_bad(*pgd))) {
1241ca5999fdSMike Rapoport 		pgd_clear_bad(pgd);
1242ca5999fdSMike Rapoport 		return 1;
1243ca5999fdSMike Rapoport 	}
1244ca5999fdSMike Rapoport 	return 0;
1245ca5999fdSMike Rapoport }
1246ca5999fdSMike Rapoport 
1247ca5999fdSMike Rapoport static inline int p4d_none_or_clear_bad(p4d_t *p4d)
1248ca5999fdSMike Rapoport {
1249ca5999fdSMike Rapoport 	if (p4d_none(*p4d))
1250ca5999fdSMike Rapoport 		return 1;
1251ca5999fdSMike Rapoport 	if (unlikely(p4d_bad(*p4d))) {
1252ca5999fdSMike Rapoport 		p4d_clear_bad(p4d);
1253ca5999fdSMike Rapoport 		return 1;
1254ca5999fdSMike Rapoport 	}
1255ca5999fdSMike Rapoport 	return 0;
1256ca5999fdSMike Rapoport }
1257ca5999fdSMike Rapoport 
1258ca5999fdSMike Rapoport static inline int pud_none_or_clear_bad(pud_t *pud)
1259ca5999fdSMike Rapoport {
1260ca5999fdSMike Rapoport 	if (pud_none(*pud))
1261ca5999fdSMike Rapoport 		return 1;
1262ca5999fdSMike Rapoport 	if (unlikely(pud_bad(*pud))) {
1263ca5999fdSMike Rapoport 		pud_clear_bad(pud);
1264ca5999fdSMike Rapoport 		return 1;
1265ca5999fdSMike Rapoport 	}
1266ca5999fdSMike Rapoport 	return 0;
1267ca5999fdSMike Rapoport }
1268ca5999fdSMike Rapoport 
1269ca5999fdSMike Rapoport static inline int pmd_none_or_clear_bad(pmd_t *pmd)
1270ca5999fdSMike Rapoport {
1271ca5999fdSMike Rapoport 	if (pmd_none(*pmd))
1272ca5999fdSMike Rapoport 		return 1;
1273ca5999fdSMike Rapoport 	if (unlikely(pmd_bad(*pmd))) {
1274ca5999fdSMike Rapoport 		pmd_clear_bad(pmd);
1275ca5999fdSMike Rapoport 		return 1;
1276ca5999fdSMike Rapoport 	}
1277ca5999fdSMike Rapoport 	return 0;
1278ca5999fdSMike Rapoport }
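/*
 * A minimal walker skeleton showing how pXd_addr_end() and
 * pXd_none_or_clear_bad() are typically combined. The helper name is
 * hypothetical and mmap_lock is assumed to be held:
 */
#if 0
static void example_walk_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	unsigned long next;

	do {
		/* Clamp to the next PMD boundary or the end of the range. */
		next = pmd_addr_end(addr, end);
		/* Skip empty entries; report and clear bad ones. */
		if (pmd_none_or_clear_bad(pmd))
			continue;
		/* ... descend to the PTE level here ... */
	} while (pmd++, addr = next, addr != end);
}
#endif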
1279ca5999fdSMike Rapoport 
1280ca5999fdSMike Rapoport static inline pte_t __ptep_modify_prot_start(struct vm_area_struct *vma,
1281ca5999fdSMike Rapoport 					     unsigned long addr,
1282ca5999fdSMike Rapoport 					     pte_t *ptep)
1283ca5999fdSMike Rapoport {
1284ca5999fdSMike Rapoport 	/*
1285ca5999fdSMike Rapoport 	 * Get the current pte state, but zero it out to make it
1286ca5999fdSMike Rapoport 	 * non-present, preventing the hardware from asynchronously
1287ca5999fdSMike Rapoport 	 * updating it.
1288ca5999fdSMike Rapoport 	 */
1289ca5999fdSMike Rapoport 	return ptep_get_and_clear(vma->vm_mm, addr, ptep);
1290ca5999fdSMike Rapoport }
1291ca5999fdSMike Rapoport 
1292ca5999fdSMike Rapoport static inline void __ptep_modify_prot_commit(struct vm_area_struct *vma,
1293ca5999fdSMike Rapoport 					     unsigned long addr,
1294ca5999fdSMike Rapoport 					     pte_t *ptep, pte_t pte)
1295ca5999fdSMike Rapoport {
1296ca5999fdSMike Rapoport 	/*
1297ca5999fdSMike Rapoport 	 * The pte is non-present, so there's no hardware state to
1298ca5999fdSMike Rapoport 	 * preserve.
1299ca5999fdSMike Rapoport 	 */
1300ca5999fdSMike Rapoport 	set_pte_at(vma->vm_mm, addr, ptep, pte);
1301ca5999fdSMike Rapoport }
1302ca5999fdSMike Rapoport 
1303ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1304ca5999fdSMike Rapoport /*
1305ca5999fdSMike Rapoport  * Start a pte protection read-modify-write transaction, which
1306ca5999fdSMike Rapoport  * protects against asynchronous hardware modifications to the pte.
1307ca5999fdSMike Rapoport  * The intention is not to prevent the hardware from making pte
1308ca5999fdSMike Rapoport  * updates, but to prevent any updates it may make from being lost.
1309ca5999fdSMike Rapoport  *
1310ca5999fdSMike Rapoport  * This does not protect against other software modifications of the
13112eb70aabSBhaskar Chowdhury  * pte; the appropriate pte lock must be held over the transaction.
1312ca5999fdSMike Rapoport  *
1313ca5999fdSMike Rapoport  * Note that this interface is intended to be batchable, meaning that
1314ca5999fdSMike Rapoport  * ptep_modify_prot_commit may not actually update the pte, but merely
1315ca5999fdSMike Rapoport  * queue the update to be done at some later time.  The update must be
1316ca5999fdSMike Rapoport  * actually committed before the pte lock is released, however.
1317ca5999fdSMike Rapoport  */
1318ca5999fdSMike Rapoport static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
1319ca5999fdSMike Rapoport 					   unsigned long addr,
1320ca5999fdSMike Rapoport 					   pte_t *ptep)
1321ca5999fdSMike Rapoport {
1322ca5999fdSMike Rapoport 	return __ptep_modify_prot_start(vma, addr, ptep);
1323ca5999fdSMike Rapoport }
1324ca5999fdSMike Rapoport 
1325ca5999fdSMike Rapoport /*
1326ca5999fdSMike Rapoport  * Commit an update to a pte, leaving any hardware-controlled bits in
1327ca5999fdSMike Rapoport  * the PTE unmodified.
1328ca5999fdSMike Rapoport  */
1329ca5999fdSMike Rapoport static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
1330ca5999fdSMike Rapoport 					   unsigned long addr,
1331ca5999fdSMike Rapoport 					   pte_t *ptep, pte_t old_pte, pte_t pte)
1332ca5999fdSMike Rapoport {
1333ca5999fdSMike Rapoport 	__ptep_modify_prot_commit(vma, addr, ptep, pte);
1334ca5999fdSMike Rapoport }
1335ca5999fdSMike Rapoport #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
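/*
 * A minimal sketch of the transaction in use, in the style of a
 * change_protection()-like path. The helper name is hypothetical and the
 * pte lock is assumed to be held across start and commit:
 */
#if 0
static void example_change_pte_prot(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pgprot_t newprot)
{
	/* Take the pte out of service so hardware updates cannot be lost. */
	pte_t old_pte = ptep_modify_prot_start(vma, addr, ptep);
	pte_t new_pte = pte_modify(old_pte, newprot);

	/* The commit must happen before the pte lock is dropped. */
	ptep_modify_prot_commit(vma, addr, ptep, old_pte, new_pte);
}
#endif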
1336ca5999fdSMike Rapoport #endif /* CONFIG_MMU */
1337ca5999fdSMike Rapoport 
1338ca5999fdSMike Rapoport /*
1339ca5999fdSMike Rapoport  * No-op macros that just return the current protection value. Defined here
13401067b261SRandy Dunlap  * because these macros can be used even if CONFIG_MMU is not defined.
1341ca5999fdSMike Rapoport  */
134263bb76deSPekka Enberg 
134363bb76deSPekka Enberg #ifndef pgprot_nx
134463bb76deSPekka Enberg #define pgprot_nx(prot)	(prot)
134563bb76deSPekka Enberg #endif
134663bb76deSPekka Enberg 
134763bb76deSPekka Enberg #ifndef pgprot_noncached
134863bb76deSPekka Enberg #define pgprot_noncached(prot)	(prot)
134963bb76deSPekka Enberg #endif
135063bb76deSPekka Enberg 
135163bb76deSPekka Enberg #ifndef pgprot_writecombine
135263bb76deSPekka Enberg #define pgprot_writecombine pgprot_noncached
135363bb76deSPekka Enberg #endif
135463bb76deSPekka Enberg 
135563bb76deSPekka Enberg #ifndef pgprot_writethrough
135663bb76deSPekka Enberg #define pgprot_writethrough pgprot_noncached
135763bb76deSPekka Enberg #endif
135863bb76deSPekka Enberg 
135963bb76deSPekka Enberg #ifndef pgprot_device
136063bb76deSPekka Enberg #define pgprot_device pgprot_noncached
136163bb76deSPekka Enberg #endif
136263bb76deSPekka Enberg 
1363d15dfd31SCatalin Marinas #ifndef pgprot_mhp
1364d15dfd31SCatalin Marinas #define pgprot_mhp(prot)	(prot)
1365d15dfd31SCatalin Marinas #endif
1366d15dfd31SCatalin Marinas 
136763bb76deSPekka Enberg #ifdef CONFIG_MMU
136863bb76deSPekka Enberg #ifndef pgprot_modify
136963bb76deSPekka Enberg #define pgprot_modify pgprot_modify
137063bb76deSPekka Enberg static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
137163bb76deSPekka Enberg {
137263bb76deSPekka Enberg 	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
137363bb76deSPekka Enberg 		newprot = pgprot_noncached(newprot);
137463bb76deSPekka Enberg 	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
137563bb76deSPekka Enberg 		newprot = pgprot_writecombine(newprot);
137663bb76deSPekka Enberg 	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
137763bb76deSPekka Enberg 		newprot = pgprot_device(newprot);
137863bb76deSPekka Enberg 	return newprot;
137963bb76deSPekka Enberg }
138063bb76deSPekka Enberg #endif
138163bb76deSPekka Enberg #endif /* CONFIG_MMU */
138263bb76deSPekka Enberg 
1383ca5999fdSMike Rapoport #ifndef pgprot_encrypted
1384ca5999fdSMike Rapoport #define pgprot_encrypted(prot)	(prot)
1385ca5999fdSMike Rapoport #endif
1386ca5999fdSMike Rapoport 
1387ca5999fdSMike Rapoport #ifndef pgprot_decrypted
1388ca5999fdSMike Rapoport #define pgprot_decrypted(prot)	(prot)
1389ca5999fdSMike Rapoport #endif
1390ca5999fdSMike Rapoport 
1391ca5999fdSMike Rapoport /*
1392ca5999fdSMike Rapoport  * A facility to provide batching of the reload of page tables and
1393ca5999fdSMike Rapoport  * other process state with the actual context switch code for
1394ca5999fdSMike Rapoport  * paravirtualized guests.  By convention, only one of the batched
1395ca5999fdSMike Rapoport  * update (lazy) modes (CPU, MMU) should be active at any given time,
1396ca5999fdSMike Rapoport  * entry should never be nested, and entry and exits should always be
1397ca5999fdSMike Rapoport  * paired.  This is for sanity of maintaining and reasoning about the
1398ca5999fdSMike Rapoport  * kernel code.  In this case, the exit (end of the context switch) is
1399ca5999fdSMike Rapoport  * in architecture-specific code, and so doesn't need a generic
1400ca5999fdSMike Rapoport  * definition.
1401ca5999fdSMike Rapoport  */
1402ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
1403ca5999fdSMike Rapoport #define arch_start_context_switch(prev)	do {} while (0)
1404ca5999fdSMike Rapoport #endif
1405ca5999fdSMike Rapoport 
1406ca5999fdSMike Rapoport #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
1407ca5999fdSMike Rapoport #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
1408ca5999fdSMike Rapoport static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
1409ca5999fdSMike Rapoport {
1410ca5999fdSMike Rapoport 	return pmd;
1411ca5999fdSMike Rapoport }
1412ca5999fdSMike Rapoport 
1413ca5999fdSMike Rapoport static inline int pmd_swp_soft_dirty(pmd_t pmd)
1414ca5999fdSMike Rapoport {
1415ca5999fdSMike Rapoport 	return 0;
1416ca5999fdSMike Rapoport }
1417ca5999fdSMike Rapoport 
1418ca5999fdSMike Rapoport static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
1419ca5999fdSMike Rapoport {
1420ca5999fdSMike Rapoport 	return pmd;
1421ca5999fdSMike Rapoport }
1422ca5999fdSMike Rapoport #endif
1423ca5999fdSMike Rapoport #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
1424ca5999fdSMike Rapoport static inline int pte_soft_dirty(pte_t pte)
1425ca5999fdSMike Rapoport {
1426ca5999fdSMike Rapoport 	return 0;
1427ca5999fdSMike Rapoport }
1428ca5999fdSMike Rapoport 
1429ca5999fdSMike Rapoport static inline int pmd_soft_dirty(pmd_t pmd)
1430ca5999fdSMike Rapoport {
1431ca5999fdSMike Rapoport 	return 0;
1432ca5999fdSMike Rapoport }
1433ca5999fdSMike Rapoport 
1434ca5999fdSMike Rapoport static inline pte_t pte_mksoft_dirty(pte_t pte)
1435ca5999fdSMike Rapoport {
1436ca5999fdSMike Rapoport 	return pte;
1437ca5999fdSMike Rapoport }
1438ca5999fdSMike Rapoport 
1439ca5999fdSMike Rapoport static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
1440ca5999fdSMike Rapoport {
1441ca5999fdSMike Rapoport 	return pmd;
1442ca5999fdSMike Rapoport }
1443ca5999fdSMike Rapoport 
1444ca5999fdSMike Rapoport static inline pte_t pte_clear_soft_dirty(pte_t pte)
1445ca5999fdSMike Rapoport {
1446ca5999fdSMike Rapoport 	return pte;
1447ca5999fdSMike Rapoport }
1448ca5999fdSMike Rapoport 
1449ca5999fdSMike Rapoport static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
1450ca5999fdSMike Rapoport {
1451ca5999fdSMike Rapoport 	return pmd;
1452ca5999fdSMike Rapoport }
1453ca5999fdSMike Rapoport 
1454ca5999fdSMike Rapoport static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
1455ca5999fdSMike Rapoport {
1456ca5999fdSMike Rapoport 	return pte;
1457ca5999fdSMike Rapoport }
1458ca5999fdSMike Rapoport 
1459ca5999fdSMike Rapoport static inline int pte_swp_soft_dirty(pte_t pte)
1460ca5999fdSMike Rapoport {
1461ca5999fdSMike Rapoport 	return 0;
1462ca5999fdSMike Rapoport }
1463ca5999fdSMike Rapoport 
1464ca5999fdSMike Rapoport static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
1465ca5999fdSMike Rapoport {
1466ca5999fdSMike Rapoport 	return pte;
1467ca5999fdSMike Rapoport }
1468ca5999fdSMike Rapoport 
1469ca5999fdSMike Rapoport static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
1470ca5999fdSMike Rapoport {
1471ca5999fdSMike Rapoport 	return pmd;
1472ca5999fdSMike Rapoport }
1473ca5999fdSMike Rapoport 
1474ca5999fdSMike Rapoport static inline int pmd_swp_soft_dirty(pmd_t pmd)
1475ca5999fdSMike Rapoport {
1476ca5999fdSMike Rapoport 	return 0;
1477ca5999fdSMike Rapoport }
1478ca5999fdSMike Rapoport 
1479ca5999fdSMike Rapoport static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
1480ca5999fdSMike Rapoport {
1481ca5999fdSMike Rapoport 	return pmd;
1482ca5999fdSMike Rapoport }
1483ca5999fdSMike Rapoport #endif
1484ca5999fdSMike Rapoport 
1485ca5999fdSMike Rapoport #ifndef __HAVE_PFNMAP_TRACKING
1486ca5999fdSMike Rapoport /*
1487ca5999fdSMike Rapoport  * Interfaces that can be used by architecture code to keep track of
1488ca5999fdSMike Rapoport  * the memory type of pfn mappings established by remap_pfn_range() and
1489ca5999fdSMike Rapoport  * vmf_insert_pfn().
1490ca5999fdSMike Rapoport  */
1491ca5999fdSMike Rapoport 
1492ca5999fdSMike Rapoport /*
1493ca5999fdSMike Rapoport  * track_pfn_remap is called when a _new_ pfn mapping is being established
1494ca5999fdSMike Rapoport  * by remap_pfn_range() for physical range indicated by pfn and size.
1495ca5999fdSMike Rapoport  */
1496ca5999fdSMike Rapoport static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
1497ca5999fdSMike Rapoport 				  unsigned long pfn, unsigned long addr,
1498ca5999fdSMike Rapoport 				  unsigned long size)
1499ca5999fdSMike Rapoport {
1500ca5999fdSMike Rapoport 	return 0;
1501ca5999fdSMike Rapoport }
1502ca5999fdSMike Rapoport 
1503ca5999fdSMike Rapoport /*
1504ca5999fdSMike Rapoport  * track_pfn_insert is called when a _new_ single pfn is established
1505ca5999fdSMike Rapoport  * by vmf_insert_pfn().
1506ca5999fdSMike Rapoport  */
1507ca5999fdSMike Rapoport static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
1508ca5999fdSMike Rapoport 				    pfn_t pfn)
1509ca5999fdSMike Rapoport {
1510ca5999fdSMike Rapoport }
1511ca5999fdSMike Rapoport 
1512ca5999fdSMike Rapoport /*
1513dc84bc2aSDavid Hildenbrand  * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
1514*8c56c5dbSDavid Hildenbrand  * tables copied during copy_page_range(). Will store the pfn to be
1515*8c56c5dbSDavid Hildenbrand  * passed to untrack_pfn_copy() only if there is something to be untracked.
1516*8c56c5dbSDavid Hildenbrand  * Callers should initialize the pfn to 0.
1517ca5999fdSMike Rapoport  */
1518dc84bc2aSDavid Hildenbrand static inline int track_pfn_copy(struct vm_area_struct *dst_vma,
1519dc84bc2aSDavid Hildenbrand 		struct vm_area_struct *src_vma, unsigned long *pfn)
1520ca5999fdSMike Rapoport {
1521ca5999fdSMike Rapoport 	return 0;
1522ca5999fdSMike Rapoport }
1523ca5999fdSMike Rapoport 
1524ca5999fdSMike Rapoport /*
1525dc84bc2aSDavid Hildenbrand  * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during
1526*8c56c5dbSDavid Hildenbrand  * copy_page_range(), but after track_pfn_copy() was already called. Can
1527*8c56c5dbSDavid Hildenbrand  * be called even if track_pfn_copy() did not actually track anything:
1528*8c56c5dbSDavid Hildenbrand  * handled internally.
1529dc84bc2aSDavid Hildenbrand  */
1530dc84bc2aSDavid Hildenbrand static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma,
1531dc84bc2aSDavid Hildenbrand 		unsigned long pfn)
1532dc84bc2aSDavid Hildenbrand {
1533dc84bc2aSDavid Hildenbrand }
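/*
 * A minimal sketch of the copy-time contract described above: pfn is
 * initialized to 0, and untrack_pfn_copy() is only reached on failure.
 * example_copy_pfnmap() and example_copy_page_tables() are hypothetical:
 */
#if 0
static int example_copy_pfnmap(struct vm_area_struct *dst_vma,
			       struct vm_area_struct *src_vma)
{
	unsigned long pfn = 0;
	int ret;

	ret = track_pfn_copy(dst_vma, src_vma, &pfn);
	if (ret)
		return ret;

	ret = example_copy_page_tables(dst_vma, src_vma);
	if (ret)
		/* Safe even if track_pfn_copy() tracked nothing. */
		untrack_pfn_copy(dst_vma, pfn);
	return ret;
}
#endif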
1534dc84bc2aSDavid Hildenbrand 
1535dc84bc2aSDavid Hildenbrand /*
1536ca5999fdSMike Rapoport  * untrack_pfn is called while unmapping a pfnmap for a region.
1537ca5999fdSMike Rapoport  * untrack can be called for a specific region indicated by pfn and size or
1538ca5999fdSMike Rapoport  * can be for the entire vma (in which case pfn, size are zero).
1539ca5999fdSMike Rapoport  */
1540ca5999fdSMike Rapoport static inline void untrack_pfn(struct vm_area_struct *vma,
154168f48381SSuren Baghdasaryan 			       unsigned long pfn, unsigned long size,
154268f48381SSuren Baghdasaryan 			       bool mm_wr_locked)
1543ca5999fdSMike Rapoport {
1544ca5999fdSMike Rapoport }
1545ca5999fdSMike Rapoport 
1546ca5999fdSMike Rapoport /*
1547dc84bc2aSDavid Hildenbrand  * untrack_pfn_clear is called in the following cases on a VM_PFNMAP VMA:
1548dc84bc2aSDavid Hildenbrand  *
1549dc84bc2aSDavid Hildenbrand  * 1) During mremap() on the src VMA after the page tables were moved.
1550dc84bc2aSDavid Hildenbrand  * 2) During fork() on the dst VMA, immediately after duplicating the src VMA.
1551ca5999fdSMike Rapoport  */
1552d155df53SMa Wupeng static inline void untrack_pfn_clear(struct vm_area_struct *vma)
1553ca5999fdSMike Rapoport {
1554ca5999fdSMike Rapoport }
1555ca5999fdSMike Rapoport #else
1556ca5999fdSMike Rapoport extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
1557ca5999fdSMike Rapoport 			   unsigned long pfn, unsigned long addr,
1558ca5999fdSMike Rapoport 			   unsigned long size);
1559ca5999fdSMike Rapoport extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
1560ca5999fdSMike Rapoport 			     pfn_t pfn);
1561dc84bc2aSDavid Hildenbrand extern int track_pfn_copy(struct vm_area_struct *dst_vma,
1562dc84bc2aSDavid Hildenbrand 		struct vm_area_struct *src_vma, unsigned long *pfn);
1563dc84bc2aSDavid Hildenbrand extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
1564dc84bc2aSDavid Hildenbrand 		unsigned long pfn);
1565ca5999fdSMike Rapoport extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
156668f48381SSuren Baghdasaryan 			unsigned long size, bool mm_wr_locked);
1567d155df53SMa Wupeng extern void untrack_pfn_clear(struct vm_area_struct *vma);
1568ca5999fdSMike Rapoport #endif
1569ca5999fdSMike Rapoport 
15709afaf30fSPavel Tatashin #ifdef CONFIG_MMU
1571ca5999fdSMike Rapoport #ifdef __HAVE_COLOR_ZERO_PAGE
1572ca5999fdSMike Rapoport static inline int is_zero_pfn(unsigned long pfn)
1573ca5999fdSMike Rapoport {
1574ca5999fdSMike Rapoport 	extern unsigned long zero_pfn;
1575ca5999fdSMike Rapoport 	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
1576ca5999fdSMike Rapoport 	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
1577ca5999fdSMike Rapoport }
1578ca5999fdSMike Rapoport 
1579ca5999fdSMike Rapoport #define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
1580ca5999fdSMike Rapoport 
1581ca5999fdSMike Rapoport #else
1582ca5999fdSMike Rapoport static inline int is_zero_pfn(unsigned long pfn)
1583ca5999fdSMike Rapoport {
1584ca5999fdSMike Rapoport 	extern unsigned long zero_pfn;
1585ca5999fdSMike Rapoport 	return pfn == zero_pfn;
1586ca5999fdSMike Rapoport }
1587ca5999fdSMike Rapoport 
1588ca5999fdSMike Rapoport static inline unsigned long my_zero_pfn(unsigned long addr)
1589ca5999fdSMike Rapoport {
1590ca5999fdSMike Rapoport 	extern unsigned long zero_pfn;
1591ca5999fdSMike Rapoport 	return zero_pfn;
1592ca5999fdSMike Rapoport }
1593ca5999fdSMike Rapoport #endif
15949afaf30fSPavel Tatashin #else
15959afaf30fSPavel Tatashin static inline int is_zero_pfn(unsigned long pfn)
15969afaf30fSPavel Tatashin {
15979afaf30fSPavel Tatashin 	return 0;
15989afaf30fSPavel Tatashin }
15999afaf30fSPavel Tatashin 
16009afaf30fSPavel Tatashin static inline unsigned long my_zero_pfn(unsigned long addr)
16019afaf30fSPavel Tatashin {
16029afaf30fSPavel Tatashin 	return 0;
16039afaf30fSPavel Tatashin }
16049afaf30fSPavel Tatashin #endif /* CONFIG_MMU */
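/*
 * A small illustrative check along the lines of vm_normal_page(): the shared
 * zero page is identified purely by its pfn, not by a pte bit. The helper
 * name is hypothetical and a CONFIG_MMU configuration is assumed:
 */
#if 0
static bool example_pte_is_zero_page(pte_t pte)
{
	return pte_present(pte) && is_zero_pfn(pte_pfn(pte));
}
#endif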
1605ca5999fdSMike Rapoport 
1606ca5999fdSMike Rapoport #ifdef CONFIG_MMU
1607ca5999fdSMike Rapoport 
1608ca5999fdSMike Rapoport #ifndef CONFIG_TRANSPARENT_HUGEPAGE
1609ca5999fdSMike Rapoport static inline int pmd_trans_huge(pmd_t pmd)
1610ca5999fdSMike Rapoport {
1611ca5999fdSMike Rapoport 	return 0;
1612ca5999fdSMike Rapoport }
1613ca5999fdSMike Rapoport #ifndef pmd_write
1614ca5999fdSMike Rapoport static inline int pmd_write(pmd_t pmd)
1615ca5999fdSMike Rapoport {
1616ca5999fdSMike Rapoport 	BUG();
1617ca5999fdSMike Rapoport 	return 0;
1618ca5999fdSMike Rapoport }
1619ca5999fdSMike Rapoport #endif /* pmd_write */
1620ca5999fdSMike Rapoport #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1621ca5999fdSMike Rapoport 
1622ca5999fdSMike Rapoport #ifndef pud_write
1623ca5999fdSMike Rapoport static inline int pud_write(pud_t pud)
1624ca5999fdSMike Rapoport {
1625ca5999fdSMike Rapoport 	BUG();
1626ca5999fdSMike Rapoport 	return 0;
1627ca5999fdSMike Rapoport }
1628ca5999fdSMike Rapoport #endif /* pud_write */
1629ca5999fdSMike Rapoport 
1630ca5999fdSMike Rapoport #if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
1631ca5999fdSMike Rapoport static inline int pmd_devmap(pmd_t pmd)
1632ca5999fdSMike Rapoport {
1633ca5999fdSMike Rapoport 	return 0;
1634ca5999fdSMike Rapoport }
1635ca5999fdSMike Rapoport static inline int pud_devmap(pud_t pud)
1636ca5999fdSMike Rapoport {
1637ca5999fdSMike Rapoport 	return 0;
1638ca5999fdSMike Rapoport }
1639ca5999fdSMike Rapoport static inline int pgd_devmap(pgd_t pgd)
1640ca5999fdSMike Rapoport {
1641ca5999fdSMike Rapoport 	return 0;
1642ca5999fdSMike Rapoport }
1643ca5999fdSMike Rapoport #endif
1644ca5999fdSMike Rapoport 
1645ca5999fdSMike Rapoport #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
1646bcd0dea5SLiu Shixin 	!defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
1647ca5999fdSMike Rapoport static inline int pud_trans_huge(pud_t pud)
1648ca5999fdSMike Rapoport {
1649ca5999fdSMike Rapoport 	return 0;
1650ca5999fdSMike Rapoport }
1651ca5999fdSMike Rapoport #endif
1652ca5999fdSMike Rapoport 
1653feda5c39SHugh Dickins static inline int pud_trans_unstable(pud_t *pud)
1654ca5999fdSMike Rapoport {
1655feda5c39SHugh Dickins #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
1656feda5c39SHugh Dickins 	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
1657ca5999fdSMike Rapoport 	pud_t pudval = READ_ONCE(*pud);
1658ca5999fdSMike Rapoport 
1659ca5999fdSMike Rapoport 	if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
1660ca5999fdSMike Rapoport 		return 1;
1661ca5999fdSMike Rapoport 	if (unlikely(pud_bad(pudval))) {
1662ca5999fdSMike Rapoport 		pud_clear_bad(pud);
1663ca5999fdSMike Rapoport 		return 1;
1664ca5999fdSMike Rapoport 	}
1665ca5999fdSMike Rapoport #endif
1666ca5999fdSMike Rapoport 	return 0;
1667ca5999fdSMike Rapoport }
1668ca5999fdSMike Rapoport 
1669ca5999fdSMike Rapoport #ifndef CONFIG_NUMA_BALANCING
1670ca5999fdSMike Rapoport /*
167114fb1fd7SDavid Hildenbrand  * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is
167214fb1fd7SDavid Hildenbrand  * perfectly valid to indicate "no" in that case, which is why our default
167314fb1fd7SDavid Hildenbrand  * implementation defaults to "always no".
167414fb1fd7SDavid Hildenbrand  *
167514fb1fd7SDavid Hildenbrand  * In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE
167614fb1fd7SDavid Hildenbrand  * page protection due to NUMA hinting. NUMA hinting faults only apply in
167714fb1fd7SDavid Hildenbrand  * accessible VMAs.
167814fb1fd7SDavid Hildenbrand  *
167914fb1fd7SDavid Hildenbrand  * So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault,
168014fb1fd7SDavid Hildenbrand  * looking at the VMA accessibility is sufficient.
1681ca5999fdSMike Rapoport  */
1682ca5999fdSMike Rapoport static inline int pte_protnone(pte_t pte)
1683ca5999fdSMike Rapoport {
1684ca5999fdSMike Rapoport 	return 0;
1685ca5999fdSMike Rapoport }
1686ca5999fdSMike Rapoport 
1687ca5999fdSMike Rapoport static inline int pmd_protnone(pmd_t pmd)
1688ca5999fdSMike Rapoport {
1689ca5999fdSMike Rapoport 	return 0;
1690ca5999fdSMike Rapoport }
1691ca5999fdSMike Rapoport #endif /* CONFIG_NUMA_BALANCING */
1692ca5999fdSMike Rapoport 
1693ca5999fdSMike Rapoport #endif /* CONFIG_MMU */
1694ca5999fdSMike Rapoport 
1695ca5999fdSMike Rapoport #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
1696ca5999fdSMike Rapoport 
1697ca5999fdSMike Rapoport #ifndef __PAGETABLE_P4D_FOLDED
1698ca5999fdSMike Rapoport int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
1699c8db8c26SLi kunyu void p4d_clear_huge(p4d_t *p4d);
1700ca5999fdSMike Rapoport #else
1701ca5999fdSMike Rapoport static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
1702ca5999fdSMike Rapoport {
1703ca5999fdSMike Rapoport 	return 0;
1704ca5999fdSMike Rapoport }
1705c8db8c26SLi kunyu static inline void p4d_clear_huge(p4d_t *p4d) { }
1706ca5999fdSMike Rapoport #endif /* !__PAGETABLE_P4D_FOLDED */
1707ca5999fdSMike Rapoport 
1708ca5999fdSMike Rapoport int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
1709c742199aSChristophe Leroy int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
1710d8a71905SJonathan Marek int pud_clear_huge(pud_t *pud);
1711ca5999fdSMike Rapoport int pmd_clear_huge(pmd_t *pmd);
1712ca5999fdSMike Rapoport int p4d_free_pud_page(p4d_t *p4d, unsigned long addr);
1713ca5999fdSMike Rapoport int pud_free_pmd_page(pud_t *pud, unsigned long addr);
1714ca5999fdSMike Rapoport int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
1715ca5999fdSMike Rapoport #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
1716ca5999fdSMike Rapoport static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
1717ca5999fdSMike Rapoport {
1718ca5999fdSMike Rapoport 	return 0;
1719ca5999fdSMike Rapoport }
1720ca5999fdSMike Rapoport static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
1721ca5999fdSMike Rapoport {
1722ca5999fdSMike Rapoport 	return 0;
1723ca5999fdSMike Rapoport }
1724ca5999fdSMike Rapoport static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
1725ca5999fdSMike Rapoport {
1726ca5999fdSMike Rapoport 	return 0;
1727ca5999fdSMike Rapoport }
1728c8db8c26SLi kunyu static inline void p4d_clear_huge(p4d_t *p4d) { }
1729ca5999fdSMike Rapoport static inline int pud_clear_huge(pud_t *pud)
1730ca5999fdSMike Rapoport {
1731ca5999fdSMike Rapoport 	return 0;
1732ca5999fdSMike Rapoport }
1733ca5999fdSMike Rapoport static inline int pmd_clear_huge(pmd_t *pmd)
1734ca5999fdSMike Rapoport {
1735ca5999fdSMike Rapoport 	return 0;
1736ca5999fdSMike Rapoport }
1737ca5999fdSMike Rapoport static inline int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
1738ca5999fdSMike Rapoport {
1739ca5999fdSMike Rapoport 	return 0;
1740ca5999fdSMike Rapoport }
1741ca5999fdSMike Rapoport static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
1742ca5999fdSMike Rapoport {
1743ca5999fdSMike Rapoport 	return 0;
1744ca5999fdSMike Rapoport }
1745ca5999fdSMike Rapoport static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
1746ca5999fdSMike Rapoport {
1747ca5999fdSMike Rapoport 	return 0;
1748ca5999fdSMike Rapoport }
1749ca5999fdSMike Rapoport #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
1750ca5999fdSMike Rapoport 
1751ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
1752ca5999fdSMike Rapoport #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1753ca5999fdSMike Rapoport /*
1754ca5999fdSMike Rapoport  * ARCHes with special requirements for evicting THP backing TLB entries can
1755ca5999fdSMike Rapoport  * implement this. Otherwise, it can also help optimize the normal TLB flush in
17561067b261SRandy Dunlap  * the THP regime. Stock flush_tlb_range() typically has an optimization to nuke
17571067b261SRandy Dunlap  * the entire TLB if the flush span is greater than a threshold, which will
17581067b261SRandy Dunlap  * likely be true for a single huge page. Thus a single THP flush will
17591067b261SRandy Dunlap  * invalidate the entire TLB, which is not desirable.
1760ca5999fdSMike Rapoport  * e.g. see arch/arc: flush_pmd_tlb_range
1761ca5999fdSMike Rapoport  */
1762ca5999fdSMike Rapoport #define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
1763ca5999fdSMike Rapoport #define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
1764ca5999fdSMike Rapoport #else
1765ca5999fdSMike Rapoport #define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
1766ca5999fdSMike Rapoport #define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
1767ca5999fdSMike Rapoport #endif
1768ca5999fdSMike Rapoport #endif
1769ca5999fdSMike Rapoport 
1770ca5999fdSMike Rapoport struct file;
1771ca5999fdSMike Rapoport int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
1772ca5999fdSMike Rapoport 			unsigned long size, pgprot_t *vma_prot);
1773ca5999fdSMike Rapoport 
1774ca5999fdSMike Rapoport #ifndef CONFIG_X86_ESPFIX64
1775ca5999fdSMike Rapoport static inline void init_espfix_bsp(void) { }
1776ca5999fdSMike Rapoport #endif
1777ca5999fdSMike Rapoport 
1778ca5999fdSMike Rapoport extern void __init pgtable_cache_init(void);
1779ca5999fdSMike Rapoport 
1780ca5999fdSMike Rapoport #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
1781ca5999fdSMike Rapoport static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
1782ca5999fdSMike Rapoport {
1783ca5999fdSMike Rapoport 	return true;
1784ca5999fdSMike Rapoport }
1785ca5999fdSMike Rapoport 
1786ca5999fdSMike Rapoport static inline bool arch_has_pfn_modify_check(void)
1787ca5999fdSMike Rapoport {
1788ca5999fdSMike Rapoport 	return false;
1789ca5999fdSMike Rapoport }
1790ca5999fdSMike Rapoport #endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
1791ca5999fdSMike Rapoport 
1792ca5999fdSMike Rapoport /*
1793ca5999fdSMike Rapoport  * Architecture PAGE_KERNEL_* fallbacks
1794ca5999fdSMike Rapoport  *
1795ca5999fdSMike Rapoport  * Some architectures don't define certain PAGE_KERNEL_* flags. This is either
1796ca5999fdSMike Rapoport  * because they really don't support them, or the port needs to be updated to
1797ca5999fdSMike Rapoport  * reflect the required functionality. Below is a set of relatively safe,
1798ca5999fdSMike Rapoport  * best-effort fallbacks that we can count on until the architectures
1799ca5999fdSMike Rapoport  * define them on their own.
1800ca5999fdSMike Rapoport  */
1801ca5999fdSMike Rapoport 
1802ca5999fdSMike Rapoport #ifndef PAGE_KERNEL_RO
1803ca5999fdSMike Rapoport # define PAGE_KERNEL_RO PAGE_KERNEL
1804ca5999fdSMike Rapoport #endif
1805ca5999fdSMike Rapoport 
1806ca5999fdSMike Rapoport #ifndef PAGE_KERNEL_EXEC
1807ca5999fdSMike Rapoport # define PAGE_KERNEL_EXEC PAGE_KERNEL
1808ca5999fdSMike Rapoport #endif
1809ca5999fdSMike Rapoport 
1810ca5999fdSMike Rapoport /*
1811ca5999fdSMike Rapoport  * Page Table Modification bits for pgtbl_mod_mask.
1812ca5999fdSMike Rapoport  *
1813ca5999fdSMike Rapoport  * These are used by the p?d_alloc_track*() set of functions and in the generic
1814ca5999fdSMike Rapoport  * vmalloc/ioremap code to track at which page-table levels entries have been
1815ca5999fdSMike Rapoport  * modified. Based on that the code can better decide when vmalloc and ioremap
1816ca5999fdSMike Rapoport  * mapping changes need to be synchronized to other page-tables in the system.
1817ca5999fdSMike Rapoport  */
1818ca5999fdSMike Rapoport #define		__PGTBL_PGD_MODIFIED	0
1819ca5999fdSMike Rapoport #define		__PGTBL_P4D_MODIFIED	1
1820ca5999fdSMike Rapoport #define		__PGTBL_PUD_MODIFIED	2
1821ca5999fdSMike Rapoport #define		__PGTBL_PMD_MODIFIED	3
1822ca5999fdSMike Rapoport #define		__PGTBL_PTE_MODIFIED	4
1823ca5999fdSMike Rapoport 
1824ca5999fdSMike Rapoport #define		PGTBL_PGD_MODIFIED	BIT(__PGTBL_PGD_MODIFIED)
1825ca5999fdSMike Rapoport #define		PGTBL_P4D_MODIFIED	BIT(__PGTBL_P4D_MODIFIED)
1826ca5999fdSMike Rapoport #define		PGTBL_PUD_MODIFIED	BIT(__PGTBL_PUD_MODIFIED)
1827ca5999fdSMike Rapoport #define		PGTBL_PMD_MODIFIED	BIT(__PGTBL_PMD_MODIFIED)
1828ca5999fdSMike Rapoport #define		PGTBL_PTE_MODIFIED	BIT(__PGTBL_PTE_MODIFIED)
1829ca5999fdSMike Rapoport 
1830ca5999fdSMike Rapoport /* Page-Table Modification Mask */
1831ca5999fdSMike Rapoport typedef unsigned int pgtbl_mod_mask;
1832ca5999fdSMike Rapoport 
1833ca5999fdSMike Rapoport #endif /* !__ASSEMBLY__ */
1834ca5999fdSMike Rapoport 
1835cef39703SArnd Bergmann #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
1836cef39703SArnd Bergmann #ifdef CONFIG_PHYS_ADDR_T_64BIT
1837cef39703SArnd Bergmann /*
1838cef39703SArnd Bergmann  * ZSMALLOC needs to know the highest PFN on 32-bit architectures
1839cef39703SArnd Bergmann  * with physical address space extension, but falls back to
1840cef39703SArnd Bergmann  * BITS_PER_LONG otherwise.
1841cef39703SArnd Bergmann  */
1842cef39703SArnd Bergmann #error Missing MAX_POSSIBLE_PHYSMEM_BITS definition
1843cef39703SArnd Bergmann #else
1844cef39703SArnd Bergmann #define MAX_POSSIBLE_PHYSMEM_BITS 32
1845cef39703SArnd Bergmann #endif
1846cef39703SArnd Bergmann #endif
1847cef39703SArnd Bergmann 
1848ca5999fdSMike Rapoport #ifndef has_transparent_hugepage
1849a38c94edSLiu Shixin #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
1850ca5999fdSMike Rapoport #endif
1851ca5999fdSMike Rapoport 
1852348ad160SAneesh Kumar K.V #ifndef has_transparent_pud_hugepage
1853348ad160SAneesh Kumar K.V #define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
1854348ad160SAneesh Kumar K.V #endif
1855ca5999fdSMike Rapoport /*
1856ca5999fdSMike Rapoport  * On some architectures it depends on the mm if the p4d/pud or pmd
1857ca5999fdSMike Rapoport  * layer of the page table hierarchy is folded or not.
1858ca5999fdSMike Rapoport  */
1859ca5999fdSMike Rapoport #ifndef mm_p4d_folded
1860ca5999fdSMike Rapoport #define mm_p4d_folded(mm)	__is_defined(__PAGETABLE_P4D_FOLDED)
1861ca5999fdSMike Rapoport #endif
1862ca5999fdSMike Rapoport 
1863ca5999fdSMike Rapoport #ifndef mm_pud_folded
1864ca5999fdSMike Rapoport #define mm_pud_folded(mm)	__is_defined(__PAGETABLE_PUD_FOLDED)
1865ca5999fdSMike Rapoport #endif
1866ca5999fdSMike Rapoport 
1867ca5999fdSMike Rapoport #ifndef mm_pmd_folded
1868ca5999fdSMike Rapoport #define mm_pmd_folded(mm)	__is_defined(__PAGETABLE_PMD_FOLDED)
1869ca5999fdSMike Rapoport #endif
1870ca5999fdSMike Rapoport 
1871d3f7b1bbSVasily Gorbik #ifndef p4d_offset_lockless
1872d3f7b1bbSVasily Gorbik #define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address)
1873d3f7b1bbSVasily Gorbik #endif
1874d3f7b1bbSVasily Gorbik #ifndef pud_offset_lockless
1875d3f7b1bbSVasily Gorbik #define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address)
1876d3f7b1bbSVasily Gorbik #endif
1877d3f7b1bbSVasily Gorbik #ifndef pmd_offset_lockless
1878d3f7b1bbSVasily Gorbik #define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address)
1879d3f7b1bbSVasily Gorbik #endif
1880d3f7b1bbSVasily Gorbik 
1881ca5999fdSMike Rapoport /*
188264078b3dSPeter Xu  * pXd_leaf() is the API to check whether a pgtable entry is a huge page
188364078b3dSPeter Xu  * mapping.  It should work globally across all archs, without any
188464078b3dSPeter Xu  * dependency on CONFIG_* options.  For architectures that do not support
188564078b3dSPeter Xu  * huge mappings on specific levels, the fallbacks below will be used.
188664078b3dSPeter Xu  *
188764078b3dSPeter Xu  * A leaf pgtable entry should always imply the following:
188864078b3dSPeter Xu  *
188964078b3dSPeter Xu  * - It is a "present" entry.  IOW, before using this API, please check it
189064078b3dSPeter Xu  *   with pXd_present() first. NOTE: it may not always mean the "present
189164078b3dSPeter Xu  *   bit" is set.  For example, PROT_NONE entries are always "present".
189264078b3dSPeter Xu  *
189364078b3dSPeter Xu  * - It should _never_ be a swap entry of any type.  Above "present" check
189464078b3dSPeter Xu  *   should have guarded this, but let's be crystal clear on this.
189564078b3dSPeter Xu  *
189664078b3dSPeter Xu  * - It should contain a huge PFN, which points to a huge page larger than
189764078b3dSPeter Xu  *   PAGE_SIZE of the platform.  The PFN format isn't important here.
189864078b3dSPeter Xu  *
189964078b3dSPeter Xu  * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(),
190064078b3dSPeter Xu  *   pXd_devmap(), or hugetlb mappings).
1901ca5999fdSMike Rapoport  */
1902ca5999fdSMike Rapoport #ifndef pgd_leaf
1903c05995b7SPeter Xu #define pgd_leaf(x)	false
1904ca5999fdSMike Rapoport #endif
1905ca5999fdSMike Rapoport #ifndef p4d_leaf
1906c05995b7SPeter Xu #define p4d_leaf(x)	false
1907ca5999fdSMike Rapoport #endif
1908ca5999fdSMike Rapoport #ifndef pud_leaf
1909c05995b7SPeter Xu #define pud_leaf(x)	false
1910ca5999fdSMike Rapoport #endif
1911ca5999fdSMike Rapoport #ifndef pmd_leaf
1912c05995b7SPeter Xu #define pmd_leaf(x)	false
1913ca5999fdSMike Rapoport #endif
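/*
 * A small illustrative helper for the rule documented above: check
 * pXd_present() first, and only then treat the entry as a huge mapping.
 * The helper name is hypothetical:
 */
#if 0
static bool example_pmd_is_huge_mapping(pmd_t pmd)
{
	return pmd_present(pmd) && pmd_leaf(pmd);
}
#endif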
1914ca5999fdSMike Rapoport 
1915560dabbdSPeter Zijlstra #ifndef pgd_leaf_size
1916560dabbdSPeter Zijlstra #define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
1917560dabbdSPeter Zijlstra #endif
1918560dabbdSPeter Zijlstra #ifndef p4d_leaf_size
1919560dabbdSPeter Zijlstra #define p4d_leaf_size(x) P4D_SIZE
1920560dabbdSPeter Zijlstra #endif
1921560dabbdSPeter Zijlstra #ifndef pud_leaf_size
1922560dabbdSPeter Zijlstra #define pud_leaf_size(x) PUD_SIZE
1923560dabbdSPeter Zijlstra #endif
1924560dabbdSPeter Zijlstra #ifndef pmd_leaf_size
1925560dabbdSPeter Zijlstra #define pmd_leaf_size(x) PMD_SIZE
1926560dabbdSPeter Zijlstra #endif
192718d095b2SChristophe Leroy #ifndef __pte_leaf_size
1928560dabbdSPeter Zijlstra #ifndef pte_leaf_size
1929560dabbdSPeter Zijlstra #define pte_leaf_size(x) PAGE_SIZE
1930560dabbdSPeter Zijlstra #endif
193118d095b2SChristophe Leroy #define __pte_leaf_size(x,y) pte_leaf_size(y)
193218d095b2SChristophe Leroy #endif
1933560dabbdSPeter Zijlstra 
1934c0f8aa4fSDaniel Axtens /*
193535a76f5cSPeter Xu  * We always define pmd_pfn for all archs as it's used in lots of generic
193635a76f5cSPeter Xu  * code.  Now it happens too for pud_pfn (and can happen for larger
193735a76f5cSPeter Xu  * mappings too in the future; we're not there yet).  Instead of defining
193835a76f5cSPeter Xu  * it for all archs (like pmd_pfn), provide a fallback.
193935a76f5cSPeter Xu  *
194035a76f5cSPeter Xu  * Note that returning 0 here means any arch that didn't define this can
194135a76f5cSPeter Xu  * go severely wrong when it hits a real pud leaf.  It's the arch's
194235a76f5cSPeter Xu  * responsibility to properly define it when a huge pud is possible.
194335a76f5cSPeter Xu  */
194435a76f5cSPeter Xu #ifndef pud_pfn
194535a76f5cSPeter Xu #define pud_pfn(x) 0
194635a76f5cSPeter Xu #endif
194735a76f5cSPeter Xu 
194835a76f5cSPeter Xu /*
1949c0f8aa4fSDaniel Axtens  * Some architectures have MMUs that are configurable or selectable at boot
1950c0f8aa4fSDaniel Axtens  * time. These lead to variable PTRS_PER_x. For statically allocated arrays it
1951c0f8aa4fSDaniel Axtens  * helps to have a static maximum value.
1952c0f8aa4fSDaniel Axtens  */
1953c0f8aa4fSDaniel Axtens 
1954c0f8aa4fSDaniel Axtens #ifndef MAX_PTRS_PER_PTE
1955c0f8aa4fSDaniel Axtens #define MAX_PTRS_PER_PTE PTRS_PER_PTE
1956c0f8aa4fSDaniel Axtens #endif
1957c0f8aa4fSDaniel Axtens 
1958c0f8aa4fSDaniel Axtens #ifndef MAX_PTRS_PER_PMD
1959c0f8aa4fSDaniel Axtens #define MAX_PTRS_PER_PMD PTRS_PER_PMD
1960c0f8aa4fSDaniel Axtens #endif
1961c0f8aa4fSDaniel Axtens 
1962c0f8aa4fSDaniel Axtens #ifndef MAX_PTRS_PER_PUD
1963c0f8aa4fSDaniel Axtens #define MAX_PTRS_PER_PUD PTRS_PER_PUD
1964c0f8aa4fSDaniel Axtens #endif
1965c0f8aa4fSDaniel Axtens 
1966c0f8aa4fSDaniel Axtens #ifndef MAX_PTRS_PER_P4D
1967c0f8aa4fSDaniel Axtens #define MAX_PTRS_PER_P4D PTRS_PER_P4D
1968c0f8aa4fSDaniel Axtens #endif
1969c0f8aa4fSDaniel Axtens 
19700515e022SPeter Xu #ifndef pte_pgprot
19710515e022SPeter Xu #define pte_pgprot(x) ((pgprot_t) {0})
19720515e022SPeter Xu #endif
19730515e022SPeter Xu 
19740515e022SPeter Xu #ifndef pmd_pgprot
19750515e022SPeter Xu #define pmd_pgprot(x) ((pgprot_t) {0})
19760515e022SPeter Xu #endif
19770515e022SPeter Xu 
19780515e022SPeter Xu #ifndef pud_pgprot
19790515e022SPeter Xu #define pud_pgprot(x) ((pgprot_t) {0})
19800515e022SPeter Xu #endif
19810515e022SPeter Xu 
198243957b5dSAnshuman Khandual /* description of effects of mapping type and prot in current implementation.
198343957b5dSAnshuman Khandual  * this is due to the limited x86 page protection hardware.  The expected
198443957b5dSAnshuman Khandual  * behavior is in parens:
198543957b5dSAnshuman Khandual  *
198643957b5dSAnshuman Khandual  * map_type	prot
198743957b5dSAnshuman Khandual  *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
198843957b5dSAnshuman Khandual  * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
198943957b5dSAnshuman Khandual  *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
199043957b5dSAnshuman Khandual  *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
199143957b5dSAnshuman Khandual  *
199243957b5dSAnshuman Khandual  * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
199343957b5dSAnshuman Khandual  *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
199443957b5dSAnshuman Khandual  *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
199543957b5dSAnshuman Khandual  *
199643957b5dSAnshuman Khandual  * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
199743957b5dSAnshuman Khandual  * MAP_PRIVATE (with Enhanced PAN supported):
199843957b5dSAnshuman Khandual  *								r: (no) no
199943957b5dSAnshuman Khandual  *								w: (no) no
200043957b5dSAnshuman Khandual  *								x: (yes) yes
200143957b5dSAnshuman Khandual  */
200243957b5dSAnshuman Khandual #define DECLARE_VM_GET_PAGE_PROT					\
200343957b5dSAnshuman Khandual pgprot_t vm_get_page_prot(unsigned long vm_flags)			\
200443957b5dSAnshuman Khandual {									\
200543957b5dSAnshuman Khandual 		return protection_map[vm_flags &			\
200643957b5dSAnshuman Khandual 			(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];	\
200743957b5dSAnshuman Khandual }									\
200843957b5dSAnshuman Khandual EXPORT_SYMBOL(vm_get_page_prot);
200943957b5dSAnshuman Khandual 
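/*
 * A minimal sketch of how an architecture might use this macro in its mm
 * code: define protection_map[] for the sixteen read/write/exec/shared
 * combinations and then emit the generic vm_get_page_prot(). The array
 * below is deliberately incomplete and purely illustrative:
 */
#if 0
static pgprot_t protection_map[16] __ro_after_init = {
	[VM_NONE]	= PAGE_NONE,
	[VM_READ]	= PAGE_READONLY,
	/* ... remaining combinations elided ... */
};
DECLARE_VM_GET_PAGE_PROT
#endif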
2010ca5999fdSMike Rapoport #endif /* _LINUX_PGTABLE_H */
2011