xref: /freebsd-12.1/sys/mips/mips/pmap.c (revision 3c265696)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
 */

/*
 *	Manages physical address maps.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidation or protection-reduction
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_pmap.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>

#include <machine/cache.h>
#include <machine/md_var.h>
#include <machine/tlb.h>

#undef PMAP_DEBUG

#if !defined(DIAGNOSTIC)
#define	PMAP_INLINE __inline
#else
#define	PMAP_INLINE
#endif

#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_seg_index(v)	(((v) >> SEGSHIFT) & (NPDEPG - 1))
#define	pmap_pde_index(v)	(((v) >> PDRSHIFT) & (NPDEPG - 1))
#define	pmap_pte_index(v)	(((v) >> PAGE_SHIFT) & (NPTEPG - 1))
#define	pmap_pde_pindex(v)	((v) >> PDRSHIFT)

#ifdef __mips_n64
#define	NUPDE			(NPDEPG * NPDEPG)
#define	NUSERPGTBLS		(NUPDE + NPDEPG)
#else
#define	NUPDE			(NPDEPG)
#define	NUSERPGTBLS		(NUPDE)
#endif

#define	is_kernel_pmap(x)	((x) == kernel_pmap)

struct pmap kernel_pmap_store;
pd_entry_t *kernel_segmap;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

static int nkpt;
unsigned pmap_max_asid;		/* max ASID supported by the system */

#define	PMAP_ASID_RESERVED	0

vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;

static void pmap_asid_alloc(pmap_t pmap);

static struct rwlock_padalign pvh_global_lock;

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static int pv_entry_count;

static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
    vm_offset_t va);
static vm_page_t pmap_alloc_direct_page(unsigned int index, int req);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_grow_direct_page(int req);
static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
    pd_entry_t pde);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
    vm_offset_t va, vm_page_t m);
static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte);
static void pmap_invalidate_all(pmap_t pmap);
static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags);
static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, u_int flags);
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t);
static pt_entry_t init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot);

static void pmap_invalidate_page_action(void *arg);
static void pmap_invalidate_range_action(void *arg);
static void pmap_update_page_action(void *arg);

#ifndef __mips_n64
/*
 * This structure is for high memory (memory above 512Meg in 32 bit) support.
 * The highmem area does not have a KSEG0 mapping, and we need a mechanism to
 * do temporary per-CPU mappings for pmap_zero_page, pmap_copy_page etc.
 *
 * At bootup, we reserve 2 virtual pages per CPU for mapping highmem pages. To
 * access a highmem physical address on a CPU, we map the physical address to
 * the reserved virtual address for the CPU in the kernel pagetable.  This is
 * done with interrupts disabled (although a spinlock and sched_pin would be
 * sufficient).
 */
struct local_sysmaps {
	vm_offset_t	base;
	uint32_t	saved_intr;
	uint16_t	valid1, valid2;
};
static struct local_sysmaps sysmap_lmem[MAXCPU];

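/*
 * Reserve the two per-CPU pages of kernel virtual address space used for
 * the temporary highmem mappings described above.
 */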
static __inline void
pmap_alloc_lmem_map(void)
{
	int i;

	for (i = 0; i < MAXCPU; i++) {
		sysmap_lmem[i].base = virtual_avail;
		virtual_avail += PAGE_SIZE * 2;
		sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
	}
}

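/*
 * Map a single physical page at this CPU's reserved virtual address.
 * Interrupts stay disabled until the matching pmap_lmem_unmap().
 */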
static __inline vm_offset_t
pmap_lmem_map1(vm_paddr_t phys)
{
	struct local_sysmaps *sysm;
	pt_entry_t *pte, npte;
	vm_offset_t va;
	uint32_t intr;
	int cpu;

	intr = intr_disable();
	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	sysm->saved_intr = intr;
	va = sysm->base;
	npte = TLBLO_PA_TO_PFN(phys) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
	pte = pmap_pte(kernel_pmap, va);
	*pte = npte;
	sysm->valid1 = 1;
	return (va);
}

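/*
 * Map two physical pages at this CPU's pair of reserved virtual addresses,
 * e.g. for pmap_copy_page().  Returns the first virtual address.
 */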
static __inline vm_offset_t
pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
{
	struct local_sysmaps *sysm;
	pt_entry_t *pte, npte;
	vm_offset_t va1, va2;
	uint32_t intr;
	int cpu;

	intr = intr_disable();
	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	sysm->saved_intr = intr;
	va1 = sysm->base;
	va2 = sysm->base + PAGE_SIZE;
	npte = TLBLO_PA_TO_PFN(phys1) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
	pte = pmap_pte(kernel_pmap, va1);
	*pte = npte;
	npte = TLBLO_PA_TO_PFN(phys2) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
	pte = pmap_pte(kernel_pmap, va2);
	*pte = npte;
	sysm->valid1 = 1;
	sysm->valid2 = 1;
	return (va1);
}

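/*
 * Tear down the mapping(s) established by pmap_lmem_map1() or
 * pmap_lmem_map2() and restore the saved interrupt state.
 */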
static __inline void
pmap_lmem_unmap(void)
{
	struct local_sysmaps *sysm;
	pt_entry_t *pte;
	int cpu;

	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	pte = pmap_pte(kernel_pmap, sysm->base);
	*pte = PTE_G;
	tlb_invalidate_address(kernel_pmap, sysm->base);
	sysm->valid1 = 0;
	if (sysm->valid2) {
		pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);
		*pte = PTE_G;
		tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE);
		sysm->valid2 = 0;
	}
	intr_restore(sysm->saved_intr);
}
#else  /* __mips_n64 */

static __inline void
pmap_alloc_lmem_map(void)
{
}

static __inline vm_offset_t
pmap_lmem_map1(vm_paddr_t phys)
{

	return (0);
}

static __inline vm_offset_t
pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
{

	return (0);
}

static __inline vm_offset_t
pmap_lmem_unmap(void)
{

	return (0);
}
#endif /* !__mips_n64 */

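/*
 * Compute the PTE cache-coherency bits for a mapping of page "m" at
 * physical address "pa", downgrading write-back to uncacheable when the
 * physical address is not backed by cacheable memory.
 */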
static __inline int
pmap_pte_cache_bits(vm_paddr_t pa, vm_page_t m)
{
	vm_memattr_t ma;

	ma = pmap_page_get_memattr(m);
	if (ma == VM_MEMATTR_WRITE_BACK && !is_cacheable_mem(pa))
		ma = VM_MEMATTR_UNCACHEABLE;
	return PTE_C(ma);
}
#define PMAP_PTE_SET_CACHE_BITS(pte, pa, m) {	\
	pte &= ~PTE_C_MASK;			\
	pte |= pmap_pte_cache_bits(pa, m);	\
}

/*
 * Page table entry lookup routines.
 */
static __inline pd_entry_t *
pmap_segmap(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_segtab[pmap_seg_index(va)]);
}

#ifdef __mips_n64
static __inline pd_entry_t *
pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
{
	pd_entry_t *pde;

	pde = (pd_entry_t *)*pdpe;
	return (&pde[pmap_pde_index(va)]);
}

static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *pdpe;

	pdpe = pmap_segmap(pmap, va);
	if (*pdpe == NULL)
		return (NULL);

	return (pmap_pdpe_to_pde(pdpe, va));
}
#else
static __inline pd_entry_t *
pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
{

	return (pdpe);
}

static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va)
{

	return (pmap_segmap(pmap, va));
}
#endif

static __inline pt_entry_t *
pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
{
	pt_entry_t *pte;

	pte = (pt_entry_t *)*pde;
	return (&pte[pmap_pte_index(va)]);
}

pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (pde == NULL || *pde == NULL)
		return (NULL);

	return (pmap_pde_to_pte(pde, va));
}

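/*
 * Allocate "size" bytes of direct-mapped physical memory from the first
 * sufficiently large bank in phys_avail[].  Only usable during early boot,
 * before the VM system is up.
 */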
vm_offset_t
pmap_steal_memory(vm_size_t size)
{
	vm_paddr_t bank_size, pa;
	vm_offset_t va;

	size = round_page(size);
	bank_size = phys_avail[1] - phys_avail[0];
	while (size > bank_size) {
		int i;

		for (i = 0; phys_avail[i + 2]; i += 2) {
			phys_avail[i] = phys_avail[i + 2];
			phys_avail[i + 1] = phys_avail[i + 3];
		}
		phys_avail[i] = 0;
		phys_avail[i + 1] = 0;
		if (!phys_avail[0])
			panic("pmap_steal_memory: out of memory");
		bank_size = phys_avail[1] - phys_avail[0];
	}

	pa = phys_avail[0];
	phys_avail[0] += size;
	if (MIPS_DIRECT_MAPPABLE(pa) == 0)
		panic("Out of memory below 512Meg?");
	va = MIPS_PHYS_TO_DIRECT(pa);
	bzero((caddr_t)va, size);
	return (va);
}

/*
 * Bootstrap the system enough to run with virtual memory.  This
 * assumes that the phys_avail array has been initialized.
 */
static void
pmap_create_kernel_pagetable(void)
{
	int i, j;
	vm_offset_t ptaddr;
	pt_entry_t *pte;
#ifdef __mips_n64
	pd_entry_t *pde;
	vm_offset_t pdaddr;
	int npt, npde;
#endif

	/*
	 * Allocate segment table for the kernel
	 */
	kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);

	/*
	 * Allocate second level page tables for the kernel
	 */
#ifdef __mips_n64
	npde = howmany(NKPT, NPDEPG);
	pdaddr = pmap_steal_memory(PAGE_SIZE * npde);
#endif
	nkpt = NKPT;
	ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt);

	/*
	 * The R[4-7]?00 stores only one copy of the Global bit in the
	 * translation lookaside buffer for each 2 page entry.  Thus invalid
	 * entries must have the Global bit set so that when the EntryLo and
	 * EntryHi G bits are ANDed together they will produce a global bit
	 * to store in the TLB.
	 */
	for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++)
		*pte = PTE_G;

#ifdef __mips_n64
	for (i = 0,  npt = nkpt; npt > 0; i++) {
		kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE);
		pde = (pd_entry_t *)kernel_segmap[i];

		for (j = 0; j < NPDEPG && npt > 0; j++, npt--)
			pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE);
	}
#else
	for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++)
		kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE));
#endif

	PMAP_LOCK_INIT(kernel_pmap);
	kernel_pmap->pm_segtab = kernel_segmap;
	CPU_FILL(&kernel_pmap->pm_active);
	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
	kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
	kernel_pmap->pm_asid[0].gen = 0;
	kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE;
}

void
pmap_bootstrap(void)
{
	int i;
	int need_local_mappings = 0;

	/* Sort. */
again:
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		/*
		 * Keep the memory aligned on page boundary.
		 */
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);

		if (i < 2)
			continue;
		if (phys_avail[i - 2] > phys_avail[i]) {
			vm_paddr_t ptemp[2];

			ptemp[0] = phys_avail[i + 0];
			ptemp[1] = phys_avail[i + 1];

			phys_avail[i + 0] = phys_avail[i - 2];
			phys_avail[i + 1] = phys_avail[i - 1];

			phys_avail[i - 2] = ptemp[0];
			phys_avail[i - 1] = ptemp[1];
			goto again;
		}
	}

	/*
	 * On 32-bit systems, we may have memory which cannot be mapped
	 * directly.  This memory will need temporary mapping before it
	 * can be accessed.
	 */
	if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1] - 1))
		need_local_mappings = 1;

	/*
	 * Copy the phys_avail[] array before we start stealing memory from it.
	 */
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		physmem_desc[i] = phys_avail[i];
		physmem_desc[i + 1] = phys_avail[i + 1];
	}

	Maxmem = atop(phys_avail[i - 1]);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			vm_paddr_t size;

			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
			    (uintmax_t) phys_avail[i],
			    (uintmax_t) phys_avail[i + 1] - 1,
			    (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
		}
		printf("Maxmem is 0x%0jx\n", ptoa((uintmax_t)Maxmem));
	}

	/*
	 * Steal the message buffer from the beginning of memory.
	 */
	msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize);
	msgbufinit(msgbufp, msgbufsize);

	/*
	 * Steal thread0 kstack.
	 */
	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);

	virtual_avail = VM_MIN_KERNEL_ADDRESS;
	virtual_end = VM_MAX_KERNEL_ADDRESS;

#ifdef SMP
	/*
	 * Steal some virtual address space to map the pcpu area.
	 */
	virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2);
	pcpup = (struct pcpu *)virtual_avail;
	virtual_avail += PAGE_SIZE * 2;

	/*
	 * Initialize the wired TLB entry mapping the pcpu region for
	 * the BSP at 'pcpup'. Up until this point we were operating
	 * with the 'pcpup' for the BSP pointing to a virtual address
	 * in KSEG0 so there was no need for a TLB mapping.
	 */
	mips_pcpu_tlb_init(PCPU_ADDR(0));

	if (bootverbose)
		printf("pcpu is available at virtual address %p.\n", pcpup);
#endif

	if (need_local_mappings)
		pmap_alloc_lmem_map();
	pmap_create_kernel_pagetable();
	pmap_max_asid = VMNUM_PIDS;
	mips_wr_entryhi(0);
	mips_wr_pagemask(0);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init(&pvh_global_lock, "pmap pv global");
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_flags = VM_MEMATTR_DEFAULT << PV_MEMATTR_SHIFT;
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
}

/***************************************************
 * Low level helper routines.....
 ***************************************************/

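/*
 * Run "fn" on every CPU that is actively using "pmap".  CPUs on which the
 * pmap is inactive merely have their ASID generation reset, forcing a
 * fresh ASID (and thus a clean TLB context) the next time the pmap is
 * activated there.
 */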
#ifdef	SMP
static __inline void
pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
{
	int	cpuid, cpu, self;
	cpuset_t active_cpus;

	sched_pin();
	if (is_kernel_pmap(pmap)) {
		smp_rendezvous(NULL, fn, NULL, arg);
		goto out;
	}
	/* Force ASID update on inactive CPUs */
	CPU_FOREACH(cpu) {
		if (!CPU_ISSET(cpu, &pmap->pm_active))
			pmap->pm_asid[cpu].gen = 0;
	}
	cpuid = PCPU_GET(cpuid);
	/*
	 * XXX: barrier/locking for active?
	 *
	 * Take a snapshot of active here, any further changes are ignored.
	 * tlb update/invalidate should be harmless on inactive CPUs
	 */
	active_cpus = pmap->pm_active;
	self = CPU_ISSET(cpuid, &active_cpus);
	CPU_CLR(cpuid, &active_cpus);
	/* Optimize for the case where this cpu is the only active one */
	if (CPU_EMPTY(&active_cpus)) {
		if (self)
			fn(arg);
	} else {
		if (self)
			CPU_SET(cpuid, &active_cpus);
		smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg);
	}
out:
	sched_unpin();
}
#else /* !SMP */
static __inline void
pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
{
	int	cpuid;

	if (is_kernel_pmap(pmap)) {
		fn(arg);
		return;
	}
	cpuid = PCPU_GET(cpuid);
	if (!CPU_ISSET(cpuid, &pmap->pm_active))
		pmap->pm_asid[cpuid].gen = 0;
	else
		fn(arg);
}
#endif /* SMP */

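/*
 * Invalidate all of "pmap"'s user TLB entries on every CPU where the
 * pmap is active.
 */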
static void
pmap_invalidate_all(pmap_t pmap)
{

	pmap_call_on_active_cpus(pmap,
	    (void (*)(void *))tlb_invalidate_all_user, pmap);
}

struct pmap_invalidate_page_arg {
	pmap_t pmap;
	vm_offset_t va;
};

static void
pmap_invalidate_page_action(void *arg)
{
	struct pmap_invalidate_page_arg *p = arg;

	tlb_invalidate_address(p->pmap, p->va);
}

static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	struct pmap_invalidate_page_arg arg;

	arg.pmap = pmap;
	arg.va = va;
	pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &arg);
}

struct pmap_invalidate_range_arg {
	pmap_t pmap;
	vm_offset_t sva;
	vm_offset_t eva;
};

static void
pmap_invalidate_range_action(void *arg)
{
	struct pmap_invalidate_range_arg *p = arg;

	tlb_invalidate_range(p->pmap, p->sva, p->eva);
}

static void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	struct pmap_invalidate_range_arg arg;

	arg.pmap = pmap;
	arg.sva = sva;
	arg.eva = eva;
	pmap_call_on_active_cpus(pmap, pmap_invalidate_range_action, &arg);
}

struct pmap_update_page_arg {
	pmap_t pmap;
	vm_offset_t va;
	pt_entry_t pte;
};

static void
pmap_update_page_action(void *arg)
{
	struct pmap_update_page_arg *p = arg;

	tlb_update(p->pmap, p->va, p->pte);
}

static void
pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
{
	struct pmap_update_page_arg arg;

	arg.pmap = pmap;
	arg.va = va;
	arg.pte = pte;
	pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte;
	vm_offset_t retval = 0;

	PMAP_LOCK(pmap);
	pte = pmap_pte(pmap, va);
	if (pte) {
		retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK);
	}
	PMAP_UNLOCK(pmap);
	return (retval);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t pte, *ptep;
	vm_paddr_t pa, pte_pa;
	vm_page_t m;

	m = NULL;
	pa = 0;
	PMAP_LOCK(pmap);
retry:
	ptep = pmap_pte(pmap, va);
	if (ptep != NULL) {
		pte = *ptep;
		if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) ||
		    (prot & VM_PROT_WRITE) == 0)) {
			pte_pa = TLBLO_PTE_TO_PA(pte);
			if (vm_page_pa_tryrelock(pmap, pte_pa, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE(pte_pa);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * add a wired page to the kva
 */
void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
{
	pt_entry_t *pte;
	pt_entry_t opte, npte;

#ifdef PMAP_DEBUG
	printf("pmap_kenter:  va: %p -> pa: %p\n", (void *)va, (void *)pa);
#endif

	pte = pmap_pte(kernel_pmap, va);
	opte = *pte;
	npte = TLBLO_PA_TO_PFN(pa) | PTE_C(ma) | PTE_D | PTE_V | PTE_G;
	*pte = npte;
	if (pte_test(&opte, PTE_V) && opte != npte)
		pmap_update_page(kernel_pmap, va, npte);
}

void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{

	KASSERT(is_cacheable_mem(pa),
		("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa));

	pmap_kenter_attr(va, pa, VM_MEMATTR_DEFAULT);
}

/*
 * remove a page from the kernel pagetables
 */
 /* PMAP_INLINE */ void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	/*
	 * Write back all caches from the page being destroyed
	 */
	mips_dcache_wbinv_range_index(va, PAGE_SIZE);

	pte = pmap_pte(kernel_pmap, va);
	*pte = PTE_G;
	pmap_invalidate_page(kernel_pmap, va);
}

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	The value passed in '*virt' is a suggested virtual address for
 *	the mapping. Architectures which can support a direct-mapped
 *	physical to virtual region can return the appropriate address
 *	within that region, leaving '*virt' unchanged. Other
 *	architectures should map the pages starting at '*virt' and
 *	update '*virt' with the first usable address after the mapped
 *	region.
 *
 *	Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	vm_offset_t va, sva;

	if (MIPS_DIRECT_MAPPABLE(end - 1))
		return (MIPS_PHYS_TO_DIRECT(start));

	va = sva = *virt;
	while (start < end) {
		pmap_kenter(va, start);
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	*virt = va;
	return (sva);
}

/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page *must* be wired.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	int i;
	vm_offset_t origva = va;

	for (i = 0; i < count; i++) {
		pmap_flush_pvcache(m[i]);
		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
		va += PAGE_SIZE;
	}

	mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count);
}

/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	pt_entry_t *pte;
	vm_offset_t origva;

	if (count < 1)
		return;
	mips_dcache_wbinv_range_index(va, PAGE_SIZE * count);
	origva = va;
	do {
		pte = pmap_pte(kernel_pmap, va);
		*pte = PTE_G;
		va += PAGE_SIZE;
	} while (--count > 0);
	pmap_invalidate_range(kernel_pmap, origva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static PMAP_INLINE boolean_t
pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_ptp(pmap, va, m);
		return (TRUE);
	} else
		return (FALSE);
}

static void
_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pd_entry_t *pde;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
#ifdef __mips_n64
	if (m->pindex < NUPDE)
		pde = pmap_pde(pmap, va);
	else
		pde = pmap_segmap(pmap, va);
#else
	pde = pmap_pde(pmap, va);
#endif
	*pde = 0;
	pmap->pm_stats.resident_count--;

#ifdef __mips_n64
	if (m->pindex < NUPDE) {
		pd_entry_t *pdp;
		vm_page_t pdpg;

		/*
		 * Recursively decrement next level pagetable refcount
		 */
		pdp = (pd_entry_t *)*pmap_segmap(pmap, va);
		pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp));
		pmap_unwire_ptp(pmap, va, pdpg);
	}
#endif

	/*
	 * If the page is finally unwired, simply free it.
	 */
	vm_page_free_zero(m);
	vm_wire_sub(1);
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
{
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	KASSERT(pde != 0, ("pmap_unuse_pt: pde != 0"));
	mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pde));
	return (pmap_unwire_ptp(pmap, va, mpte));
}

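/*
 * Initialize the pmap for process 0, which shares the kernel's segment
 * table rather than allocating its own.
 */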
void
pmap_pinit0(pmap_t pmap)
{
	int i;

	PMAP_LOCK_INIT(pmap);
	pmap->pm_segtab = kernel_segmap;
	CPU_ZERO(&pmap->pm_active);
	for (i = 0; i < MAXCPU; i++) {
		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
		pmap->pm_asid[i].gen = 0;
	}
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

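/*
 * Wait for the page daemon (and, on 32-bit, contiguous-page reclamation)
 * to make a direct-mappable page available.
 */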
static void
pmap_grow_direct_page(int req)
{

#ifdef __mips_n64
	vm_wait(NULL);
#else
	if (!vm_page_reclaim_contig(req, 1, 0, MIPS_KSEG0_LARGEST_PHYS,
	    PAGE_SIZE, 0))
		vm_wait(NULL);
#endif
}

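/*
 * Allocate a wired, zero-filled page from the direct-mapped freelist and
 * set its pindex.  Returns NULL if no page is available.
 */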
static vm_page_t
pmap_alloc_direct_page(unsigned int index, int req)
{
	vm_page_t m;

	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED |
	    VM_ALLOC_ZERO);
	if (m == NULL)
		return (NULL);

	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	m->pindex = index;
	return (m);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(pmap_t pmap)
{
	vm_offset_t ptdva;
	vm_page_t ptdpg;
	int i, req_class;

	/*
	 * allocate the page directory page
	 */
	req_class = VM_ALLOC_NORMAL;
	while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, req_class)) ==
	    NULL)
		pmap_grow_direct_page(req_class);

	ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
	pmap->pm_segtab = (pd_entry_t *)ptdva;
	CPU_ZERO(&pmap->pm_active);
	for (i = 0; i < MAXCPU; i++) {
		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
		pmap->pm_asid[i].gen = 0;
	}
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);

	return (1);
}

/*
 * This routine is called when the page table page is not mapped, or has
 * been deallocated: allocate a new page table page and enter it into the
 * page directory.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, unsigned ptepindex, u_int flags)
{
	vm_offset_t pageva;
	vm_page_t m;
	int req_class;

	/*
	 * Find or fabricate a new pagetable page
	 */
	req_class = VM_ALLOC_NORMAL;
	if ((m = pmap_alloc_direct_page(ptepindex, req_class)) == NULL) {
		if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
			PMAP_UNLOCK(pmap);
			rw_wunlock(&pvh_global_lock);
			pmap_grow_direct_page(req_class);
			rw_wlock(&pvh_global_lock);
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page
		 * table page may have been allocated.
		 */
		return (NULL);
	}

	/*
	 * Map the pagetable page into the process address space, if it
	 * isn't already there.
	 */
	pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));

#ifdef __mips_n64
	if (ptepindex >= NUPDE) {
		pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva;
	} else {
		pd_entry_t *pdep, *pde;
		int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT);
		int pdeindex = ptepindex & (NPDEPG - 1);
		vm_page_t pg;

		pdep = &pmap->pm_segtab[segindex];
		if (*pdep == NULL) {
			/* recurse for allocating page dir */
			if (_pmap_allocpte(pmap, NUPDE + segindex,
			    flags) == NULL) {
				/* alloc failed, release current */
				vm_page_unwire_noq(m);
				vm_page_free_zero(m);
				return (NULL);
			}
		} else {
			pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep));
			pg->wire_count++;
		}
		/* Next level entry */
		pde = (pd_entry_t *)*pdep;
		pde[pdeindex] = (pd_entry_t)pageva;
	}
#else
	pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva;
#endif
	pmap->pm_stats.resident_count++;
	return (m);
}

static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags)
{
	unsigned ptepindex;
	pd_entry_t *pde;
	vm_page_t m;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = pmap_pde_pindex(va);
retry:
	/*
	 * Get the page directory entry
	 */
	pde = pmap_pde(pmap, va);

	/*
	 * If the page table page is mapped, we just increment the hold
	 * count, and activate it.
	 */
	if (pde != NULL && *pde != NULL) {
		m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde));
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has been
		 * deallocated.
		 */
		m = _pmap_allocpte(pmap, ptepindex, flags);
		if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
			goto retry;
	}
	return (m);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_offset_t ptdva;
	vm_page_t ptdpg;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));

	ptdva = (vm_offset_t)pmap->pm_segtab;
	ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva));

	vm_page_unwire_noq(ptdpg);
	vm_page_free_zero(ptdpg);
}

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_page_t nkpg;
	pd_entry_t *pde, *pdpe;
	pt_entry_t *pte;
	int i, req_class;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	req_class = VM_ALLOC_INTERRUPT;
	addr = roundup2(addr, NBSEG);
	if (addr - 1 >= vm_map_max(kernel_map))
		addr = vm_map_max(kernel_map);
	while (kernel_vm_end < addr) {
		pdpe = pmap_segmap(kernel_pmap, kernel_vm_end);
#ifdef __mips_n64
		if (*pdpe == 0) {
			/* new intermediate page table entry */
			nkpg = pmap_alloc_direct_page(nkpt, req_class);
			if (nkpg == NULL)
				panic("pmap_growkernel: no memory to grow kernel");
			*pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
			continue; /* try again */
		}
#endif
		pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
		if (*pde != 0) {
			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
			if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
				kernel_vm_end = vm_map_max(kernel_map);
				break;
			}
			continue;
		}

		/*
		 * This index is bogus, but out of the way
		 */
		nkpg = pmap_alloc_direct_page(nkpt, req_class);
#ifndef __mips_n64
		if (nkpg == NULL && vm_page_reclaim_contig(req_class, 1,
		    0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0))
			nkpg = pmap_alloc_direct_page(nkpt, req_class);
#endif
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");
		nkpt++;
		*pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));

		/*
		 * The R[4-7]?00 stores only one copy of the Global bit in
		 * the translation lookaside buffer for each 2 page entry.
		 * Thus invalid entries must have the Global bit set so
		 * that when the EntryLo and EntryHi G bits are ANDed
		 * together they will produce a global bit to store in the
		 * TLB.
		 */
		pte = (pt_entry_t *)*pde;
		for (i = 0; i < NPTEPG; i++)
			pte[i] = PTE_G;

		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
		if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
			kernel_vm_end = vm_map_max(kernel_map);
			break;
		}
	}
}

/***************************************************
 * page management routines.
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
#ifdef __mips_n64
CTASSERT(_NPCM == 3);
CTASSERT(_NPCPV == 168);
#else
CTASSERT(_NPCM == 11);
CTASSERT(_NPCPV == 336);
#endif

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#ifdef __mips_n64
#define	PC_FREE0_1	0xfffffffffffffffful
#define	PC_FREE2	0x000000fffffffffful
#else
#define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
#define	PC_FREE10	0x0000fffful	/* Free values for index 10 */
#endif

static const u_long pc_freemask[_NPCM] = {
#ifdef __mips_n64
	PC_FREE0_1, PC_FREE0_1, PC_FREE2
#else
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE10
#endif
};

static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
    "Current number of pv entries");

#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
    "Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
    "Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
    "Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
    "Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
    "Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
    "Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
    "Current number of spare pv entries");
#endif

/*
 * We are in a serious low memory condition.  Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.
 */
static vm_page_t
pmap_pv_reclaim(pmap_t locked_pmap)
{
	struct pch newtail;
	struct pv_chunk *pc;
	pd_entry_t *pde;
	pmap_t pmap;
	pt_entry_t *pte, oldpte;
	pv_entry_t pv;
	vm_offset_t va;
	vm_page_t m, m_pc;
	u_long inuse;
	int bit, field, freed, idx;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	pmap = NULL;
	m_pc = NULL;
	TAILQ_INIT(&newtail);
	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
		if (pmap != pc->pc_pmap) {
			if (pmap != NULL) {
				pmap_invalidate_all(pmap);
				if (pmap != locked_pmap)
					PMAP_UNLOCK(pmap);
			}
			pmap = pc->pc_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
				pmap = NULL;
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
				continue;
			}
		}

		/*
		 * Destroy every non-wired, 4 KB page mapping in the chunk.
		 */
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
			    inuse != 0; inuse &= ~(1UL << bit)) {
				bit = ffsl(inuse) - 1;
				idx = field * sizeof(inuse) * NBBY + bit;
				pv = &pc->pc_pventry[idx];
				va = pv->pv_va;
				pde = pmap_pde(pmap, va);
				KASSERT(pde != NULL && *pde != 0,
				    ("pmap_pv_reclaim: pde"));
				pte = pmap_pde_to_pte(pde, va);
				oldpte = *pte;
				if (pte_test(&oldpte, PTE_W))
					continue;
				if (is_kernel_pmap(pmap))
					*pte = PTE_G;
				else
					*pte = 0;
				m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(oldpte));
				if (pte_test(&oldpte, PTE_D))
					vm_page_dirty(m);
				if (m->md.pv_flags & PV_TABLE_REF)
					vm_page_aflag_set(m, PGA_REFERENCED);
				m->md.pv_flags &= ~PV_TABLE_REF;
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
				if (TAILQ_EMPTY(&m->md.pv_list))
					vm_page_aflag_clear(m, PGA_WRITEABLE);
				pc->pc_map[field] |= 1UL << bit;
				pmap_unuse_pt(pmap, va, *pde);
				freed++;
			}
		}
		if (freed == 0) {
			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
			continue;
		}
		/* Every freed mapping is for a 4 KB page. */
		pmap->pm_stats.resident_count -= freed;
		PV_STAT(pv_entry_frees += freed);
		PV_STAT(pv_entry_spare += freed);
		pv_entry_count -= freed;
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		for (field = 0; field < _NPCM; field++)
			if (pc->pc_map[field] != pc_freemask[field]) {
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);

				/*
				 * One freed pv entry in locked_pmap is
				 * sufficient.
				 */
				if (pmap == locked_pmap)
					goto out;
				break;
			}
		if (field == _NPCM) {
			PV_STAT(pv_entry_spare -= _NPCPV);
			PV_STAT(pc_chunk_count--);
			PV_STAT(pc_chunk_frees++);
			/* Entire chunk is free; return it. */
			m_pc = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(
			    (vm_offset_t)pc));
			dump_drop_page(m_pc->phys_addr);
			break;
		}
	}
out:
	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
	if (pmap != NULL) {
		pmap_invalidate_all(pmap);
		if (pmap != locked_pmap)
			PMAP_UNLOCK(pmap);
	}
	return (m_pc);
}

/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int bit, field, idx;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_frees++);
	PV_STAT(pv_entry_spare++);
	pv_entry_count--;
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / (sizeof(u_long) * NBBY);
	bit = idx % (sizeof(u_long) * NBBY);
	pc->pc_map[field] |= 1ul << bit;
	for (idx = 0; idx < _NPCM; idx++)
		if (pc->pc_map[idx] != pc_freemask[idx]) {
			/*
			 * 98% of the time, pc is already at the head of the
			 * list.  If it isn't already, move it to the head.
			 */
			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
			    pc)) {
				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
			}
			return;
		}
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	free_pv_chunk(pc);
}

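/*
 * Free an entire pv chunk: unlink it from the global LRU list and return
 * its backing page to the VM system.
 */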
static void
free_pv_chunk(struct pv_chunk *pc)
{
	vm_page_t m;

	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
	PV_STAT(pv_entry_spare -= _NPCPV);
	PV_STAT(pc_chunk_count--);
	PV_STAT(pc_chunk_frees++);
	/* entire chunk is free, return it */
	m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc));
	dump_drop_page(m->phys_addr);
	vm_page_unwire_noq(m);
	vm_page_free(m);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, boolean_t try)
{
	struct pv_chunk *pc;
	pv_entry_t pv;
	vm_page_t m;
	int bit, field, idx;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_allocs++);
	pv_entry_count++;
retry:
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = ffsl(pc->pc_map[field]) - 1;
				break;
			}
		}
		if (field < _NPCM) {
			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
			pv = &pc->pc_pventry[idx];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			for (field = 0; field < _NPCM; field++)
				if (pc->pc_map[field] != 0) {
					PV_STAT(pv_entry_spare--);
					return (pv);	/* not full, return */
				}
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
			PV_STAT(pv_entry_spare--);
			return (pv);
		}
	}
	/* No free items, allocate another chunk */
	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL |
	    VM_ALLOC_WIRED);
	if (m == NULL) {
		if (try) {
			pv_entry_count--;
			PV_STAT(pc_chunk_tryfail++);
			return (NULL);
		}
		m = pmap_pv_reclaim(pmap);
		if (m == NULL)
			goto retry;
	}
	PV_STAT(pc_chunk_count++);
	PV_STAT(pc_chunk_allocs++);
	dump_add_page(m->phys_addr);
	pc = (struct pv_chunk *)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
	pc->pc_pmap = pmap;
	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
	for (field = 1; field < _NPCM; field++)
		pc->pc_map[field] = pc_freemask[field];
	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
	pv = &pc->pc_pventry[0];
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	PV_STAT(pv_entry_spare += _NPCPV - 1);
	return (pv);
}

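/*
 * Search the pv list of "pvh" for the entry matching (pmap, va), unlink
 * it if found, and return it (or NULL).
 */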
static pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
			break;
		}
	}
	return (pv);
}

static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	pv = pmap_pvh_remove(pvh, pmap, va);
	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx",
	     (u_long)VM_PAGE_TO_PHYS(__containerof(pvh, struct vm_page, md)),
	     (u_long)va));
	free_pv_entry(pmap, pv);
}

static void
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	pmap_pvh_free(&m->md, pmap, va);
	if (TAILQ_EMPTY(&m->md.pv_list))
		vm_page_aflag_clear(m, PGA_WRITEABLE);
}

/*
 * Conditionally create a pv entry.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
    vm_page_t m)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
		pv->pv_va = va;
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * pmap_remove_pte: unmap a single page from a process address space,
 * updating the wired and resident counts and any pv entry for the page.
 */
static int
pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
    pd_entry_t pde)
{
	pt_entry_t oldpte;
	vm_page_t m;
	vm_paddr_t pa;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Write back all cache lines from the page being unmapped.
	 */
	mips_dcache_wbinv_range_index(va, PAGE_SIZE);

	oldpte = *ptq;
	if (is_kernel_pmap(pmap))
		*ptq = PTE_G;
	else
		*ptq = 0;

	if (pte_test(&oldpte, PTE_W))
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;

	if (pte_test(&oldpte, PTE_MANAGED)) {
		pa = TLBLO_PTE_TO_PA(oldpte);
		m = PHYS_TO_VM_PAGE(pa);
		if (pte_test(&oldpte, PTE_D)) {
			KASSERT(!pte_test(&oldpte, PTE_RO),
			    ("%s: modified page not writable: va: %p, pte: %#jx",
			    __func__, (void *)va, (uintmax_t)oldpte));
			vm_page_dirty(m);
		}
		if (m->md.pv_flags & PV_TABLE_REF)
			vm_page_aflag_set(m, PGA_REFERENCED);
		m->md.pv_flags &= ~PV_TABLE_REF;

		pmap_remove_entry(pmap, m, va);
	}
	return (pmap_unuse_pt(pmap, va, pde));
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(struct pmap *pmap, vm_offset_t va)
{
	pd_entry_t *pde;
	pt_entry_t *ptq;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pde = pmap_pde(pmap, va);
	if (pde == NULL || *pde == 0)
		return;
	ptq = pmap_pde_to_pte(pde, va);

	/*
	 * If there is no pte for this address, just skip it!
	 */
	if (!pte_test(ptq, PTE_V))
		return;

	(void)pmap_remove_pte(pmap, ptq, va, *pde);
	pmap_invalidate_page(pmap, va);
}

/*
 *	Remove the given range of addresses from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pd_entry_t *pde, *pdpe;
	pt_entry_t *pte;
	vm_offset_t va, va_next;

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);

	/*
	 * Special handling of removing one page, a very common operation
	 * that is easy to short circuit.
	 */
	if ((sva + PAGE_SIZE) == eva) {
		pmap_remove_page(pmap, sva);
		goto out;
	}
	for (; sva < eva; sva = va_next) {
		pdpe = pmap_segmap(pmap, sva);
#ifdef __mips_n64
		if (*pdpe == 0) {
			va_next = (sva + NBSEG) & ~SEGMASK;
			if (va_next < sva)
				va_next = eva;
			continue;
		}
#endif
		va_next = (sva + NBPDR) & ~PDRMASK;
		if (va_next < sva)
			va_next = eva;

		pde = pmap_pdpe_to_pde(pdpe, sva);
		if (*pde == NULL)
			continue;

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (va_next > eva)
			va_next = eva;

		va = va_next;
		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
		    sva += PAGE_SIZE) {
			if (!pte_test(pte, PTE_V)) {
				if (va != va_next) {
					pmap_invalidate_range(pmap, va, sva);
					va = va_next;
				}
				continue;
			}
			if (va == va_next)
				va = sva;
			if (pmap_remove_pte(pmap, pte, sva, *pde)) {
				sva += PAGE_SIZE;
				break;
			}
		}
		if (va != va_next)
			pmap_invalidate_range(pmap, va, sva);
	}
out:
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}

1840 /*
1841  *	Routine:	pmap_remove_all
1842  *	Function:
1843  *		Removes this physical page from
1844  *		all physical maps in which it resides.
1845  *		Reflects back modify bits to the pager.
1846  *
1847  *	Notes:
1848  *		Original versions of this routine were very
1849  *		inefficient because they iteratively called
1850  *		pmap_remove (slow...)
1851  */
1852 
1853 void
pmap_remove_all(vm_page_t m)1854 pmap_remove_all(vm_page_t m)
1855 {
1856 	pv_entry_t pv;
1857 	pmap_t pmap;
1858 	pd_entry_t *pde;
1859 	pt_entry_t *pte, tpte;
1860 
1861 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1862 	    ("pmap_remove_all: page %p is not managed", m));
1863 	rw_wlock(&pvh_global_lock);
1864 
1865 	if (m->md.pv_flags & PV_TABLE_REF)
1866 		vm_page_aflag_set(m, PGA_REFERENCED);
1867 
1868 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1869 		pmap = PV_PMAP(pv);
1870 		PMAP_LOCK(pmap);
1871 
1872 		/*
1873 		 * If this is the last mapping, write back all caches for
1874 		 * the page being destroyed.
1875 		 */
1876 		if (TAILQ_NEXT(pv, pv_list) == NULL)
1877 			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
1878 
1879 		pmap->pm_stats.resident_count--;
1880 
1881 		pde = pmap_pde(pmap, pv->pv_va);
1882 		KASSERT(pde != NULL && *pde != 0, ("pmap_remove_all: pde"));
1883 		pte = pmap_pde_to_pte(pde, pv->pv_va);
1884 
1885 		tpte = *pte;
1886 		if (is_kernel_pmap(pmap))
1887 			*pte = PTE_G;
1888 		else
1889 			*pte = 0;
1890 
1891 		if (pte_test(&tpte, PTE_W))
1892 			pmap->pm_stats.wired_count--;
1893 
1894 		/*
1895 		 * Update the vm_page_t clean and reference bits.
1896 		 */
1897 		if (pte_test(&tpte, PTE_D)) {
1898 			KASSERT(!pte_test(&tpte, PTE_RO),
1899 			    ("%s: modified page not writable: va: %p, pte: %#jx",
1900 			    __func__, (void *)pv->pv_va, (uintmax_t)tpte));
1901 			vm_page_dirty(m);
1902 		}
1903 		pmap_invalidate_page(pmap, pv->pv_va);
1904 
1905 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1906 		pmap_unuse_pt(pmap, pv->pv_va, *pde);
1907 		free_pv_entry(pmap, pv);
1908 		PMAP_UNLOCK(pmap);
1909 	}
1910 
1911 	vm_page_aflag_clear(m, PGA_WRITEABLE);
1912 	m->md.pv_flags &= ~PV_TABLE_REF;
1913 	rw_wunlock(&pvh_global_lock);
1914 }
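
/*
 * Usage sketch (illustrative): the VM system calls pmap_remove_all()
 * when a page must lose every mapping at once, e.g. before the page is
 * freed or laundered:
 *
 *	if ((m->oflags & VPO_UNMANAGED) == 0)
 *		pmap_remove_all(m);
 *
 * On return the page's pv list is empty and PGA_WRITEABLE is clear.
 */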
1915 
1916 /*
1917  *	Set the physical protection on the
1918  *	specified range of this map as requested.
1919  */
1920 void
1921 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1922 {
1923 	pt_entry_t pbits, *pte;
1924 	pd_entry_t *pde, *pdpe;
1925 	vm_offset_t va, va_next;
1926 	vm_paddr_t pa;
1927 	vm_page_t m;
1928 
1929 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1930 		pmap_remove(pmap, sva, eva);
1931 		return;
1932 	}
1933 	if (prot & VM_PROT_WRITE)
1934 		return;
1935 
1936 	PMAP_LOCK(pmap);
1937 	for (; sva < eva; sva = va_next) {
1938 		pdpe = pmap_segmap(pmap, sva);
1939 #ifdef __mips_n64
1940 		if (*pdpe == 0) {
1941 			va_next = (sva + NBSEG) & ~SEGMASK;
1942 			if (va_next < sva)
1943 				va_next = eva;
1944 			continue;
1945 		}
1946 #endif
1947 		va_next = (sva + NBPDR) & ~PDRMASK;
1948 		if (va_next < sva)
1949 			va_next = eva;
1950 
1951 		pde = pmap_pdpe_to_pde(pdpe, sva);
1952 		if (*pde == NULL)
1953 			continue;
1954 
1955 		/*
1956 		 * Limit our scan to either the end of the va represented
1957 		 * by the current page table page, or to the end of the
1958 		 * range being write protected.
1959 		 */
1960 		if (va_next > eva)
1961 			va_next = eva;
1962 
1963 		va = va_next;
1964 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1965 		    sva += PAGE_SIZE) {
1966 			pbits = *pte;
1967 			if (!pte_test(&pbits, PTE_V) || pte_test(&pbits,
1968 			    PTE_RO)) {
1969 				if (va != va_next) {
1970 					pmap_invalidate_range(pmap, va, sva);
1971 					va = va_next;
1972 				}
1973 				continue;
1974 			}
1975 			pte_set(&pbits, PTE_RO);
1976 			if (pte_test(&pbits, PTE_D)) {
1977 				pte_clear(&pbits, PTE_D);
1978 				if (pte_test(&pbits, PTE_MANAGED)) {
1979 					pa = TLBLO_PTE_TO_PA(pbits);
1980 					m = PHYS_TO_VM_PAGE(pa);
1981 					vm_page_dirty(m);
1982 				}
1983 				if (va == va_next)
1984 					va = sva;
1985 			} else {
1986 				/*
1987 				 * Unless PTE_D is set, any TLB entries
1988 				 * mapping "sva" don't allow write access, so
1989 				 * they needn't be invalidated.
1990 				 */
1991 				if (va != va_next) {
1992 					pmap_invalidate_range(pmap, va, sva);
1993 					va = va_next;
1994 				}
1995 			}
1996 			*pte = pbits;
1997 		}
1998 		if (va != va_next)
1999 			pmap_invalidate_range(pmap, va, sva);
2000 	}
2001 	PMAP_UNLOCK(pmap);
2002 }
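
/*
 * Usage sketch (illustrative): an mprotect(2)-style downgrade of a user
 * range to read-only reaches this routine roughly as:
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);
 *
 * Note the early returns above: removing all access is delegated to
 * pmap_remove(), and adding write access is deliberately a no-op here,
 * since write permission is granted lazily through the fault path.
 */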
2003 
2004 /*
2005  *	Insert the given physical page (p) at
2006  *	the specified virtual address (v) in the
2007  *	target physical map with the protection requested.
2008  *
2009  *	If specified, the page will be wired down, meaning
2010  *	that the related pte can not be reclaimed.
2011  *
2012  *	NB:  This is the only routine which MAY NOT lazy-evaluate
2013  *	or lose information.  That is, this routine must actually
2014  *	insert this page into the given map NOW.
2015  */
2016 int
2017 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
2018     u_int flags, int8_t psind __unused)
2019 {
2020 	vm_paddr_t pa, opa;
2021 	pt_entry_t *pte;
2022 	pt_entry_t origpte, newpte;
2023 	pv_entry_t pv;
2024 	vm_page_t mpte, om;
2025 
2026 	va &= ~PAGE_MASK;
2027  	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
2028 	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
2029 	    va >= kmi.clean_eva,
2030 	    ("pmap_enter: managed mapping within the clean submap"));
2031 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
2032 		VM_OBJECT_ASSERT_LOCKED(m->object);
2033 	pa = VM_PAGE_TO_PHYS(m);
2034 	newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, flags, prot);
2035 	if ((flags & PMAP_ENTER_WIRED) != 0)
2036 		newpte |= PTE_W;
2037 	if (is_kernel_pmap(pmap))
2038 		newpte |= PTE_G;
2039 	PMAP_PTE_SET_CACHE_BITS(newpte, pa, m);
2040 	if ((m->oflags & VPO_UNMANAGED) == 0)
2041 		newpte |= PTE_MANAGED;
2042 
2043 	mpte = NULL;
2044 
2045 	rw_wlock(&pvh_global_lock);
2046 	PMAP_LOCK(pmap);
2047 
2048 	/*
2049 	 * In the case that a page table page is not resident, we are
2050 	 * creating it here.
2051 	 */
2052 	if (va < VM_MAXUSER_ADDRESS) {
2053 		mpte = pmap_allocpte(pmap, va, flags);
2054 		if (mpte == NULL) {
2055 			KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
2056 			    ("pmap_allocpte failed with sleep allowed"));
2057 			rw_wunlock(&pvh_global_lock);
2058 			PMAP_UNLOCK(pmap);
2059 			return (KERN_RESOURCE_SHORTAGE);
2060 		}
2061 	}
2062 	pte = pmap_pte(pmap, va);
2063 
2064 	/*
2065 	 * The page directory entry is not valid: a needed PT page is missing.
2066 	 */
2067 	if (pte == NULL) {
2068 		panic("pmap_enter: invalid page directory, pdir=%p, va=%p",
2069 		    (void *)pmap->pm_segtab, (void *)va);
2070 	}
2071 
2072 	origpte = *pte;
2073 	KASSERT(!pte_test(&origpte, PTE_D | PTE_RO | PTE_V),
2074 	    ("pmap_enter: modified page not writable: va: %p, pte: %#jx",
2075 	    (void *)va, (uintmax_t)origpte));
2076 	opa = TLBLO_PTE_TO_PA(origpte);
2077 
2078 	/*
2079 	 * Mapping has not changed, must be protection or wiring change.
2080 	 */
2081 	if (pte_test(&origpte, PTE_V) && opa == pa) {
2082 		/*
2083 		 * Wiring change, just update stats. We don't worry about
2084 		 * wiring PT pages as they remain resident as long as there
2085 		 * are valid mappings in them. Hence, if a user page is
2086 		 * wired, the PT page will be wired as well.
2087 		 */
2088 		if (pte_test(&newpte, PTE_W) && !pte_test(&origpte, PTE_W))
2089 			pmap->pm_stats.wired_count++;
2090 		else if (!pte_test(&newpte, PTE_W) && pte_test(&origpte,
2091 		    PTE_W))
2092 			pmap->pm_stats.wired_count--;
2093 
2094 		/*
2095 		 * Remove extra pte reference
2096 		 */
2097 		if (mpte)
2098 			mpte->wire_count--;
2099 
2100 		if (pte_test(&origpte, PTE_MANAGED)) {
2101 			m->md.pv_flags |= PV_TABLE_REF;
2102 			if (!pte_test(&newpte, PTE_RO))
2103 				vm_page_aflag_set(m, PGA_WRITEABLE);
2104 		}
2105 		goto validate;
2106 	}
2107 
2108 	pv = NULL;
2109 
2110 	/*
2111 	 * Mapping has changed, invalidate old range and fall through to
2112 	 * handle validating new mapping.
2113 	 */
2114 	if (opa) {
2115 		if (is_kernel_pmap(pmap))
2116 			*pte = PTE_G;
2117 		else
2118 			*pte = 0;
2119 		if (pte_test(&origpte, PTE_W))
2120 			pmap->pm_stats.wired_count--;
2121 		if (pte_test(&origpte, PTE_MANAGED)) {
2122 			om = PHYS_TO_VM_PAGE(opa);
2123 			if (pte_test(&origpte, PTE_D))
2124 				vm_page_dirty(om);
2125 			if ((om->md.pv_flags & PV_TABLE_REF) != 0) {
2126 				om->md.pv_flags &= ~PV_TABLE_REF;
2127 				vm_page_aflag_set(om, PGA_REFERENCED);
2128 			}
2129 			pv = pmap_pvh_remove(&om->md, pmap, va);
2130 			if (!pte_test(&newpte, PTE_MANAGED))
2131 				free_pv_entry(pmap, pv);
2132 			if ((om->aflags & PGA_WRITEABLE) != 0 &&
2133 			    TAILQ_EMPTY(&om->md.pv_list))
2134 				vm_page_aflag_clear(om, PGA_WRITEABLE);
2135 		}
2136 		pmap_invalidate_page(pmap, va);
2137 		origpte = 0;
2138 		if (mpte != NULL) {
2139 			mpte->wire_count--;
2140 			KASSERT(mpte->wire_count > 0,
2141 			    ("pmap_enter: missing reference to page table page,"
2142 			    " va: %p", (void *)va));
2143 		}
2144 	} else
2145 		pmap->pm_stats.resident_count++;
2146 
2147 	/*
2148 	 * Enter on the PV list if part of our managed memory.
2149 	 */
2150 	if (pte_test(&newpte, PTE_MANAGED)) {
2151 		m->md.pv_flags |= PV_TABLE_REF;
2152 		if (pv == NULL) {
2153 			pv = get_pv_entry(pmap, FALSE);
2154 			pv->pv_va = va;
2155 		}
2156 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
2157 		if (!pte_test(&newpte, PTE_RO))
2158 			vm_page_aflag_set(m, PGA_WRITEABLE);
2159 	}
2160 
2161 	/*
2162 	 * Increment counters
2163 	 */
2164 	if (pte_test(&newpte, PTE_W))
2165 		pmap->pm_stats.wired_count++;
2166 
2167 validate:
2168 
2169 #ifdef PMAP_DEBUG
2170 	printf("pmap_enter:  va: %p -> pa: %p\n", (void *)va, (void *)pa);
2171 #endif
2172 
2173 	/*
2174 	 * if the mapping or permission bits are different, we need to
2175 	 * update the pte.
2176 	 */
2177 	if (origpte != newpte) {
2178 		*pte = newpte;
2179 		if (pte_test(&origpte, PTE_V)) {
2180 			KASSERT(opa == pa, ("pmap_enter: invalid update"));
2181 			if (pte_test(&origpte, PTE_D)) {
2182 				if (pte_test(&origpte, PTE_MANAGED))
2183 					vm_page_dirty(m);
2184 			}
2185 			pmap_update_page(pmap, va, newpte);
2186 		}
2187 	}
2188 
2189 	/*
2190 	 * Sync I & D caches for executable pages.  Do this only if the
2191 	 * target pmap belongs to the current process.  Otherwise, an
2192 	 * unresolvable TLB miss may occur.
2193 	 */
2194 	if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
2195 	    (prot & VM_PROT_EXECUTE)) {
2196 		mips_icache_sync_range(va, PAGE_SIZE);
2197 		mips_dcache_wbinv_range(va, PAGE_SIZE);
2198 	}
2199 	rw_wunlock(&pvh_global_lock);
2200 	PMAP_UNLOCK(pmap);
2201 	return (KERN_SUCCESS);
2202 }
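
/*
 * Usage sketch (illustrative; the flags combination is assumed from the
 * PMAP_ENTER_NOSLEEP handling visible above): a caller that must not
 * sleep passes PMAP_ENTER_NOSLEEP and retries on shortage:
 *
 *	rv = pmap_enter(pmap, va, m, prot, prot | PMAP_ENTER_NOSLEEP, 0);
 *	if (rv == KERN_RESOURCE_SHORTAGE) {
 *		... back off, let memory be reclaimed, and retry ...
 *	}
 */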
2203 
2204 /*
2205  * this code makes some *MAJOR* assumptions:
2206  * 1. The current pmap and the target pmap exist.
2207  * 2. Not wired.
2208  * 3. Read access.
2209  * 4. No page table pages.
2210  * but is *MUCH* faster than pmap_enter...
2211  */
2212 
2213 void
2214 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2215 {
2216 
2217 	rw_wlock(&pvh_global_lock);
2218 	PMAP_LOCK(pmap);
2219 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
2220 	rw_wunlock(&pvh_global_lock);
2221 	PMAP_UNLOCK(pmap);
2222 }
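
/*
 * Note (illustrative): pmap_enter_quick() is the cheap prefault path.
 * As pmap_enter_quick_locked() below shows, it only ever installs
 * read-only mappings, so a later write still takes a fault and is
 * promoted through pmap_enter() or the PTE_D emulation path.
 */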
2223 
2224 static vm_page_t
2225 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2226     vm_prot_t prot, vm_page_t mpte)
2227 {
2228 	pt_entry_t *pte, npte;
2229 	vm_paddr_t pa;
2230 
2231 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2232 	    (m->oflags & VPO_UNMANAGED) != 0,
2233 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2234 	rw_assert(&pvh_global_lock, RA_WLOCKED);
2235 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2236 
2237 	/*
2238 	 * In the case that a page table page is not resident, we are
2239 	 * creating it here.
2240 	 */
2241 	if (va < VM_MAXUSER_ADDRESS) {
2242 		pd_entry_t *pde;
2243 		unsigned ptepindex;
2244 
2245 		/*
2246 		 * Calculate pagetable page index
2247 		 */
2248 		ptepindex = pmap_pde_pindex(va);
2249 		if (mpte && (mpte->pindex == ptepindex)) {
2250 			mpte->wire_count++;
2251 		} else {
2252 			/*
2253 			 * Get the page directory entry
2254 			 */
2255 			pde = pmap_pde(pmap, va);
2256 
2257 			/*
2258 			 * If the page table page is mapped, we just
2259 			 * increment the hold count, and activate it.
2260 			 */
2261 			if (pde && *pde != 0) {
2262 				mpte = PHYS_TO_VM_PAGE(
2263 				    MIPS_DIRECT_TO_PHYS(*pde));
2264 				mpte->wire_count++;
2265 			} else {
2266 				mpte = _pmap_allocpte(pmap, ptepindex,
2267 				    PMAP_ENTER_NOSLEEP);
2268 				if (mpte == NULL)
2269 					return (mpte);
2270 			}
2271 		}
2272 	} else {
2273 		mpte = NULL;
2274 	}
2275 
2276 	pte = pmap_pte(pmap, va);
2277 	if (pte_test(pte, PTE_V)) {
2278 		if (mpte != NULL) {
2279 			mpte->wire_count--;
2280 			mpte = NULL;
2281 		}
2282 		return (mpte);
2283 	}
2284 
2285 	/*
2286 	 * Enter on the PV list if part of our managed memory.
2287 	 */
2288 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
2289 	    !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2290 		if (mpte != NULL) {
2291 			pmap_unwire_ptp(pmap, va, mpte);
2292 			mpte = NULL;
2293 		}
2294 		return (mpte);
2295 	}
2296 
2297 	/*
2298 	 * Increment counters
2299 	 */
2300 	pmap->pm_stats.resident_count++;
2301 
2302 	pa = VM_PAGE_TO_PHYS(m);
2303 
2304 	/*
2305 	 * Now validate mapping with RO protection
2306 	 */
2307 	npte = PTE_RO | TLBLO_PA_TO_PFN(pa) | PTE_V;
2308 	if ((m->oflags & VPO_UNMANAGED) == 0)
2309 		npte |= PTE_MANAGED;
2310 
2311 	PMAP_PTE_SET_CACHE_BITS(npte, pa, m);
2312 
2313 	if (is_kernel_pmap(pmap))
2314 		*pte = npte | PTE_G;
2315 	else {
2316 		*pte = npte;
2317 		/*
2318 		 * Sync I & D caches.  Do this only if the target pmap
2319 		 * belongs to the current process.  Otherwise, an
2320 		 * unresolvable TLB miss may occur. */
2321 		if (pmap == &curproc->p_vmspace->vm_pmap) {
2322 			va &= ~PAGE_MASK;
2323 			mips_icache_sync_range(va, PAGE_SIZE);
2324 			mips_dcache_wbinv_range(va, PAGE_SIZE);
2325 		}
2326 	}
2327 	return (mpte);
2328 }
2329 
2330 /*
2331  * Make a temporary mapping for a physical address.  This is only intended
2332  * to be used for panic dumps.
2333  *
2334  * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2335  */
2336 void *
2337 pmap_kenter_temporary(vm_paddr_t pa, int i)
2338 {
2339 	vm_offset_t va;
2340 
2341 	if (i != 0)
2342 		printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
2343 		    __func__);
2344 
2345 	if (MIPS_DIRECT_MAPPABLE(pa)) {
2346 		va = MIPS_PHYS_TO_DIRECT(pa);
2347 	} else {
2348 #ifndef __mips_n64    /* XXX : to be converted to new style */
2349 		int cpu;
2350 		register_t intr;
2351 		struct local_sysmaps *sysm;
2352 		pt_entry_t *pte, npte;
2353 
2354 		/* If this is used for anything other than dumps, we may need to
2355 		 * leave interrupts disabled on return.  If crash dumps don't work
2356 		 * when we get to this point, we might want to consider leaving
2357 		 * things disabled as a starting point ;-)
2358 		 */
2359 		intr = intr_disable();
2360 		cpu = PCPU_GET(cpuid);
2361 		sysm = &sysmap_lmem[cpu];
2362 		/* Since this is for the debugger, no locks or any other fun */
2363 		npte = TLBLO_PA_TO_PFN(pa) | PTE_C_CACHE | PTE_D | PTE_V |
2364 		    PTE_G;
2365 		pte = pmap_pte(kernel_pmap, sysm->base);
2366 		*pte = npte;
2367 		sysm->valid1 = 1;
2368 		pmap_update_page(kernel_pmap, sysm->base, npte);
2369 		va = sysm->base;
2370 		intr_restore(intr);
2371 #endif
2372 	}
2373 	return ((void *)va);
2374 }
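
/*
 * Usage sketch (illustrative): a crash-dump writer maps one page at a
 * time, copies it out, then releases the mapping:
 *
 *	va = pmap_kenter_temporary(pa, 0);
 *	... write PAGE_SIZE bytes starting at va to the dump device ...
 *	pmap_kenter_temporary_free(pa);
 */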
2375 
2376 void
2377 pmap_kenter_temporary_free(vm_paddr_t pa)
2378 {
2379 #ifndef __mips_n64    /* XXX : to be converted to new style */
2380 	int cpu;
2381 	register_t intr;
2382 	struct local_sysmaps *sysm;
2383 #endif
2384 
2385 	if (MIPS_DIRECT_MAPPABLE(pa)) {
2386 		/* nothing to do for this case */
2387 		return;
2388 	}
2389 #ifndef __mips_n64    /* XXX : to be converted to new style */
2390 	cpu = PCPU_GET(cpuid);
2391 	sysm = &sysmap_lmem[cpu];
2392 	if (sysm->valid1) {
2393 		pt_entry_t *pte;
2394 
2395 		intr = intr_disable();
2396 		pte = pmap_pte(kernel_pmap, sysm->base);
2397 		*pte = PTE_G;
2398 		pmap_invalidate_page(kernel_pmap, sysm->base);
2399 		intr_restore(intr);
2400 		sysm->valid1 = 0;
2401 	}
2402 #endif
2403 }
2404 
2405 /*
2406  * Maps a sequence of resident pages belonging to the same object.
2407  * The sequence begins with the given page m_start.  This page is
2408  * mapped at the given virtual address start.  Each subsequent page is
2409  * mapped at a virtual address that is offset from start by the same
2410  * amount as the page is offset from m_start within the object.  The
2411  * last page in the sequence is the page with the largest offset from
2412  * m_start that can be mapped at a virtual address less than the given
2413  * virtual address end.  Not every virtual page between start and end
2414  * is mapped; only those for which a resident page exists with the
2415  * corresponding offset from m_start are mapped.
2416  */
2417 void
2418 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2419     vm_page_t m_start, vm_prot_t prot)
2420 {
2421 	vm_page_t m, mpte;
2422 	vm_pindex_t diff, psize;
2423 
2424 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
2425 
2426 	psize = atop(end - start);
2427 	mpte = NULL;
2428 	m = m_start;
2429 	rw_wlock(&pvh_global_lock);
2430 	PMAP_LOCK(pmap);
2431 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2432 		mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
2433 		    prot, mpte);
2434 		m = TAILQ_NEXT(m, listq);
2435 	}
2436 	rw_wunlock(&pvh_global_lock);
2437  	PMAP_UNLOCK(pmap);
2438 }
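
/*
 * Illustrative example (not from the original source): with m_start at
 * pindex 10 mapped at "start", a resident page at pindex 13 is entered
 * at start + ptoa(3), while any non-resident pindex in between is
 * simply skipped rather than faulted in.
 */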
2439 
2440 /*
2441  * pmap_object_init_pt preloads the ptes for a given object
2442  * into the specified pmap.  This eliminates the blast of soft
2443  * faults on process startup and immediately after an mmap.
2444  */
2445 void
2446 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2447     vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2448 {
2449 	VM_OBJECT_ASSERT_WLOCKED(object);
2450 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2451 	    ("pmap_object_init_pt: non-device object"));
2452 }
2453 
2454 /*
2455  *	Clear the wired attribute from the mappings for the specified range of
2456  *	addresses in the given pmap.  Every valid mapping within that range
2457  *	must have the wired attribute set.  In contrast, invalid mappings
2458  *	cannot have the wired attribute set, so they are ignored.
2459  *
2460  *	The wired attribute of the page table entry is not a hardware feature,
2461  *	so there is no need to invalidate any TLB entries.
2462  */
2463 void
2464 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2465 {
2466 	pd_entry_t *pde, *pdpe;
2467 	pt_entry_t *pte;
2468 	vm_offset_t va_next;
2469 
2470 	PMAP_LOCK(pmap);
2471 	for (; sva < eva; sva = va_next) {
2472 		pdpe = pmap_segmap(pmap, sva);
2473 #ifdef __mips_n64
2474 		if (*pdpe == NULL) {
2475 			va_next = (sva + NBSEG) & ~SEGMASK;
2476 			if (va_next < sva)
2477 				va_next = eva;
2478 			continue;
2479 		}
2480 #endif
2481 		va_next = (sva + NBPDR) & ~PDRMASK;
2482 		if (va_next < sva)
2483 			va_next = eva;
2484 		pde = pmap_pdpe_to_pde(pdpe, sva);
2485 		if (*pde == NULL)
2486 			continue;
2487 		if (va_next > eva)
2488 			va_next = eva;
2489 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
2490 		    sva += PAGE_SIZE) {
2491 			if (!pte_test(pte, PTE_V))
2492 				continue;
2493 			if (!pte_test(pte, PTE_W))
2494 				panic("pmap_unwire: pte %#jx is missing PG_W",
2495 				    (uintmax_t)*pte);
2496 			pte_clear(pte, PTE_W);
2497 			pmap->pm_stats.wired_count--;
2498 		}
2499 	}
2500 	PMAP_UNLOCK(pmap);
2501 }
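
/*
 * Usage sketch (illustrative): an munlock(2)-style unwiring of a user
 * range lands here once the VM layer has verified that every mapping
 * in the range is wired:
 *
 *	pmap_unwire(pmap, trunc_page(addr), round_page(addr + len));
 *
 * Only the soft PTE_W bit and pm_stats.wired_count change; as the
 * comment above explains, no TLB invalidation is required.
 */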
2502 
2503 /*
2504  *	Copy the range specified by src_addr/len
2505  *	from the source map to the range dst_addr/len
2506  *	in the destination map.
2507  *
2508  *	This routine is only advisory and need not do anything.
2509  */
2510 
2511 void
2512 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2513     vm_size_t len, vm_offset_t src_addr)
2514 {
2515 }
2516 
2517 /*
2518  *	pmap_zero_page zeros the specified hardware page by mapping
2519  *	the page into KVM and using bzero to clear its contents.
2520  *
2521  * 	Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2522  */
2523 void
2524 pmap_zero_page(vm_page_t m)
2525 {
2526 	vm_offset_t va;
2527 	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2528 
2529 	if (MIPS_DIRECT_MAPPABLE(phys)) {
2530 		va = MIPS_PHYS_TO_DIRECT(phys);
2531 		bzero((caddr_t)va, PAGE_SIZE);
2532 		mips_dcache_wbinv_range(va, PAGE_SIZE);
2533 	} else {
2534 		va = pmap_lmem_map1(phys);
2535 		bzero((caddr_t)va, PAGE_SIZE);
2536 		mips_dcache_wbinv_range(va, PAGE_SIZE);
2537 		pmap_lmem_unmap();
2538 	}
2539 }
2540 
2541 /*
2542  *	pmap_zero_page_area zeros the specified hardware page by mapping
2543  *	the page into KVM and using bzero to clear its contents.
2544  *
2545  *	off and size may not cover an area beyond a single hardware page.
2546  */
2547 void
2548 pmap_zero_page_area(vm_page_t m, int off, int size)
2549 {
2550 	vm_offset_t va;
2551 	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2552 
2553 	if (MIPS_DIRECT_MAPPABLE(phys)) {
2554 		va = MIPS_PHYS_TO_DIRECT(phys);
2555 		bzero((char *)(caddr_t)va + off, size);
2556 		mips_dcache_wbinv_range(va + off, size);
2557 	} else {
2558 		va = pmap_lmem_map1(phys);
2559 		bzero((char *)va + off, size);
2560 		mips_dcache_wbinv_range(va + off, size);
2561 		pmap_lmem_unmap();
2562 	}
2563 }
2564 
2565 /*
2566  *	pmap_copy_page copies the specified (machine independent)
2567  *	page by mapping the page into virtual memory and using
2568  *	bcopy to copy the page, one machine dependent page at a
2569  *	time.
2570  *
2571  * 	Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2572  */
2573 void
2574 pmap_copy_page(vm_page_t src, vm_page_t dst)
2575 {
2576 	vm_offset_t va_src, va_dst;
2577 	vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src);
2578 	vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst);
2579 
2580 	if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) {
2581 		/* easy case, all can be accessed via KSEG0 */
2582 		/*
2583 		 * Flush all caches for VAs that map this page, to make
2584 		 * sure the data in SDRAM is up to date.
2585 		 */
2586 		pmap_flush_pvcache(src);
2587 		mips_dcache_wbinv_range_index(
2588 		    MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE);
2589 		va_src = MIPS_PHYS_TO_DIRECT(phys_src);
2590 		va_dst = MIPS_PHYS_TO_DIRECT(phys_dst);
2591 		bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2592 		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2593 	} else {
2594 		va_src = pmap_lmem_map2(phys_src, phys_dst);
2595 		va_dst = va_src + PAGE_SIZE;
2596 		bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2597 		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2598 		pmap_lmem_unmap();
2599 	}
2600 }
2601 
2602 int unmapped_buf_allowed;
2603 
2604 void
2605 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2606     vm_offset_t b_offset, int xfersize)
2607 {
2608 	char *a_cp, *b_cp;
2609 	vm_page_t a_m, b_m;
2610 	vm_offset_t a_pg_offset, b_pg_offset;
2611 	vm_paddr_t a_phys, b_phys;
2612 	int cnt;
2613 
2614 	while (xfersize > 0) {
2615 		a_pg_offset = a_offset & PAGE_MASK;
2616 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2617 		a_m = ma[a_offset >> PAGE_SHIFT];
2618 		a_phys = VM_PAGE_TO_PHYS(a_m);
2619 		b_pg_offset = b_offset & PAGE_MASK;
2620 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2621 		b_m = mb[b_offset >> PAGE_SHIFT];
2622 		b_phys = VM_PAGE_TO_PHYS(b_m);
2623 		if (MIPS_DIRECT_MAPPABLE(a_phys) &&
2624 		    MIPS_DIRECT_MAPPABLE(b_phys)) {
2625 			pmap_flush_pvcache(a_m);
2626 			mips_dcache_wbinv_range_index(
2627 			    MIPS_PHYS_TO_DIRECT(b_phys), PAGE_SIZE);
2628 			a_cp = (char *)MIPS_PHYS_TO_DIRECT(a_phys) +
2629 			    a_pg_offset;
2630 			b_cp = (char *)MIPS_PHYS_TO_DIRECT(b_phys) +
2631 			    b_pg_offset;
2632 			bcopy(a_cp, b_cp, cnt);
2633 			mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt);
2634 		} else {
2635 			a_cp = (char *)pmap_lmem_map2(a_phys, b_phys);
2636 			b_cp = (char *)a_cp + PAGE_SIZE;
2637 			a_cp += a_pg_offset;
2638 			b_cp += b_pg_offset;
2639 			bcopy(a_cp, b_cp, cnt);
2640 			mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt);
2641 			pmap_lmem_unmap();
2642 		}
2643 		a_offset += cnt;
2644 		b_offset += cnt;
2645 		xfersize -= cnt;
2646 	}
2647 }
2648 
2649 vm_offset_t
2650 pmap_quick_enter_page(vm_page_t m)
2651 {
2652 #if defined(__mips_n64)
2653 	return MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
2654 #else
2655 	vm_paddr_t pa;
2656 	struct local_sysmaps *sysm;
2657 	pt_entry_t *pte, npte;
2658 
2659 	pa = VM_PAGE_TO_PHYS(m);
2660 
2661 	if (MIPS_DIRECT_MAPPABLE(pa)) {
2662 		if (pmap_page_get_memattr(m) != VM_MEMATTR_WRITE_BACK)
2663 			return (MIPS_PHYS_TO_DIRECT_UNCACHED(pa));
2664 		else
2665 			return (MIPS_PHYS_TO_DIRECT(pa));
2666 	}
2667 	critical_enter();
2668 	sysm = &sysmap_lmem[PCPU_GET(cpuid)];
2669 
2670 	KASSERT(sysm->valid1 == 0, ("pmap_quick_enter_page: PTE busy"));
2671 
2672 	pte = pmap_pte(kernel_pmap, sysm->base);
2673 	npte = TLBLO_PA_TO_PFN(pa) | PTE_D | PTE_V | PTE_G;
2674 	PMAP_PTE_SET_CACHE_BITS(npte, pa, m);
2675 	*pte = npte;
2676 	sysm->valid1 = 1;
2677 
2678 	return (sysm->base);
2679 #endif
2680 }
2681 
2682 void
2683 pmap_quick_remove_page(vm_offset_t addr)
2684 {
2685 	mips_dcache_wbinv_range(addr, PAGE_SIZE);
2686 
2687 #if !defined(__mips_n64)
2688 	struct local_sysmaps *sysm;
2689 	pt_entry_t *pte;
2690 
2691 	if (addr >= MIPS_KSEG0_START && addr < MIPS_KSEG0_END)
2692 		return;
2693 
2694 	sysm = &sysmap_lmem[PCPU_GET(cpuid)];
2695 
2696 	KASSERT(sysm->valid1 != 0,
2697 	    ("pmap_quick_remove_page: PTE not in use"));
2698 	KASSERT(sysm->base == addr,
2699 	    ("pmap_quick_remove_page: invalid address"));
2700 
2701 	pte = pmap_pte(kernel_pmap, addr);
2702 	*pte = PTE_G;
2703 	tlb_invalidate_address(kernel_pmap, addr);
2704 	sysm->valid1 = 0;
2705 	critical_exit();
2706 #endif
2707 }
2708 
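
/*
 * Usage sketch (illustrative): the quick-map pair brackets a short
 * kernel copy into an arbitrary physical page:
 *
 *	va = pmap_quick_enter_page(m);
 *	bcopy(buf, (void *)va, PAGE_SIZE);
 *	pmap_quick_remove_page(va);
 *
 * On !n64 the pair must run on one CPU: pmap_quick_enter_page() enters
 * a critical section that pmap_quick_remove_page() exits.
 */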
2709 /*
2710  * Returns true if the pmap's pv is one of the first
2711  * 16 pvs linked to from this page.  This count may
2712  * be changed upwards or downwards in the future; it
2713  * is only necessary that true be returned for a small
2714  * subset of pmaps for proper page aging.
2715  */
2716 boolean_t
2717 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2718 {
2719 	pv_entry_t pv;
2720 	int loops = 0;
2721 	boolean_t rv;
2722 
2723 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2724 	    ("pmap_page_exists_quick: page %p is not managed", m));
2725 	rv = FALSE;
2726 	rw_wlock(&pvh_global_lock);
2727 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2728 		if (PV_PMAP(pv) == pmap) {
2729 			rv = TRUE;
2730 			break;
2731 		}
2732 		loops++;
2733 		if (loops >= 16)
2734 			break;
2735 	}
2736 	rw_wunlock(&pvh_global_lock);
2737 	return (rv);
2738 }
2739 
2740 /*
2741  * Remove all pages from the specified address space;
2742  * this aids process exit speeds.  Also, this code
2743  * is special cased for current process only, but
2744  * can have the more generic (and slightly slower)
2745  * mode enabled.  This is much faster than pmap_remove
2746  * in the case of running down an entire address space.
2747  */
2748 void
2749 pmap_remove_pages(pmap_t pmap)
2750 {
2751 	pd_entry_t *pde;
2752 	pt_entry_t *pte, tpte;
2753 	pv_entry_t pv;
2754 	vm_page_t m;
2755 	struct pv_chunk *pc, *npc;
2756 	u_long inuse, bitmask;
2757 	int allfree, bit, field, idx;
2758 
2759 	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2760 		printf("warning: pmap_remove_pages called with non-current pmap\n");
2761 		return;
2762 	}
2763 	rw_wlock(&pvh_global_lock);
2764 	PMAP_LOCK(pmap);
2765 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2766 		allfree = 1;
2767 		for (field = 0; field < _NPCM; field++) {
2768 			inuse = ~pc->pc_map[field] & pc_freemask[field];
2769 			while (inuse != 0) {
2770 				bit = ffsl(inuse) - 1;
2771 				bitmask = 1UL << bit;
2772 				idx = field * sizeof(inuse) * NBBY + bit;
2773 				pv = &pc->pc_pventry[idx];
2774 				inuse &= ~bitmask;
2775 
2776 				pde = pmap_pde(pmap, pv->pv_va);
2777 				KASSERT(pde != NULL && *pde != 0,
2778 				    ("pmap_remove_pages: pde"));
2779 				pte = pmap_pde_to_pte(pde, pv->pv_va);
2780 				if (!pte_test(pte, PTE_V))
2781 					panic("pmap_remove_pages: bad pte");
2782 				tpte = *pte;
2783 
2784 /*
2785  * We cannot remove wired pages from a process' mapping at this time
2786  */
2787 				if (pte_test(&tpte, PTE_W)) {
2788 					allfree = 0;
2789 					continue;
2790 				}
2791 				*pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2792 
2793 				m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte));
2794 				KASSERT(m != NULL,
2795 				    ("pmap_remove_pages: bad tpte %#jx",
2796 				    (uintmax_t)tpte));
2797 
2798 				/*
2799 				 * Update the vm_page_t clean and reference bits.
2800 				 */
2801 				if (pte_test(&tpte, PTE_D))
2802 					vm_page_dirty(m);
2803 
2804 				/* Mark free */
2805 				PV_STAT(pv_entry_frees++);
2806 				PV_STAT(pv_entry_spare++);
2807 				pv_entry_count--;
2808 				pc->pc_map[field] |= bitmask;
2809 				pmap->pm_stats.resident_count--;
2810 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2811 				if (TAILQ_EMPTY(&m->md.pv_list))
2812 					vm_page_aflag_clear(m, PGA_WRITEABLE);
2813 				pmap_unuse_pt(pmap, pv->pv_va, *pde);
2814 			}
2815 		}
2816 		if (allfree) {
2817 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2818 			free_pv_chunk(pc);
2819 		}
2820 	}
2821 	pmap_invalidate_all(pmap);
2822 	PMAP_UNLOCK(pmap);
2823 	rw_wunlock(&pvh_global_lock);
2824 }
2825 
2826 /*
2827  * pmap_testbit tests bits in pte's
2828  */
2829 static boolean_t
2830 pmap_testbit(vm_page_t m, int bit)
2831 {
2832 	pv_entry_t pv;
2833 	pmap_t pmap;
2834 	pt_entry_t *pte;
2835 	boolean_t rv = FALSE;
2836 
2837 	if (m->oflags & VPO_UNMANAGED)
2838 		return (rv);
2839 
2840 	rw_assert(&pvh_global_lock, RA_WLOCKED);
2841 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2842 		pmap = PV_PMAP(pv);
2843 		PMAP_LOCK(pmap);
2844 		pte = pmap_pte(pmap, pv->pv_va);
2845 		rv = pte_test(pte, bit);
2846 		PMAP_UNLOCK(pmap);
2847 		if (rv)
2848 			break;
2849 	}
2850 	return (rv);
2851 }
2852 
2853 /*
2854  *	pmap_page_wired_mappings:
2855  *
2856  *	Return the number of managed mappings to the given physical page
2857  *	that are wired.
2858  */
2859 int
2860 pmap_page_wired_mappings(vm_page_t m)
2861 {
2862 	pv_entry_t pv;
2863 	pmap_t pmap;
2864 	pt_entry_t *pte;
2865 	int count;
2866 
2867 	count = 0;
2868 	if ((m->oflags & VPO_UNMANAGED) != 0)
2869 		return (count);
2870 	rw_wlock(&pvh_global_lock);
2871 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2872 		pmap = PV_PMAP(pv);
2873 		PMAP_LOCK(pmap);
2874 		pte = pmap_pte(pmap, pv->pv_va);
2875 		if (pte_test(pte, PTE_W))
2876 			count++;
2877 		PMAP_UNLOCK(pmap);
2878 	}
2879 	rw_wunlock(&pvh_global_lock);
2880 	return (count);
2881 }
2882 
2883 /*
2884  * Clear the write and modified bits in each of the given page's mappings.
2885  */
2886 void
2887 pmap_remove_write(vm_page_t m)
2888 {
2889 	pmap_t pmap;
2890 	pt_entry_t pbits, *pte;
2891 	pv_entry_t pv;
2892 
2893 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2894 	    ("pmap_remove_write: page %p is not managed", m));
2895 
2896 	/*
2897 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2898 	 * set by another thread while the object is locked.  Thus,
2899 	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2900 	 */
2901 	VM_OBJECT_ASSERT_WLOCKED(m->object);
2902 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2903 		return;
2904 	rw_wlock(&pvh_global_lock);
2905 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2906 		pmap = PV_PMAP(pv);
2907 		PMAP_LOCK(pmap);
2908 		pte = pmap_pte(pmap, pv->pv_va);
2909 		KASSERT(pte != NULL && pte_test(pte, PTE_V),
2910 		    ("page on pv_list has no pte"));
2911 		pbits = *pte;
2912 		if (pte_test(&pbits, PTE_D)) {
2913 			pte_clear(&pbits, PTE_D);
2914 			vm_page_dirty(m);
2915 		}
2916 		pte_set(&pbits, PTE_RO);
2917 		if (pbits != *pte) {
2918 			*pte = pbits;
2919 			pmap_update_page(pmap, pv->pv_va, pbits);
2920 		}
2921 		PMAP_UNLOCK(pmap);
2922 	}
2923 	vm_page_aflag_clear(m, PGA_WRITEABLE);
2924 	rw_wunlock(&pvh_global_lock);
2925 }
2926 
2927 /*
2928  *	pmap_ts_referenced:
2929  *
2930  *	Return the count of reference bits for a page, clearing all of them.
2931  */
2932 int
2933 pmap_ts_referenced(vm_page_t m)
2934 {
2935 
2936 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2937 	    ("pmap_ts_referenced: page %p is not managed", m));
2938 	if (m->md.pv_flags & PV_TABLE_REF) {
2939 		rw_wlock(&pvh_global_lock);
2940 		m->md.pv_flags &= ~PV_TABLE_REF;
2941 		rw_wunlock(&pvh_global_lock);
2942 		return (1);
2943 	}
2944 	return (0);
2945 }
2946 
2947 /*
2948  *	pmap_is_modified:
2949  *
2950  *	Return whether or not the specified physical page was modified
2951  *	in any physical maps.
2952  */
2953 boolean_t
2954 pmap_is_modified(vm_page_t m)
2955 {
2956 	boolean_t rv;
2957 
2958 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2959 	    ("pmap_is_modified: page %p is not managed", m));
2960 
2961 	/*
2962 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2963 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2964 	 * is clear, no PTEs can have PTE_D set.
2965 	 */
2966 	VM_OBJECT_ASSERT_WLOCKED(m->object);
2967 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2968 		return (FALSE);
2969 	rw_wlock(&pvh_global_lock);
2970 	rv = pmap_testbit(m, PTE_D);
2971 	rw_wunlock(&pvh_global_lock);
2972 	return (rv);
2973 }
2974 
2975 /* N/C */
2976 
2977 /*
2978  *	pmap_is_prefaultable:
2979  *
2980  *	Return whether or not the specified virtual address is eligible
2981  *	for prefault.
2982  */
2983 boolean_t
2984 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2985 {
2986 	pd_entry_t *pde;
2987 	pt_entry_t *pte;
2988 	boolean_t rv;
2989 
2990 	rv = FALSE;
2991 	PMAP_LOCK(pmap);
2992 	pde = pmap_pde(pmap, addr);
2993 	if (pde != NULL && *pde != 0) {
2994 		pte = pmap_pde_to_pte(pde, addr);
2995 		rv = (*pte == 0);
2996 	}
2997 	PMAP_UNLOCK(pmap);
2998 	return (rv);
2999 }
3000 
3001 /*
3002  *	Apply the given advice to the specified range of addresses within the
3003  *	given pmap.  Depending on the advice, clear the referenced and/or
3004  *	modified flags in each mapping and set the mapped page's dirty field.
3005  */
3006 void
3007 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
3008 {
3009 	pd_entry_t *pde, *pdpe;
3010 	pt_entry_t *pte;
3011 	vm_offset_t va, va_next;
3012 	vm_paddr_t pa;
3013 	vm_page_t m;
3014 
3015 	if (advice != MADV_DONTNEED && advice != MADV_FREE)
3016 		return;
3017 	rw_wlock(&pvh_global_lock);
3018 	PMAP_LOCK(pmap);
3019 	for (; sva < eva; sva = va_next) {
3020 		pdpe = pmap_segmap(pmap, sva);
3021 #ifdef __mips_n64
3022 		if (*pdpe == 0) {
3023 			va_next = (sva + NBSEG) & ~SEGMASK;
3024 			if (va_next < sva)
3025 				va_next = eva;
3026 			continue;
3027 		}
3028 #endif
3029 		va_next = (sva + NBPDR) & ~PDRMASK;
3030 		if (va_next < sva)
3031 			va_next = eva;
3032 
3033 		pde = pmap_pdpe_to_pde(pdpe, sva);
3034 		if (*pde == NULL)
3035 			continue;
3036 
3037 		/*
3038 		 * Limit our scan to either the end of the va represented
3039 		 * by the current page table page, or to the end of the
3040 		 * range being advised.
3041 		 */
3042 		if (va_next > eva)
3043 			va_next = eva;
3044 
3045 		va = va_next;
3046 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
3047 		    sva += PAGE_SIZE) {
3048 			if (!pte_test(pte, PTE_MANAGED | PTE_V)) {
3049 				if (va != va_next) {
3050 					pmap_invalidate_range(pmap, va, sva);
3051 					va = va_next;
3052 				}
3053 				continue;
3054 			}
3055 			pa = TLBLO_PTE_TO_PA(*pte);
3056 			m = PHYS_TO_VM_PAGE(pa);
3057 			m->md.pv_flags &= ~PV_TABLE_REF;
3058 			if (pte_test(pte, PTE_D)) {
3059 				if (advice == MADV_DONTNEED) {
3060 					/*
3061 					 * Future calls to pmap_is_modified()
3062 					 * can be avoided by making the page
3063 					 * dirty now.
3064 					 */
3065 					vm_page_dirty(m);
3066 				} else {
3067 					pte_clear(pte, PTE_D);
3068 					if (va == va_next)
3069 						va = sva;
3070 				}
3071 			} else {
3072 				/*
3073 				 * Unless PTE_D is set, any TLB entries
3074 				 * mapping "sva" don't allow write access, so
3075 				 * they needn't be invalidated.
3076 				 */
3077 				if (va != va_next) {
3078 					pmap_invalidate_range(pmap, va, sva);
3079 					va = va_next;
3080 				}
3081 			}
3082 		}
3083 		if (va != va_next)
3084 			pmap_invalidate_range(pmap, va, sva);
3085 	}
3086 	rw_wunlock(&pvh_global_lock);
3087 	PMAP_UNLOCK(pmap);
3088 }
3089 
3090 /*
3091  *	Clear the modify bits on the specified physical page.
3092  */
3093 void
3094 pmap_clear_modify(vm_page_t m)
3095 {
3096 	pmap_t pmap;
3097 	pt_entry_t *pte;
3098 	pv_entry_t pv;
3099 
3100 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3101 	    ("pmap_clear_modify: page %p is not managed", m));
3102 	VM_OBJECT_ASSERT_WLOCKED(m->object);
3103 	KASSERT(!vm_page_xbusied(m),
3104 	    ("pmap_clear_modify: page %p is exclusive busied", m));
3105 
3106 	/*
3107 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set.
3108 	 * If the object containing the page is locked and the page is not
3109 	 * write busied, then PGA_WRITEABLE cannot be concurrently set.
3110 	 */
3111 	if ((m->aflags & PGA_WRITEABLE) == 0)
3112 		return;
3113 	rw_wlock(&pvh_global_lock);
3114 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3115 		pmap = PV_PMAP(pv);
3116 		PMAP_LOCK(pmap);
3117 		pte = pmap_pte(pmap, pv->pv_va);
3118 		if (pte_test(pte, PTE_D)) {
3119 			pte_clear(pte, PTE_D);
3120 			pmap_update_page(pmap, pv->pv_va, *pte);
3121 		}
3122 		PMAP_UNLOCK(pmap);
3123 	}
3124 	rw_wunlock(&pvh_global_lock);
3125 }
3126 
3127 /*
3128  *	pmap_is_referenced:
3129  *
3130  *	Return whether or not the specified physical page was referenced
3131  *	in any physical maps.
3132  */
3133 boolean_t
3134 pmap_is_referenced(vm_page_t m)
3135 {
3136 
3137 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3138 	    ("pmap_is_referenced: page %p is not managed", m));
3139 	return ((m->md.pv_flags & PV_TABLE_REF) != 0);
3140 }
3141 
3142 /*
3143  * Miscellaneous support routines follow
3144  */
3145 
3146 /*
3147  * Map a set of physical memory pages into the kernel virtual
3148  * address space. Return a pointer to where it is mapped. This
3149  * routine is intended to be used for mapping device memory,
3150  * NOT real memory.
3151  *
3152  * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit.
3153  */
3154 void *
3155 pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
3156 {
3157 	vm_offset_t va, tmpva, offset;
3158 
3159 	/*
3160 	 * KSEG1 maps only the first 512MB of physical address space.  For
3161 	 * pa > 0x20000000 we must make a proper mapping using pmap_kenter.
3162 	 */
3163 	if (MIPS_DIRECT_MAPPABLE(pa + size - 1) && ma == VM_MEMATTR_UNCACHEABLE)
3164 		return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa));
3165 	else {
3166 		offset = pa & PAGE_MASK;
3167 		size = roundup(size + offset, PAGE_SIZE);
3168 
3169 		va = kva_alloc(size);
3170 		if (!va)
3171 			panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3172 		pa = trunc_page(pa);
3173 		for (tmpva = va; size > 0;) {
3174 			pmap_kenter_attr(tmpva, pa, ma);
3175 			size -= PAGE_SIZE;
3176 			tmpva += PAGE_SIZE;
3177 			pa += PAGE_SIZE;
3178 		}
3179 	}
3180 
3181 	return ((void *)(va + offset));
3182 }
3183 
3184 void *
3185 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
3186 {
3187 	return pmap_mapdev_attr(pa, size, VM_MEMATTR_UNCACHEABLE);
3188 }
3189 
3190 void
3191 pmap_unmapdev(vm_offset_t va, vm_size_t size)
3192 {
3193 #ifndef __mips_n64
3194 	vm_offset_t base, offset;
3195 
3196 	/* If the address is within KSEG1 then there is nothing to do */
3197 	if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END)
3198 		return;
3199 
3200 	base = trunc_page(va);
3201 	offset = va & PAGE_MASK;
3202 	size = roundup(size + offset, PAGE_SIZE);
3203 	kva_free(base, size);
3204 #endif
3205 }
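
/*
 * Usage sketch (illustrative): a device driver maps its register window
 * uncached at attach time and tears it down on detach:
 *
 *	regs = pmap_mapdev(pa, size);
 *	... access device registers through regs ...
 *	pmap_unmapdev((vm_offset_t)regs, size);
 *
 * On mips32 a window below 512MB comes straight out of KSEG1, which is
 * why pmap_unmapdev() has nothing to free in that case.
 */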
3206 
3207 /*
3208  * perform the pmap work for mincore
3209  */
3210 int
3211 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
3212 {
3213 	pt_entry_t *ptep, pte;
3214 	vm_paddr_t pa;
3215 	vm_page_t m;
3216 	int val;
3217 
3218 	PMAP_LOCK(pmap);
3219 retry:
3220 	ptep = pmap_pte(pmap, addr);
3221 	pte = (ptep != NULL) ? *ptep : 0;
3222 	if (!pte_test(&pte, PTE_V)) {
3223 		val = 0;
3224 		goto out;
3225 	}
3226 	val = MINCORE_INCORE;
3227 	if (pte_test(&pte, PTE_D))
3228 		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3229 	pa = TLBLO_PTE_TO_PA(pte);
3230 	if (pte_test(&pte, PTE_MANAGED)) {
3231 		/*
3232 		 * This may falsely report the given address as
3233 		 * MINCORE_REFERENCED.  Unfortunately, due to the lack of
3234 		 * per-PTE reference information, it is impossible to
3235 		 * determine if the address is MINCORE_REFERENCED.
3236 		 */
3237 		m = PHYS_TO_VM_PAGE(pa);
3238 		if ((m->aflags & PGA_REFERENCED) != 0)
3239 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3240 	}
3241 	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
3242 	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
3243 	    pte_test(&pte, PTE_MANAGED)) {
3244 		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
3245 		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
3246 			goto retry;
3247 	} else
3248 out:
3249 		PA_UNLOCK_COND(*locked_pa);
3250 	PMAP_UNLOCK(pmap);
3251 	return (val);
3252 }
3253 
3254 void
3255 pmap_activate(struct thread *td)
3256 {
3257 	pmap_t pmap, oldpmap;
3258 	struct proc *p = td->td_proc;
3259 	u_int cpuid;
3260 
3261 	critical_enter();
3262 
3263 	pmap = vmspace_pmap(p->p_vmspace);
3264 	oldpmap = PCPU_GET(curpmap);
3265 	cpuid = PCPU_GET(cpuid);
3266 
3267 	if (oldpmap)
3268 		CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
3269 	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
3270 	pmap_asid_alloc(pmap);
3271 	if (td == curthread) {
3272 		PCPU_SET(segbase, pmap->pm_segtab);
3273 		mips_wr_entryhi(pmap->pm_asid[cpuid].asid);
3274 	}
3275 
3276 	PCPU_SET(curpmap, pmap);
3277 	critical_exit();
3278 }
3279 
3280 static void
3281 pmap_sync_icache_one(void *arg __unused)
3282 {
3283 
3284 	mips_icache_sync_all();
3285 	mips_dcache_wbinv_all();
3286 }
3287 
3288 void
3289 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
3290 {
3291 
3292 	smp_rendezvous(NULL, pmap_sync_icache_one, NULL, NULL);
3293 }
3294 
3295 /*
3296  *	Increase the starting virtual address of the given mapping if a
3297  *	different alignment might result in more superpage mappings.
3298  */
3299 void
3300 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3301     vm_offset_t *addr, vm_size_t size)
3302 {
3303 	vm_offset_t superpage_offset;
3304 
3305 	if (size < PDRSIZE)
3306 		return;
3307 	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
3308 		offset += ptoa(object->pg_color);
3309 	superpage_offset = offset & PDRMASK;
3310 	if (size - ((PDRSIZE - superpage_offset) & PDRMASK) < PDRSIZE ||
3311 	    (*addr & PDRMASK) == superpage_offset)
3312 		return;
3313 	if ((*addr & PDRMASK) < superpage_offset)
3314 		*addr = (*addr & ~PDRMASK) + superpage_offset;
3315 	else
3316 		*addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
3317 }
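
/*
 * Worked example (illustrative, assuming 4MB superpages, PDRSIZE ==
 * 1 << 22): an object whose backing begins at offset 0x00100000 has
 * superpage_offset 0x00100000, so a proposed *addr of 0x20000000 is
 * bumped to 0x20100000.  Virtual and physical addresses then share
 * their low 22 bits of alignment, which is what later superpage
 * promotion would require.
 */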
3318 
3319 #ifdef DDB
3320 DB_SHOW_COMMAND(ptable, ddb_pid_dump)
3321 {
3322 	pmap_t pmap;
3323 	struct thread *td = NULL;
3324 	struct proc *p;
3325 	int i, j, k;
3326 	vm_paddr_t pa;
3327 	vm_offset_t va;
3328 
3329 	if (have_addr) {
3330 		td = db_lookup_thread(addr, true);
3331 		if (td == NULL) {
3332 			db_printf("Invalid pid or tid");
3333 			return;
3334 		}
3335 		p = td->td_proc;
3336 		if (p->p_vmspace == NULL) {
3337 			db_printf("No vmspace for process");
3338 			return;
3339 		}
3340 		pmap = vmspace_pmap(p->p_vmspace);
3341 	} else
3342 		pmap = kernel_pmap;
3343 
3344 	db_printf("pmap:%p segtab:%p asid:%x generation:%x\n",
3345 	    pmap, pmap->pm_segtab, pmap->pm_asid[0].asid,
3346 	    pmap->pm_asid[0].gen);
3347 	for (i = 0; i < NPDEPG; i++) {
3348 		pd_entry_t *pdpe;
3349 		pt_entry_t *pde;
3350 		pt_entry_t pte;
3351 
3352 		pdpe = (pd_entry_t *)pmap->pm_segtab[i];
3353 		if (pdpe == NULL)
3354 			continue;
3355 		db_printf("[%4d] %p\n", i, pdpe);
3356 #ifdef __mips_n64
3357 		for (j = 0; j < NPDEPG; j++) {
3358 			pde = (pt_entry_t *)pdpe[j];
3359 			if (pde == NULL)
3360 				continue;
3361 			db_printf("\t[%4d] %p\n", j, pde);
3362 #else
3363 		{
3364 			j = 0;
3365 			pde =  (pt_entry_t *)pdpe;
3366 #endif
3367 			for (k = 0; k < NPTEPG; k++) {
3368 				pte = pde[k];
3369 				if (pte == 0 || !pte_test(&pte, PTE_V))
3370 					continue;
3371 				pa = TLBLO_PTE_TO_PA(pte);
3372 				va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT);
3373 				db_printf("\t\t[%04d] va: %p pte: %8jx pa:%jx\n",
3374 				       k, (void *)va, (uintmax_t)pte, (uintmax_t)pa);
3375 			}
3376 		}
3377 	}
3378 }
3379 #endif
3380 
3381 /*
3382  * Allocate TLB address space tag (called ASID or TLBPID) and return it.
3383  * It takes almost as much or more time to search the TLB for a
3384  * specific ASID and flush those entries as it does to flush the entire TLB.
3385  * Therefore, when we allocate a new ASID, we just take the next number. When
3386  * we run out of numbers, we flush the TLB, increment the generation count
3387  * and start over. ASID zero is reserved for kernel use.
3388  */
3389 static void
3390 pmap_asid_alloc(pmap_t pmap)
3392 {
3393 	if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED ||
3394 	    pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
3396 		if (PCPU_GET(next_asid) == pmap_max_asid) {
3397 			tlb_invalidate_all_user(NULL);
3398 			PCPU_SET(asid_generation,
3399 			    (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
3400 			if (PCPU_GET(asid_generation) == 0) {
3401 				PCPU_SET(asid_generation, 1);
3402 			}
3403 			PCPU_SET(next_asid, 1);	/* 0 means invalid */
3404 		}
3405 		pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3406 		pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3407 		PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3408 	}
3409 }
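
/*
 * Illustrative walk-through (not from the original source): with
 * pmap_max_asid == 256, allocations hand out ASIDs 1..255; the request
 * that finds next_asid == pmap_max_asid flushes the user TLB entries,
 * bumps asid_generation, and restarts at ASID 1.  A pmap still holding
 * an ASID from the old generation fails the gen check above and gets a
 * fresh ASID the next time it is activated.
 */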
3410 
3411 static pt_entry_t
3412 init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot)
3413 {
3414 	pt_entry_t rw;
3415 
3416 	if (!(prot & VM_PROT_WRITE))
3417 		rw = PTE_V | PTE_RO;
3418 	else if ((m->oflags & VPO_UNMANAGED) == 0) {
3419 		if ((access & VM_PROT_WRITE) != 0)
3420 			rw = PTE_V | PTE_D;
3421 		else
3422 			rw = PTE_V;
3423 	} else
3424 		/* Needn't emulate a modified bit for unmanaged pages. */
3425 		rw = PTE_V | PTE_D;
3426 	return (rw);
3427 }
3428 
3429 /*
3430  * pmap_emulate_modified : do dirty bit emulation
3431  *
3432  * On SMP, update just the local TLB, other CPUs will update their
3433  * TLBs from PTE lazily, if they get the exception.
3434  * Returns 0 in case of success, 1 if the page is read-only and we
3435  * need to fault.
3436  */
3437 int
3438 pmap_emulate_modified(pmap_t pmap, vm_offset_t va)
3439 {
3440 	pt_entry_t *pte;
3441 
3442 	PMAP_LOCK(pmap);
3443 	pte = pmap_pte(pmap, va);
3444 	if (pte == NULL)
3445 		panic("pmap_emulate_modified: can't find PTE");
3446 #ifdef SMP
3447 	/* It is possible that some other CPU changed m-bit */
3448 	if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) {
3449 		tlb_update(pmap, va, *pte);
3450 		PMAP_UNLOCK(pmap);
3451 		return (0);
3452 	}
3453 #else
3454 	if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D))
3455 		panic("pmap_emulate_modified: invalid pte");
3456 #endif
3457 	if (pte_test(pte, PTE_RO)) {
3458 		PMAP_UNLOCK(pmap);
3459 		return (1);
3460 	}
3461 	pte_set(pte, PTE_D);
3462 	tlb_update(pmap, va, *pte);
3463 	if (!pte_test(pte, PTE_MANAGED))
3464 		panic("pmap_emulate_modified: unmanaged page");
3465 	PMAP_UNLOCK(pmap);
3466 	return (0);
3467 }
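
/*
 * Usage sketch (illustrative): the TLB-modified exception handler uses
 * this routine to emulate a hardware dirty bit:
 *
 *	if (pmap_emulate_modified(pmap, badvaddr) != 0)
 *		... deliver a protection fault to the process ...
 *
 * A zero return means PTE_D is now set and the local TLB entry was
 * refreshed, so the faulting store can simply be retried.
 */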
3468 
3469 /*
3470  *	Routine:	pmap_kextract
3471  *	Function:
3472  *		Extract the physical page address associated
3473  *		Extract the physical page address associated with the
3474  *		given virtual address.
3475 vm_paddr_t
3476 pmap_kextract(vm_offset_t va)
3477 {
3478 	int mapped;
3479 
3480 	/*
3481 	 * First, the direct-mapped regions.
3482 	 */
3483 #if defined(__mips_n64)
3484 	if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END)
3485 		return (MIPS_XKPHYS_TO_PHYS(va));
3486 #endif
3487 	if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END)
3488 		return (MIPS_KSEG0_TO_PHYS(va));
3489 
3490 	if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END)
3491 		return (MIPS_KSEG1_TO_PHYS(va));
3492 
3493 	/*
3494 	 * User virtual addresses.
3495 	 */
3496 	if (va < VM_MAXUSER_ADDRESS) {
3497 		pt_entry_t *ptep;
3498 
3499 		if (curproc && curproc->p_vmspace) {
3500 			ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3501 			if (ptep) {
3502 				return (TLBLO_PTE_TO_PA(*ptep) |
3503 				    (va & PAGE_MASK));
3504 			}
3505 			return (0);
3506 		}
3507 	}
3508 
3509 	/*
3510 	 * Should be kernel virtual here, otherwise fail
3511 	 */
3512 	mapped = (va >= MIPS_KSEG2_START && va < MIPS_KSEG2_END);
3513 #if defined(__mips_n64)
3514 	mapped = mapped || (va >= MIPS_XKSEG_START && va < MIPS_XKSEG_END);
3515 #endif
3516 	/*
3517 	 * Kernel virtual.
3518 	 */
3519 
3520 	if (mapped) {
3521 		pt_entry_t *ptep;
3522 
3523 		/* Is the kernel pmap initialized? */
3524 		if (!CPU_EMPTY(&kernel_pmap->pm_active)) {
3525 			/* It's inside the virtual address range */
3526 			ptep = pmap_pte(kernel_pmap, va);
3527 			if (ptep) {
3528 				return (TLBLO_PTE_TO_PA(*ptep) |
3529 				    (va & PAGE_MASK));
3530 			}
3531 		}
3532 		return (0);
3533 	}
3534 
3535 	panic("%s for unknown address space %p.", __func__, (void *)va);
3536 }
3537 
3539 void
3540 pmap_flush_pvcache(vm_page_t m)
3541 {
3542 	pv_entry_t pv;
3543 
3544 	if (m != NULL) {
3545 		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
3546 			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
3549 	}
3550 }
3551 
3552 void
3553 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
3554 {
3555 
3556 	/*
3557 	 * It appears that this function can only be called before any mappings
3558 	 * for the page are established.  If this ever changes, this code will
3559 	 * need to walk the pv_list and make each of the existing mappings
3560 	 * uncacheable, being careful to sync caches and PTEs (and maybe
3561 	 * invalidate TLB?) for any current mapping it modifies.
3562 	 */
3563 	if (TAILQ_FIRST(&m->md.pv_list) != NULL)
3564 		panic("Can't change memattr on page with existing mappings");
3565 
3566 	/* Clean memattr portion of pv_flags */
3567 	m->md.pv_flags &= ~PV_MEMATTR_MASK;
3568 	m->md.pv_flags |= (ma << PV_MEMATTR_SHIFT) & PV_MEMATTR_MASK;
3569 }
3570 
3571 static __inline void
3572 pmap_pte_attr(pt_entry_t *pte, vm_memattr_t ma)
3573 {
3574 	u_int npte;
3575 
3576 	npte = *(u_int *)pte;
3577 	npte &= ~PTE_C_MASK;
3578 	npte |= PTE_C(ma);
3579 	*pte = npte;
3580 }
3581 
3582 int
3583 pmap_change_attr(vm_offset_t sva, vm_size_t size, vm_memattr_t ma)
3584 {
3585 	pd_entry_t *pde, *pdpe;
3586 	pt_entry_t *pte;
3587 	vm_offset_t ova, eva, va, va_next;
3588 	pmap_t pmap;
3589 
3590 	ova = sva;
3591 	eva = sva + size;
3592 	if (eva < sva)
3593 		return (EINVAL);
3594 
3595 	pmap = kernel_pmap;
3596 	PMAP_LOCK(pmap);
3597 
3598 	for (; sva < eva; sva = va_next) {
3599 		pdpe = pmap_segmap(pmap, sva);
3600 #ifdef __mips_n64
3601 		if (*pdpe == 0) {
3602 			va_next = (sva + NBSEG) & ~SEGMASK;
3603 			if (va_next < sva)
3604 				va_next = eva;
3605 			continue;
3606 		}
3607 #endif
3608 		va_next = (sva + NBPDR) & ~PDRMASK;
3609 		if (va_next < sva)
3610 			va_next = eva;
3611 
3612 		pde = pmap_pdpe_to_pde(pdpe, sva);
3613 		if (*pde == NULL)
3614 			continue;
3615 
3616 		/*
3617 		 * Limit our scan to either the end of the va represented
3618 		 * by the current page table page, or to the end of the
3619 		 * range whose attributes are being changed.
3620 		 */
3621 		if (va_next > eva)
3622 			va_next = eva;
3623 
3624 		va = va_next;
3625 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
3626 		    sva += PAGE_SIZE) {
3627 			if (!pte_test(pte, PTE_V) || pte_cache_bits(pte) == ma) {
3628 				if (va != va_next) {
3629 					pmap_invalidate_range(pmap, va, sva);
3630 					va = va_next;
3631 				}
3632 				continue;
3633 			}
3634 			if (va == va_next)
3635 				va = sva;
3636 
3637 			pmap_pte_attr(pte, ma);
3638 		}
3639 		if (va != va_next)
3640 			pmap_invalidate_range(pmap, va, sva);
3641 	}
3642 	PMAP_UNLOCK(pmap);
3643 
3644 	/* Flush caches to be on the safe side. */
3645 	mips_dcache_wbinv_range(ova, size);
3646 	return (0);
3647 }
3648 
3649 boolean_t
3650 pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
3651 {
3652 
3653 	switch (mode) {
3654 	case VM_MEMATTR_UNCACHEABLE:
3655 	case VM_MEMATTR_WRITE_BACK:
3656 #ifdef MIPS_CCA_WC
3657 	case VM_MEMATTR_WRITE_COMBINING:
3658 #endif
3659 		return (TRUE);
3660 	default:
3661 		return (FALSE);
3662 	}
3663 }
3664