xref: /f-stack/freebsd/vm/vm_phys.c (revision 22ce4aff)
1a9643ea8Slogwang /*-
2*22ce4affSfengbojiang  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3*22ce4affSfengbojiang  *
4a9643ea8Slogwang  * Copyright (c) 2002-2006 Rice University
5a9643ea8Slogwang  * Copyright (c) 2007 Alan L. Cox <[email protected]>
6a9643ea8Slogwang  * All rights reserved.
7a9643ea8Slogwang  *
8a9643ea8Slogwang  * This software was developed for the FreeBSD Project by Alan L. Cox,
9a9643ea8Slogwang  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
10a9643ea8Slogwang  *
11a9643ea8Slogwang  * Redistribution and use in source and binary forms, with or without
12a9643ea8Slogwang  * modification, are permitted provided that the following conditions
13a9643ea8Slogwang  * are met:
14a9643ea8Slogwang  * 1. Redistributions of source code must retain the above copyright
15a9643ea8Slogwang  *    notice, this list of conditions and the following disclaimer.
16a9643ea8Slogwang  * 2. Redistributions in binary form must reproduce the above copyright
17a9643ea8Slogwang  *    notice, this list of conditions and the following disclaimer in the
18a9643ea8Slogwang  *    documentation and/or other materials provided with the distribution.
19a9643ea8Slogwang  *
20a9643ea8Slogwang  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21a9643ea8Slogwang  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22a9643ea8Slogwang  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23a9643ea8Slogwang  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
24a9643ea8Slogwang  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25a9643ea8Slogwang  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26a9643ea8Slogwang  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
27a9643ea8Slogwang  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28a9643ea8Slogwang  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29a9643ea8Slogwang  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
30a9643ea8Slogwang  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31a9643ea8Slogwang  * POSSIBILITY OF SUCH DAMAGE.
32a9643ea8Slogwang  */
33a9643ea8Slogwang 
34a9643ea8Slogwang /*
35a9643ea8Slogwang  *	Physical memory system implementation
36a9643ea8Slogwang  *
37a9643ea8Slogwang  * Any external functions defined by this module are only to be used by the
38a9643ea8Slogwang  * virtual memory system.
39a9643ea8Slogwang  */
40a9643ea8Slogwang 
41a9643ea8Slogwang #include <sys/cdefs.h>
42a9643ea8Slogwang __FBSDID("$FreeBSD$");
43a9643ea8Slogwang 
44a9643ea8Slogwang #include "opt_ddb.h"
45a9643ea8Slogwang #include "opt_vm.h"
46a9643ea8Slogwang 
47a9643ea8Slogwang #include <sys/param.h>
48a9643ea8Slogwang #include <sys/systm.h>
49*22ce4affSfengbojiang #include <sys/domainset.h>
50a9643ea8Slogwang #include <sys/lock.h>
51a9643ea8Slogwang #include <sys/kernel.h>
52a9643ea8Slogwang #include <sys/malloc.h>
53a9643ea8Slogwang #include <sys/mutex.h>
54a9643ea8Slogwang #include <sys/proc.h>
55a9643ea8Slogwang #include <sys/queue.h>
56a9643ea8Slogwang #include <sys/rwlock.h>
57a9643ea8Slogwang #include <sys/sbuf.h>
58a9643ea8Slogwang #include <sys/sysctl.h>
59a9643ea8Slogwang #include <sys/tree.h>
60a9643ea8Slogwang #include <sys/vmmeter.h>
61a9643ea8Slogwang 
62a9643ea8Slogwang #include <ddb/ddb.h>
63a9643ea8Slogwang 
64a9643ea8Slogwang #include <vm/vm.h>
65a9643ea8Slogwang #include <vm/vm_param.h>
66a9643ea8Slogwang #include <vm/vm_kern.h>
67a9643ea8Slogwang #include <vm/vm_object.h>
68a9643ea8Slogwang #include <vm/vm_page.h>
69a9643ea8Slogwang #include <vm/vm_phys.h>
70*22ce4affSfengbojiang #include <vm/vm_pagequeue.h>
71a9643ea8Slogwang 
72a9643ea8Slogwang _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
73a9643ea8Slogwang     "Too many physsegs.");
74a9643ea8Slogwang 
75*22ce4affSfengbojiang #ifdef NUMA
76*22ce4affSfengbojiang struct mem_affinity __read_mostly *mem_affinity;
77*22ce4affSfengbojiang int __read_mostly *mem_locality;
78a9643ea8Slogwang #endif
79a9643ea8Slogwang 
80*22ce4affSfengbojiang int __read_mostly vm_ndomains = 1;
81*22ce4affSfengbojiang domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);
82a9643ea8Slogwang 
83*22ce4affSfengbojiang struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
84*22ce4affSfengbojiang int __read_mostly vm_phys_nsegs;
85*22ce4affSfengbojiang static struct vm_phys_seg vm_phys_early_segs[8];
86*22ce4affSfengbojiang static int vm_phys_early_nsegs;
87a9643ea8Slogwang 
88a9643ea8Slogwang struct vm_phys_fictitious_seg;
89a9643ea8Slogwang static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
90a9643ea8Slogwang     struct vm_phys_fictitious_seg *);
91a9643ea8Slogwang 
92a9643ea8Slogwang RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
93*22ce4affSfengbojiang     RB_INITIALIZER(&vm_phys_fictitious_tree);
94a9643ea8Slogwang 
95a9643ea8Slogwang struct vm_phys_fictitious_seg {
96a9643ea8Slogwang 	RB_ENTRY(vm_phys_fictitious_seg) node;
97a9643ea8Slogwang 	/* Memory region data */
98a9643ea8Slogwang 	vm_paddr_t	start;
99a9643ea8Slogwang 	vm_paddr_t	end;
100a9643ea8Slogwang 	vm_page_t	first_page;
101a9643ea8Slogwang };
102a9643ea8Slogwang 
103a9643ea8Slogwang RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
104a9643ea8Slogwang     vm_phys_fictitious_cmp);
105a9643ea8Slogwang 
106*22ce4affSfengbojiang static struct rwlock_padalign vm_phys_fictitious_reg_lock;
107a9643ea8Slogwang MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
108a9643ea8Slogwang 
109*22ce4affSfengbojiang static struct vm_freelist __aligned(CACHE_LINE_SIZE)
110*22ce4affSfengbojiang     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL]
111*22ce4affSfengbojiang     [VM_NFREEORDER_MAX];
112a9643ea8Slogwang 
113*22ce4affSfengbojiang static int __read_mostly vm_nfreelists;
114*22ce4affSfengbojiang 
115*22ce4affSfengbojiang /*
116*22ce4affSfengbojiang  * These "avail lists" are globals used to communicate boot-time physical
117*22ce4affSfengbojiang  * memory layout to other parts of the kernel.  Each physically contiguous
118*22ce4affSfengbojiang  * region of memory is defined by a start address at an even index and an
119*22ce4affSfengbojiang  * end address at the following odd index.  Each list is terminated by a
120*22ce4affSfengbojiang  * pair of zero entries.
121*22ce4affSfengbojiang  *
122*22ce4affSfengbojiang  * dump_avail tells the dump code what regions to include in a crash dump, and
123*22ce4affSfengbojiang  * phys_avail is all of the remaining physical memory that is available for
124*22ce4affSfengbojiang  * the vm system.
125*22ce4affSfengbojiang  *
126*22ce4affSfengbojiang  * Initially dump_avail and phys_avail are identical.  Boot time memory
127*22ce4affSfengbojiang  * allocations remove extents from phys_avail that may still be included
128*22ce4affSfengbojiang  * in dumps.
129*22ce4affSfengbojiang  */
130*22ce4affSfengbojiang vm_paddr_t phys_avail[PHYS_AVAIL_COUNT];
131*22ce4affSfengbojiang vm_paddr_t dump_avail[PHYS_AVAIL_COUNT];
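/*
 * Worked example (hypothetical values): on a machine with the conventional
 * low-memory hole and roughly 2GB of RAM, phys_avail might look like
 *
 *	phys_avail[0] = 0x0000000000001000;	start of region 0
 *	phys_avail[1] = 0x000000000009f000;	end of region 0
 *	phys_avail[2] = 0x0000000000100000;	start of region 1
 *	phys_avail[3] = 0x000000007ffdf000;	end of region 1
 *	phys_avail[4] = 0;			terminating zero pair
 *	phys_avail[5] = 0;
 *
 * Start addresses occupy the even indices and end addresses the odd ones,
 * exactly as described above.
 */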
132a9643ea8Slogwang 
133a9643ea8Slogwang /*
134a9643ea8Slogwang  * Provides the mapping from VM_FREELIST_* to free list indices (flind).
135a9643ea8Slogwang  */
136*22ce4affSfengbojiang static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];
137a9643ea8Slogwang 
138a9643ea8Slogwang CTASSERT(VM_FREELIST_DEFAULT == 0);
139a9643ea8Slogwang 
140a9643ea8Slogwang #ifdef VM_FREELIST_DMA32
141a9643ea8Slogwang #define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
142a9643ea8Slogwang #endif
143a9643ea8Slogwang 
144a9643ea8Slogwang /*
145a9643ea8Slogwang  * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
146a9643ea8Slogwang  * the ordering of the free list boundaries.
147a9643ea8Slogwang  */
148a9643ea8Slogwang #if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
149a9643ea8Slogwang CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
150a9643ea8Slogwang #endif
151a9643ea8Slogwang 
152a9643ea8Slogwang static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
153*22ce4affSfengbojiang SYSCTL_OID(_vm, OID_AUTO, phys_free,
154*22ce4affSfengbojiang     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
155*22ce4affSfengbojiang     sysctl_vm_phys_free, "A",
156*22ce4affSfengbojiang     "Phys Free Info");
157a9643ea8Slogwang 
158a9643ea8Slogwang static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
159*22ce4affSfengbojiang SYSCTL_OID(_vm, OID_AUTO, phys_segs,
160*22ce4affSfengbojiang     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
161*22ce4affSfengbojiang     sysctl_vm_phys_segs, "A",
162*22ce4affSfengbojiang     "Phys Seg Info");
163a9643ea8Slogwang 
164*22ce4affSfengbojiang #ifdef NUMA
165a9643ea8Slogwang static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
166*22ce4affSfengbojiang SYSCTL_OID(_vm, OID_AUTO, phys_locality,
167*22ce4affSfengbojiang     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
168*22ce4affSfengbojiang     sysctl_vm_phys_locality, "A",
169*22ce4affSfengbojiang     "Phys Locality Info");
170a9643ea8Slogwang #endif
171a9643ea8Slogwang 
172a9643ea8Slogwang SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
173a9643ea8Slogwang     &vm_ndomains, 0, "Number of physical memory domains available.");
174a9643ea8Slogwang 
175a9643ea8Slogwang static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
176a9643ea8Slogwang     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
177a9643ea8Slogwang     vm_paddr_t boundary);
178a9643ea8Slogwang static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
179a9643ea8Slogwang static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
180a9643ea8Slogwang static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
181*22ce4affSfengbojiang     int order, int tail);
182a9643ea8Slogwang 
183a9643ea8Slogwang /*
184a9643ea8Slogwang  * Red-black tree helpers for vm fictitious range management.
185a9643ea8Slogwang  */
186a9643ea8Slogwang static inline int
187a9643ea8Slogwang vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
188a9643ea8Slogwang     struct vm_phys_fictitious_seg *range)
189a9643ea8Slogwang {
190a9643ea8Slogwang 
191a9643ea8Slogwang 	KASSERT(range->start != 0 && range->end != 0,
192a9643ea8Slogwang 	    ("Invalid range passed on search for vm_fictitious page"));
193a9643ea8Slogwang 	if (p->start >= range->end)
194a9643ea8Slogwang 		return (1);
195a9643ea8Slogwang 	if (p->start < range->start)
196a9643ea8Slogwang 		return (-1);
197a9643ea8Slogwang 
198a9643ea8Slogwang 	return (0);
199a9643ea8Slogwang }
200a9643ea8Slogwang 
201a9643ea8Slogwang static int
202a9643ea8Slogwang vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
203a9643ea8Slogwang     struct vm_phys_fictitious_seg *p2)
204a9643ea8Slogwang {
205a9643ea8Slogwang 
206a9643ea8Slogwang 	/* Check if this is a search for a page */
207a9643ea8Slogwang 	if (p1->end == 0)
208a9643ea8Slogwang 		return (vm_phys_fictitious_in_range(p1, p2));
209a9643ea8Slogwang 
210a9643ea8Slogwang 	KASSERT(p2->end != 0,
211a9643ea8Slogwang     ("Invalid range passed as second parameter to vm fictitious comparison"));
212a9643ea8Slogwang 
213a9643ea8Slogwang 	/* Searching to add a new range */
214a9643ea8Slogwang 	if (p1->end <= p2->start)
215a9643ea8Slogwang 		return (-1);
216a9643ea8Slogwang 	if (p1->start >= p2->end)
217a9643ea8Slogwang 		return (1);
218a9643ea8Slogwang 
219a9643ea8Slogwang 	panic("Trying to add overlapping vm fictitious ranges:\n"
220a9643ea8Slogwang 	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
221a9643ea8Slogwang 	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
222a9643ea8Slogwang }
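/*
 * Usage note: a lookup key with end == 0 is treated as a single address
 * rather than a range, so this one comparator serves both RB_INSERT of a
 * full [start, end) segment and RB_FIND of the segment containing a given
 * physical address.  A sketch of the lookup path (this is how
 * vm_phys_fictitious_to_vm_page() below uses it):
 *
 *	tmp.start = pa;
 *	tmp.end = 0;
 *	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
 */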
223a9643ea8Slogwang 
224*22ce4affSfengbojiang int
225*22ce4affSfengbojiang vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
226a9643ea8Slogwang {
227*22ce4affSfengbojiang #ifdef NUMA
228*22ce4affSfengbojiang 	domainset_t mask;
229*22ce4affSfengbojiang 	int i;
230a9643ea8Slogwang 
231*22ce4affSfengbojiang 	if (vm_ndomains == 1 || mem_affinity == NULL)
232*22ce4affSfengbojiang 		return (0);
233a9643ea8Slogwang 
234*22ce4affSfengbojiang 	DOMAINSET_ZERO(&mask);
235*22ce4affSfengbojiang 	/*
236*22ce4affSfengbojiang 	 * Check for any memory that overlaps low, high.
237*22ce4affSfengbojiang 	 */
238*22ce4affSfengbojiang 	for (i = 0; mem_affinity[i].end != 0; i++)
239*22ce4affSfengbojiang 		if (mem_affinity[i].start <= high &&
240*22ce4affSfengbojiang 		    mem_affinity[i].end >= low)
241*22ce4affSfengbojiang 			DOMAINSET_SET(mem_affinity[i].domain, &mask);
242*22ce4affSfengbojiang 	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
243*22ce4affSfengbojiang 		return (prefer);
244*22ce4affSfengbojiang 	if (DOMAINSET_EMPTY(&mask))
245*22ce4affSfengbojiang 		panic("vm_phys_domain_match:  Impossible constraint");
246*22ce4affSfengbojiang 	return (DOMAINSET_FFS(&mask) - 1);
247a9643ea8Slogwang #else
248a9643ea8Slogwang 	return (0);
249a9643ea8Slogwang #endif
250a9643ea8Slogwang }
251a9643ea8Slogwang 
252a9643ea8Slogwang /*
253a9643ea8Slogwang  * Outputs the state of the physical memory allocator, specifically,
254a9643ea8Slogwang  * the amount of physical memory in each free list.
255a9643ea8Slogwang  */
256a9643ea8Slogwang static int
257a9643ea8Slogwang sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
258a9643ea8Slogwang {
259a9643ea8Slogwang 	struct sbuf sbuf;
260a9643ea8Slogwang 	struct vm_freelist *fl;
261a9643ea8Slogwang 	int dom, error, flind, oind, pind;
262a9643ea8Slogwang 
263a9643ea8Slogwang 	error = sysctl_wire_old_buffer(req, 0);
264a9643ea8Slogwang 	if (error != 0)
265a9643ea8Slogwang 		return (error);
266a9643ea8Slogwang 	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
267a9643ea8Slogwang 	for (dom = 0; dom < vm_ndomains; dom++) {
268a9643ea8Slogwang 		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
269a9643ea8Slogwang 		for (flind = 0; flind < vm_nfreelists; flind++) {
270a9643ea8Slogwang 			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
271a9643ea8Slogwang 			    "\n  ORDER (SIZE)  |  NUMBER"
272a9643ea8Slogwang 			    "\n              ", flind);
273a9643ea8Slogwang 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
274a9643ea8Slogwang 				sbuf_printf(&sbuf, "  |  POOL %d", pind);
275a9643ea8Slogwang 			sbuf_printf(&sbuf, "\n--            ");
276a9643ea8Slogwang 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
277a9643ea8Slogwang 				sbuf_printf(&sbuf, "-- --      ");
278a9643ea8Slogwang 			sbuf_printf(&sbuf, "--\n");
279a9643ea8Slogwang 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
280a9643ea8Slogwang 				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
281a9643ea8Slogwang 				    1 << (PAGE_SHIFT - 10 + oind));
282a9643ea8Slogwang 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
283a9643ea8Slogwang 					fl = vm_phys_free_queues[dom][flind][pind];
284a9643ea8Slogwang 					sbuf_printf(&sbuf, "  |  %6d",
285a9643ea8Slogwang 					    fl[oind].lcnt);
286a9643ea8Slogwang 				}
287a9643ea8Slogwang 				sbuf_printf(&sbuf, "\n");
288a9643ea8Slogwang 			}
289a9643ea8Slogwang 		}
290a9643ea8Slogwang 	}
291a9643ea8Slogwang 	error = sbuf_finish(&sbuf);
292a9643ea8Slogwang 	sbuf_delete(&sbuf);
293a9643ea8Slogwang 	return (error);
294a9643ea8Slogwang }
295a9643ea8Slogwang 
296a9643ea8Slogwang /*
297a9643ea8Slogwang  * Outputs the set of physical memory segments.
298a9643ea8Slogwang  */
299a9643ea8Slogwang static int
300a9643ea8Slogwang sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
301a9643ea8Slogwang {
302a9643ea8Slogwang 	struct sbuf sbuf;
303a9643ea8Slogwang 	struct vm_phys_seg *seg;
304a9643ea8Slogwang 	int error, segind;
305a9643ea8Slogwang 
306a9643ea8Slogwang 	error = sysctl_wire_old_buffer(req, 0);
307a9643ea8Slogwang 	if (error != 0)
308a9643ea8Slogwang 		return (error);
309a9643ea8Slogwang 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
310a9643ea8Slogwang 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
311a9643ea8Slogwang 		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
312a9643ea8Slogwang 		seg = &vm_phys_segs[segind];
313a9643ea8Slogwang 		sbuf_printf(&sbuf, "start:     %#jx\n",
314a9643ea8Slogwang 		    (uintmax_t)seg->start);
315a9643ea8Slogwang 		sbuf_printf(&sbuf, "end:       %#jx\n",
316a9643ea8Slogwang 		    (uintmax_t)seg->end);
317a9643ea8Slogwang 		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
318a9643ea8Slogwang 		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
319a9643ea8Slogwang 	}
320a9643ea8Slogwang 	error = sbuf_finish(&sbuf);
321a9643ea8Slogwang 	sbuf_delete(&sbuf);
322a9643ea8Slogwang 	return (error);
323a9643ea8Slogwang }
324a9643ea8Slogwang 
325a9643ea8Slogwang /*
326a9643ea8Slogwang  * Return affinity, or -1 if there's no affinity information.
327a9643ea8Slogwang  */
328a9643ea8Slogwang int
329a9643ea8Slogwang vm_phys_mem_affinity(int f, int t)
330a9643ea8Slogwang {
331a9643ea8Slogwang 
332*22ce4affSfengbojiang #ifdef NUMA
333a9643ea8Slogwang 	if (mem_locality == NULL)
334a9643ea8Slogwang 		return (-1);
335a9643ea8Slogwang 	if (f >= vm_ndomains || t >= vm_ndomains)
336a9643ea8Slogwang 		return (-1);
337a9643ea8Slogwang 	return (mem_locality[f * vm_ndomains + t]);
338a9643ea8Slogwang #else
339a9643ea8Slogwang 	return (-1);
340a9643ea8Slogwang #endif
341a9643ea8Slogwang }
342a9643ea8Slogwang 
343*22ce4affSfengbojiang #ifdef NUMA
344a9643ea8Slogwang /*
345a9643ea8Slogwang  * Outputs the VM locality table.
346a9643ea8Slogwang  */
347a9643ea8Slogwang static int
348a9643ea8Slogwang sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
349a9643ea8Slogwang {
350a9643ea8Slogwang 	struct sbuf sbuf;
351a9643ea8Slogwang 	int error, i, j;
352a9643ea8Slogwang 
353a9643ea8Slogwang 	error = sysctl_wire_old_buffer(req, 0);
354a9643ea8Slogwang 	if (error != 0)
355a9643ea8Slogwang 		return (error);
356a9643ea8Slogwang 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
357a9643ea8Slogwang 
358a9643ea8Slogwang 	sbuf_printf(&sbuf, "\n");
359a9643ea8Slogwang 
360a9643ea8Slogwang 	for (i = 0; i < vm_ndomains; i++) {
361a9643ea8Slogwang 		sbuf_printf(&sbuf, "%d: ", i);
362a9643ea8Slogwang 		for (j = 0; j < vm_ndomains; j++) {
363a9643ea8Slogwang 			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
364a9643ea8Slogwang 		}
365a9643ea8Slogwang 		sbuf_printf(&sbuf, "\n");
366a9643ea8Slogwang 	}
367a9643ea8Slogwang 	error = sbuf_finish(&sbuf);
368a9643ea8Slogwang 	sbuf_delete(&sbuf);
369a9643ea8Slogwang 	return (error);
370a9643ea8Slogwang }
371a9643ea8Slogwang #endif
372a9643ea8Slogwang 
373a9643ea8Slogwang static void
374a9643ea8Slogwang vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
375a9643ea8Slogwang {
376a9643ea8Slogwang 
377a9643ea8Slogwang 	m->order = order;
378a9643ea8Slogwang 	if (tail)
379*22ce4affSfengbojiang 		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
380a9643ea8Slogwang 	else
381*22ce4affSfengbojiang 		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
382a9643ea8Slogwang 	fl[order].lcnt++;
383a9643ea8Slogwang }
384a9643ea8Slogwang 
385a9643ea8Slogwang static void
386a9643ea8Slogwang vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
387a9643ea8Slogwang {
388a9643ea8Slogwang 
389*22ce4affSfengbojiang 	TAILQ_REMOVE(&fl[order].pl, m, listq);
390a9643ea8Slogwang 	fl[order].lcnt--;
391a9643ea8Slogwang 	m->order = VM_NFREEORDER;
392a9643ea8Slogwang }
393a9643ea8Slogwang 
394a9643ea8Slogwang /*
395a9643ea8Slogwang  * Create a physical memory segment.
396a9643ea8Slogwang  */
397a9643ea8Slogwang static void
398a9643ea8Slogwang _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
399a9643ea8Slogwang {
400a9643ea8Slogwang 	struct vm_phys_seg *seg;
401a9643ea8Slogwang 
402a9643ea8Slogwang 	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
403a9643ea8Slogwang 	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
404*22ce4affSfengbojiang 	KASSERT(domain >= 0 && domain < vm_ndomains,
405a9643ea8Slogwang 	    ("vm_phys_create_seg: invalid domain provided"));
406a9643ea8Slogwang 	seg = &vm_phys_segs[vm_phys_nsegs++];
407a9643ea8Slogwang 	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
408a9643ea8Slogwang 		*seg = *(seg - 1);
409a9643ea8Slogwang 		seg--;
410a9643ea8Slogwang 	}
411a9643ea8Slogwang 	seg->start = start;
412a9643ea8Slogwang 	seg->end = end;
413a9643ea8Slogwang 	seg->domain = domain;
414a9643ea8Slogwang }
415a9643ea8Slogwang 
416a9643ea8Slogwang static void
417a9643ea8Slogwang vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
418a9643ea8Slogwang {
419*22ce4affSfengbojiang #ifdef NUMA
420a9643ea8Slogwang 	int i;
421a9643ea8Slogwang 
422a9643ea8Slogwang 	if (mem_affinity == NULL) {
423a9643ea8Slogwang 		_vm_phys_create_seg(start, end, 0);
424a9643ea8Slogwang 		return;
425a9643ea8Slogwang 	}
426a9643ea8Slogwang 
427a9643ea8Slogwang 	for (i = 0;; i++) {
428a9643ea8Slogwang 		if (mem_affinity[i].end == 0)
429a9643ea8Slogwang 			panic("Reached end of affinity info");
430a9643ea8Slogwang 		if (mem_affinity[i].end <= start)
431a9643ea8Slogwang 			continue;
432a9643ea8Slogwang 		if (mem_affinity[i].start > start)
433a9643ea8Slogwang 			panic("No affinity info for start %jx",
434a9643ea8Slogwang 			    (uintmax_t)start);
435a9643ea8Slogwang 		if (mem_affinity[i].end >= end) {
436a9643ea8Slogwang 			_vm_phys_create_seg(start, end,
437a9643ea8Slogwang 			    mem_affinity[i].domain);
438a9643ea8Slogwang 			break;
439a9643ea8Slogwang 		}
440a9643ea8Slogwang 		_vm_phys_create_seg(start, mem_affinity[i].end,
441a9643ea8Slogwang 		    mem_affinity[i].domain);
442a9643ea8Slogwang 		start = mem_affinity[i].end;
443a9643ea8Slogwang 	}
444a9643ea8Slogwang #else
445a9643ea8Slogwang 	_vm_phys_create_seg(start, end, 0);
446a9643ea8Slogwang #endif
447a9643ea8Slogwang }
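/*
 * Worked example (hypothetical NUMA layout): with mem_affinity entries
 * [0, 4G) -> domain 0 and [4G, 8G) -> domain 1, creating a segment that
 * spans [2G, 6G) produces two segments, [2G, 4G) in domain 0 and
 * [4G, 6G) in domain 1, because the loop above splits the range at every
 * affinity boundary it crosses.
 */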
448a9643ea8Slogwang 
449a9643ea8Slogwang /*
450a9643ea8Slogwang  * Add a physical memory segment.
451a9643ea8Slogwang  */
452a9643ea8Slogwang void
453a9643ea8Slogwang vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
454a9643ea8Slogwang {
455a9643ea8Slogwang 	vm_paddr_t paddr;
456a9643ea8Slogwang 
457a9643ea8Slogwang 	KASSERT((start & PAGE_MASK) == 0,
458a9643ea8Slogwang 	    ("vm_phys_add_seg: start is not page aligned"));
459a9643ea8Slogwang 	KASSERT((end & PAGE_MASK) == 0,
460a9643ea8Slogwang 	    ("vm_phys_add_seg: end is not page aligned"));
461a9643ea8Slogwang 
462a9643ea8Slogwang 	/*
463a9643ea8Slogwang 	 * Split the physical memory segment if it spans two or more free
464a9643ea8Slogwang 	 * list boundaries.
465a9643ea8Slogwang 	 */
466a9643ea8Slogwang 	paddr = start;
467a9643ea8Slogwang #ifdef	VM_FREELIST_LOWMEM
468a9643ea8Slogwang 	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
469a9643ea8Slogwang 		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
470a9643ea8Slogwang 		paddr = VM_LOWMEM_BOUNDARY;
471a9643ea8Slogwang 	}
472a9643ea8Slogwang #endif
473a9643ea8Slogwang #ifdef	VM_FREELIST_DMA32
474a9643ea8Slogwang 	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
475a9643ea8Slogwang 		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
476a9643ea8Slogwang 		paddr = VM_DMA32_BOUNDARY;
477a9643ea8Slogwang 	}
478a9643ea8Slogwang #endif
479a9643ea8Slogwang 	vm_phys_create_seg(paddr, end);
480a9643ea8Slogwang }
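/*
 * Worked example (assuming the amd64 boundaries of 16MB for
 * VM_LOWMEM_BOUNDARY and 4GB for VM_DMA32_BOUNDARY): registering a
 * segment [0, 5G) via vm_phys_add_seg() creates three segments,
 * [0, 16M), [16M, 4G) and [4G, 5G), so that no single segment spans a
 * free list boundary.
 */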
481a9643ea8Slogwang 
482a9643ea8Slogwang /*
483a9643ea8Slogwang  * Initialize the physical memory allocator.
484a9643ea8Slogwang  *
485a9643ea8Slogwang  * Requires that vm_page_array is initialized!
486a9643ea8Slogwang  */
487a9643ea8Slogwang void
488a9643ea8Slogwang vm_phys_init(void)
489a9643ea8Slogwang {
490a9643ea8Slogwang 	struct vm_freelist *fl;
491*22ce4affSfengbojiang 	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
492a9643ea8Slogwang 	u_long npages;
493a9643ea8Slogwang 	int dom, flind, freelist, oind, pind, segind;
494a9643ea8Slogwang 
495a9643ea8Slogwang 	/*
496a9643ea8Slogwang 	 * Compute the number of free lists, and generate the mapping from the
497a9643ea8Slogwang 	 * manifest constants VM_FREELIST_* to the free list indices.
498a9643ea8Slogwang 	 *
499a9643ea8Slogwang 	 * Initially, the entries of vm_freelist_to_flind[] are set to either
500a9643ea8Slogwang 	 * 0 or 1 to indicate which free lists should be created.
501a9643ea8Slogwang 	 */
502a9643ea8Slogwang 	npages = 0;
503a9643ea8Slogwang 	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
504a9643ea8Slogwang 		seg = &vm_phys_segs[segind];
505a9643ea8Slogwang #ifdef	VM_FREELIST_LOWMEM
506a9643ea8Slogwang 		if (seg->end <= VM_LOWMEM_BOUNDARY)
507a9643ea8Slogwang 			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
508a9643ea8Slogwang 		else
509a9643ea8Slogwang #endif
510a9643ea8Slogwang #ifdef	VM_FREELIST_DMA32
511a9643ea8Slogwang 		if (
512a9643ea8Slogwang #ifdef	VM_DMA32_NPAGES_THRESHOLD
513a9643ea8Slogwang 		    /*
514a9643ea8Slogwang 		     * Create the DMA32 free list only if the amount of
515a9643ea8Slogwang 		     * physical memory above physical address 4G exceeds the
516a9643ea8Slogwang 		     * given threshold.
517a9643ea8Slogwang 		     */
518a9643ea8Slogwang 		    npages > VM_DMA32_NPAGES_THRESHOLD &&
519a9643ea8Slogwang #endif
520a9643ea8Slogwang 		    seg->end <= VM_DMA32_BOUNDARY)
521a9643ea8Slogwang 			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
522a9643ea8Slogwang 		else
523a9643ea8Slogwang #endif
524a9643ea8Slogwang 		{
525a9643ea8Slogwang 			npages += atop(seg->end - seg->start);
526a9643ea8Slogwang 			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
527a9643ea8Slogwang 		}
528a9643ea8Slogwang 	}
529a9643ea8Slogwang 	/* Change each entry into a running total of the free lists. */
530a9643ea8Slogwang 	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
531a9643ea8Slogwang 		vm_freelist_to_flind[freelist] +=
532a9643ea8Slogwang 		    vm_freelist_to_flind[freelist - 1];
533a9643ea8Slogwang 	}
534a9643ea8Slogwang 	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
535a9643ea8Slogwang 	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
536a9643ea8Slogwang 	/* Change each entry into a free list index. */
537a9643ea8Slogwang 	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
538a9643ea8Slogwang 		vm_freelist_to_flind[freelist]--;
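	/*
	 * Worked example (assuming the amd64 constants VM_FREELIST_DEFAULT
	 * == 0, VM_FREELIST_DMA32 == 1 and VM_FREELIST_LOWMEM == 2): if
	 * segments exist below 16MB, between 16MB and 4GB, and above 4GB
	 * with the DMA32 threshold exceeded, all three flags start as 1,
	 * the running totals become { 1, 2, 3 }, vm_nfreelists is 3, and
	 * the final decrement yields vm_freelist_to_flind[] = { 0, 1, 2 }.
	 */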
539a9643ea8Slogwang 
540a9643ea8Slogwang 	/*
541a9643ea8Slogwang 	 * Initialize the first_page and free_queues fields of each physical
542a9643ea8Slogwang 	 * memory segment.
543a9643ea8Slogwang 	 */
544a9643ea8Slogwang #ifdef VM_PHYSSEG_SPARSE
545a9643ea8Slogwang 	npages = 0;
546a9643ea8Slogwang #endif
547a9643ea8Slogwang 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
548a9643ea8Slogwang 		seg = &vm_phys_segs[segind];
549a9643ea8Slogwang #ifdef VM_PHYSSEG_SPARSE
550a9643ea8Slogwang 		seg->first_page = &vm_page_array[npages];
551a9643ea8Slogwang 		npages += atop(seg->end - seg->start);
552a9643ea8Slogwang #else
553a9643ea8Slogwang 		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
554a9643ea8Slogwang #endif
555a9643ea8Slogwang #ifdef	VM_FREELIST_LOWMEM
556a9643ea8Slogwang 		if (seg->end <= VM_LOWMEM_BOUNDARY) {
557a9643ea8Slogwang 			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
558a9643ea8Slogwang 			KASSERT(flind >= 0,
559a9643ea8Slogwang 			    ("vm_phys_init: LOWMEM flind < 0"));
560a9643ea8Slogwang 		} else
561a9643ea8Slogwang #endif
562a9643ea8Slogwang #ifdef	VM_FREELIST_DMA32
563a9643ea8Slogwang 		if (seg->end <= VM_DMA32_BOUNDARY) {
564a9643ea8Slogwang 			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
565a9643ea8Slogwang 			KASSERT(flind >= 0,
566a9643ea8Slogwang 			    ("vm_phys_init: DMA32 flind < 0"));
567a9643ea8Slogwang 		} else
568a9643ea8Slogwang #endif
569a9643ea8Slogwang 		{
570a9643ea8Slogwang 			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
571a9643ea8Slogwang 			KASSERT(flind >= 0,
572a9643ea8Slogwang 			    ("vm_phys_init: DEFAULT flind < 0"));
573a9643ea8Slogwang 		}
574a9643ea8Slogwang 		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
575a9643ea8Slogwang 	}
576a9643ea8Slogwang 
577a9643ea8Slogwang 	/*
578*22ce4affSfengbojiang 	 * Coalesce physical memory segments that are contiguous and share the
579*22ce4affSfengbojiang 	 * same per-domain free queues.
580*22ce4affSfengbojiang 	 */
581*22ce4affSfengbojiang 	prev_seg = vm_phys_segs;
582*22ce4affSfengbojiang 	seg = &vm_phys_segs[1];
583*22ce4affSfengbojiang 	end_seg = &vm_phys_segs[vm_phys_nsegs];
584*22ce4affSfengbojiang 	while (seg < end_seg) {
585*22ce4affSfengbojiang 		if (prev_seg->end == seg->start &&
586*22ce4affSfengbojiang 		    prev_seg->free_queues == seg->free_queues) {
587*22ce4affSfengbojiang 			prev_seg->end = seg->end;
588*22ce4affSfengbojiang 			KASSERT(prev_seg->domain == seg->domain,
589*22ce4affSfengbojiang 			    ("vm_phys_init: free queues cannot span domains"));
590*22ce4affSfengbojiang 			vm_phys_nsegs--;
591*22ce4affSfengbojiang 			end_seg--;
592*22ce4affSfengbojiang 			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
593*22ce4affSfengbojiang 				*tmp_seg = *(tmp_seg + 1);
594*22ce4affSfengbojiang 		} else {
595*22ce4affSfengbojiang 			prev_seg = seg;
596*22ce4affSfengbojiang 			seg++;
597*22ce4affSfengbojiang 		}
598*22ce4affSfengbojiang 	}
599*22ce4affSfengbojiang 
600*22ce4affSfengbojiang 	/*
601a9643ea8Slogwang 	 * Initialize the free queues.
602a9643ea8Slogwang 	 */
603a9643ea8Slogwang 	for (dom = 0; dom < vm_ndomains; dom++) {
604a9643ea8Slogwang 		for (flind = 0; flind < vm_nfreelists; flind++) {
605a9643ea8Slogwang 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
606a9643ea8Slogwang 				fl = vm_phys_free_queues[dom][flind][pind];
607a9643ea8Slogwang 				for (oind = 0; oind < VM_NFREEORDER; oind++)
608a9643ea8Slogwang 					TAILQ_INIT(&fl[oind].pl);
609a9643ea8Slogwang 			}
610a9643ea8Slogwang 		}
611a9643ea8Slogwang 	}
612a9643ea8Slogwang 
613a9643ea8Slogwang 	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
614a9643ea8Slogwang }
615a9643ea8Slogwang 
616a9643ea8Slogwang /*
617*22ce4affSfengbojiang  * Register info about the NUMA topology of the system.
618*22ce4affSfengbojiang  *
619*22ce4affSfengbojiang  * Invoked by platform-dependent code prior to vm_phys_init().
620*22ce4affSfengbojiang  */
621*22ce4affSfengbojiang void
622*22ce4affSfengbojiang vm_phys_register_domains(int ndomains, struct mem_affinity *affinity,
623*22ce4affSfengbojiang     int *locality)
624*22ce4affSfengbojiang {
625*22ce4affSfengbojiang #ifdef NUMA
626*22ce4affSfengbojiang 	int d, i;
627*22ce4affSfengbojiang 
628*22ce4affSfengbojiang 	/*
629*22ce4affSfengbojiang 	 * For now the only override value that we support is 1, which
630*22ce4affSfengbojiang 	 * effectively disables NUMA-awareness in the allocators.
631*22ce4affSfengbojiang 	 */
632*22ce4affSfengbojiang 	d = 0;
633*22ce4affSfengbojiang 	TUNABLE_INT_FETCH("vm.numa.disabled", &d);
634*22ce4affSfengbojiang 	if (d)
635*22ce4affSfengbojiang 		ndomains = 1;
636*22ce4affSfengbojiang 
637*22ce4affSfengbojiang 	if (ndomains > 1) {
638*22ce4affSfengbojiang 		vm_ndomains = ndomains;
639*22ce4affSfengbojiang 		mem_affinity = affinity;
640*22ce4affSfengbojiang 		mem_locality = locality;
641*22ce4affSfengbojiang 	}
642*22ce4affSfengbojiang 
643*22ce4affSfengbojiang 	for (i = 0; i < vm_ndomains; i++)
644*22ce4affSfengbojiang 		DOMAINSET_SET(i, &all_domains);
645*22ce4affSfengbojiang #else
646*22ce4affSfengbojiang 	(void)ndomains;
647*22ce4affSfengbojiang 	(void)affinity;
648*22ce4affSfengbojiang 	(void)locality;
649*22ce4affSfengbojiang #endif
650*22ce4affSfengbojiang }
651*22ce4affSfengbojiang 
652*22ce4affSfengbojiang /*
653a9643ea8Slogwang  * Split a contiguous, power of two-sized set of physical pages.
654*22ce4affSfengbojiang  *
655*22ce4affSfengbojiang  * When this function is called by a page allocation function, the caller
656*22ce4affSfengbojiang  * should request insertion at the head unless the order [order, oind) queues
657*22ce4affSfengbojiang  * are known to be empty.  The objective is to reduce the likelihood of
658*22ce4affSfengbojiang  * long-term fragmentation by promoting contemporaneous allocation and
659*22ce4affSfengbojiang  * (hopefully) deallocation.
660a9643ea8Slogwang  */
661a9643ea8Slogwang static __inline void
662*22ce4affSfengbojiang vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
663*22ce4affSfengbojiang     int tail)
664a9643ea8Slogwang {
665a9643ea8Slogwang 	vm_page_t m_buddy;
666a9643ea8Slogwang 
667a9643ea8Slogwang 	while (oind > order) {
668a9643ea8Slogwang 		oind--;
669a9643ea8Slogwang 		m_buddy = &m[1 << oind];
670a9643ea8Slogwang 		KASSERT(m_buddy->order == VM_NFREEORDER,
671a9643ea8Slogwang 		    ("vm_phys_split_pages: page %p has unexpected order %d",
672a9643ea8Slogwang 		    m_buddy, m_buddy->order));
673*22ce4affSfengbojiang 		vm_freelist_add(fl, m_buddy, oind, tail);
674a9643ea8Slogwang 	}
675a9643ea8Slogwang }
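/*
 * Worked example: to satisfy an order 1 request from a free order 3 block
 * (eight pages starting at m), the loop above frees m[4] as an order 2
 * block and m[2] as an order 1 block, leaving the caller the order 1
 * block that starts at m.
 */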
676a9643ea8Slogwang 
677a9643ea8Slogwang /*
678*22ce4affSfengbojiang  * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
679*22ce4affSfengbojiang  * and sized set to the specified free list.
680*22ce4affSfengbojiang  *
681*22ce4affSfengbojiang  * When this function is called by a page allocation function, the caller
682*22ce4affSfengbojiang  * should request insertion at the head unless the lower-order queues are
683*22ce4affSfengbojiang  * known to be empty.  The objective is to reduce the likelihood of long-
684*22ce4affSfengbojiang  * term fragmentation by promoting contemporaneous allocation and (hopefully)
685*22ce4affSfengbojiang  * deallocation.
686*22ce4affSfengbojiang  *
687*22ce4affSfengbojiang  * The physical page m's buddy must not be free.
688a9643ea8Slogwang  */
689*22ce4affSfengbojiang static void
690*22ce4affSfengbojiang vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
691a9643ea8Slogwang {
692*22ce4affSfengbojiang 	u_int n;
693*22ce4affSfengbojiang 	int order;
694a9643ea8Slogwang 
695*22ce4affSfengbojiang 	KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
696*22ce4affSfengbojiang 	KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
697*22ce4affSfengbojiang 	    ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
698*22ce4affSfengbojiang 	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
699*22ce4affSfengbojiang 	    m, npages));
700*22ce4affSfengbojiang 	do {
701a9643ea8Slogwang 		KASSERT(m->order == VM_NFREEORDER,
702*22ce4affSfengbojiang 		    ("vm_phys_enq_range: page %p has unexpected order %d",
703a9643ea8Slogwang 		    m, m->order));
704*22ce4affSfengbojiang 		order = ffs(npages) - 1;
705*22ce4affSfengbojiang 		KASSERT(order < VM_NFREEORDER,
706*22ce4affSfengbojiang 		    ("vm_phys_enq_range: order %d is out of range", order));
707*22ce4affSfengbojiang 		vm_freelist_add(fl, m, order, tail);
708*22ce4affSfengbojiang 		n = 1 << order;
709*22ce4affSfengbojiang 		m += n;
710*22ce4affSfengbojiang 		npages -= n;
711*22ce4affSfengbojiang 	} while (npages > 0);
712*22ce4affSfengbojiang }
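/*
 * Worked example: when vm_phys_alloc_npages() below takes 5 pages from a
 * 16 page (order 4) block, the remaining 11 pages are handed to this
 * function, which frees them as blocks of 1, 2 and 8 pages: ffs(11) - 1
 * is 0, then ffs(10) - 1 is 1, then ffs(8) - 1 is 3.  Each block is
 * naturally aligned because the range ends on the original block's
 * boundary.
 */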
713*22ce4affSfengbojiang 
714*22ce4affSfengbojiang /*
715*22ce4affSfengbojiang  * Tries to allocate the specified number of pages from the specified pool
716*22ce4affSfengbojiang  * within the specified domain.  Returns the actual number of allocated pages
717*22ce4affSfengbojiang  * and a pointer to each page through the array ma[].
718*22ce4affSfengbojiang  *
719*22ce4affSfengbojiang  * The returned pages may not be physically contiguous.  However, in contrast
720*22ce4affSfengbojiang  * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
721*22ce4affSfengbojiang  * calling this function once to allocate the desired number of pages will
722*22ce4affSfengbojiang  * avoid wasted time in vm_phys_split_pages().
723*22ce4affSfengbojiang  *
724*22ce4affSfengbojiang  * The free page queues for the specified domain must be locked.
725*22ce4affSfengbojiang  */
726*22ce4affSfengbojiang int
727*22ce4affSfengbojiang vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
728*22ce4affSfengbojiang {
729*22ce4affSfengbojiang 	struct vm_freelist *alt, *fl;
730*22ce4affSfengbojiang 	vm_page_t m;
731*22ce4affSfengbojiang 	int avail, end, flind, freelist, i, need, oind, pind;
732*22ce4affSfengbojiang 
733*22ce4affSfengbojiang 	KASSERT(domain >= 0 && domain < vm_ndomains,
734*22ce4affSfengbojiang 	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
735*22ce4affSfengbojiang 	KASSERT(pool < VM_NFREEPOOL,
736*22ce4affSfengbojiang 	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
737*22ce4affSfengbojiang 	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
738*22ce4affSfengbojiang 	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
739*22ce4affSfengbojiang 	vm_domain_free_assert_locked(VM_DOMAIN(domain));
740*22ce4affSfengbojiang 	i = 0;
741*22ce4affSfengbojiang 	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
742*22ce4affSfengbojiang 		flind = vm_freelist_to_flind[freelist];
743*22ce4affSfengbojiang 		if (flind < 0)
744*22ce4affSfengbojiang 			continue;
745*22ce4affSfengbojiang 		fl = vm_phys_free_queues[domain][flind][pool];
746*22ce4affSfengbojiang 		for (oind = 0; oind < VM_NFREEORDER; oind++) {
747*22ce4affSfengbojiang 			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
748*22ce4affSfengbojiang 				vm_freelist_rem(fl, m, oind);
749*22ce4affSfengbojiang 				avail = 1 << oind;
750*22ce4affSfengbojiang 				need = imin(npages - i, avail);
751*22ce4affSfengbojiang 				for (end = i + need; i < end;)
752*22ce4affSfengbojiang 					ma[i++] = m++;
753*22ce4affSfengbojiang 				if (need < avail) {
754*22ce4affSfengbojiang 					/*
755*22ce4affSfengbojiang 					 * Return excess pages to fl.  Its
756*22ce4affSfengbojiang 					 * order [0, oind) queues are empty.
757*22ce4affSfengbojiang 					 */
758*22ce4affSfengbojiang 					vm_phys_enq_range(m, avail - need, fl,
759*22ce4affSfengbojiang 					    1);
760*22ce4affSfengbojiang 					return (npages);
761*22ce4affSfengbojiang 				} else if (i == npages)
762*22ce4affSfengbojiang 					return (npages);
763*22ce4affSfengbojiang 			}
764*22ce4affSfengbojiang 		}
765*22ce4affSfengbojiang 		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
766*22ce4affSfengbojiang 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
767*22ce4affSfengbojiang 				alt = vm_phys_free_queues[domain][flind][pind];
768*22ce4affSfengbojiang 				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
769*22ce4affSfengbojiang 				    NULL) {
770*22ce4affSfengbojiang 					vm_freelist_rem(alt, m, oind);
771*22ce4affSfengbojiang 					vm_phys_set_pool(pool, m, oind);
772*22ce4affSfengbojiang 					avail = 1 << oind;
773*22ce4affSfengbojiang 					need = imin(npages - i, avail);
774*22ce4affSfengbojiang 					for (end = i + need; i < end;)
775*22ce4affSfengbojiang 						ma[i++] = m++;
776*22ce4affSfengbojiang 					if (need < avail) {
777*22ce4affSfengbojiang 						/*
778*22ce4affSfengbojiang 						 * Return excess pages to fl.
779*22ce4affSfengbojiang 						 * Its order [0, oind) queues
780*22ce4affSfengbojiang 						 * are empty.
781*22ce4affSfengbojiang 						 */
782*22ce4affSfengbojiang 						vm_phys_enq_range(m, avail -
783*22ce4affSfengbojiang 						    need, fl, 1);
784*22ce4affSfengbojiang 						return (npages);
785*22ce4affSfengbojiang 					} else if (i == npages)
786*22ce4affSfengbojiang 						return (npages);
787*22ce4affSfengbojiang 				}
788*22ce4affSfengbojiang 			}
789*22ce4affSfengbojiang 		}
790*22ce4affSfengbojiang 	}
791*22ce4affSfengbojiang 	return (i);
792a9643ea8Slogwang }
793a9643ea8Slogwang 
794a9643ea8Slogwang /*
795a9643ea8Slogwang  * Allocate a contiguous, power of two-sized set of physical pages
796a9643ea8Slogwang  * from the free lists.
797a9643ea8Slogwang  *
798a9643ea8Slogwang  * The free page queues must be locked.
799a9643ea8Slogwang  */
800a9643ea8Slogwang vm_page_t
801*22ce4affSfengbojiang vm_phys_alloc_pages(int domain, int pool, int order)
802a9643ea8Slogwang {
803a9643ea8Slogwang 	vm_page_t m;
804*22ce4affSfengbojiang 	int freelist;
805a9643ea8Slogwang 
806*22ce4affSfengbojiang 	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
807*22ce4affSfengbojiang 		m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
808a9643ea8Slogwang 		if (m != NULL)
809a9643ea8Slogwang 			return (m);
810a9643ea8Slogwang 	}
811a9643ea8Slogwang 	return (NULL);
812a9643ea8Slogwang }
813a9643ea8Slogwang 
814a9643ea8Slogwang /*
815a9643ea8Slogwang  * Allocate a contiguous, power of two-sized set of physical pages from the
816a9643ea8Slogwang  * specified free list.  The free list must be specified using one of the
817a9643ea8Slogwang  * manifest constants VM_FREELIST_*.
818a9643ea8Slogwang  *
819a9643ea8Slogwang  * The free page queues must be locked.
820a9643ea8Slogwang  */
821a9643ea8Slogwang vm_page_t
822*22ce4affSfengbojiang vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
823a9643ea8Slogwang {
824*22ce4affSfengbojiang 	struct vm_freelist *alt, *fl;
825a9643ea8Slogwang 	vm_page_t m;
826*22ce4affSfengbojiang 	int oind, pind, flind;
827a9643ea8Slogwang 
828*22ce4affSfengbojiang 	KASSERT(domain >= 0 && domain < vm_ndomains,
829*22ce4affSfengbojiang 	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
830*22ce4affSfengbojiang 	    domain));
831a9643ea8Slogwang 	KASSERT(freelist < VM_NFREELIST,
832a9643ea8Slogwang 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
833a9643ea8Slogwang 	    freelist));
834a9643ea8Slogwang 	KASSERT(pool < VM_NFREEPOOL,
835a9643ea8Slogwang 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
836a9643ea8Slogwang 	KASSERT(order < VM_NFREEORDER,
837a9643ea8Slogwang 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
838a9643ea8Slogwang 
839*22ce4affSfengbojiang 	flind = vm_freelist_to_flind[freelist];
840*22ce4affSfengbojiang 	/* Check if freelist is present */
841*22ce4affSfengbojiang 	if (flind < 0)
842a9643ea8Slogwang 		return (NULL);
843a9643ea8Slogwang 
844*22ce4affSfengbojiang 	vm_domain_free_assert_locked(VM_DOMAIN(domain));
845a9643ea8Slogwang 	fl = &vm_phys_free_queues[domain][flind][pool][0];
846a9643ea8Slogwang 	for (oind = order; oind < VM_NFREEORDER; oind++) {
847a9643ea8Slogwang 		m = TAILQ_FIRST(&fl[oind].pl);
848a9643ea8Slogwang 		if (m != NULL) {
849a9643ea8Slogwang 			vm_freelist_rem(fl, m, oind);
850*22ce4affSfengbojiang 			/* The order [order, oind) queues are empty. */
851*22ce4affSfengbojiang 			vm_phys_split_pages(m, oind, fl, order, 1);
852a9643ea8Slogwang 			return (m);
853a9643ea8Slogwang 		}
854a9643ea8Slogwang 	}
855a9643ea8Slogwang 
856a9643ea8Slogwang 	/*
857a9643ea8Slogwang 	 * The given pool was empty.  Find the largest
858a9643ea8Slogwang 	 * contiguous, power-of-two-sized set of pages in any
859a9643ea8Slogwang 	 * pool.  Transfer these pages to the given pool, and
860a9643ea8Slogwang 	 * use them to satisfy the allocation.
861a9643ea8Slogwang 	 */
862a9643ea8Slogwang 	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
863a9643ea8Slogwang 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
864a9643ea8Slogwang 			alt = &vm_phys_free_queues[domain][flind][pind][0];
865a9643ea8Slogwang 			m = TAILQ_FIRST(&alt[oind].pl);
866a9643ea8Slogwang 			if (m != NULL) {
867a9643ea8Slogwang 				vm_freelist_rem(alt, m, oind);
868a9643ea8Slogwang 				vm_phys_set_pool(pool, m, oind);
869*22ce4affSfengbojiang 				/* The order [order, oind) queues are empty. */
870*22ce4affSfengbojiang 				vm_phys_split_pages(m, oind, fl, order, 1);
871a9643ea8Slogwang 				return (m);
872a9643ea8Slogwang 			}
873a9643ea8Slogwang 		}
874a9643ea8Slogwang 	}
875a9643ea8Slogwang 	return (NULL);
876a9643ea8Slogwang }
877a9643ea8Slogwang 
878a9643ea8Slogwang /*
879a9643ea8Slogwang  * Find the vm_page corresponding to the given physical address.
880a9643ea8Slogwang  */
881a9643ea8Slogwang vm_page_t
882a9643ea8Slogwang vm_phys_paddr_to_vm_page(vm_paddr_t pa)
883a9643ea8Slogwang {
884a9643ea8Slogwang 	struct vm_phys_seg *seg;
885a9643ea8Slogwang 	int segind;
886a9643ea8Slogwang 
887a9643ea8Slogwang 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
888a9643ea8Slogwang 		seg = &vm_phys_segs[segind];
889a9643ea8Slogwang 		if (pa >= seg->start && pa < seg->end)
890a9643ea8Slogwang 			return (&seg->first_page[atop(pa - seg->start)]);
891a9643ea8Slogwang 	}
892a9643ea8Slogwang 	return (NULL);
893a9643ea8Slogwang }
894a9643ea8Slogwang 
895a9643ea8Slogwang vm_page_t
896a9643ea8Slogwang vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
897a9643ea8Slogwang {
898a9643ea8Slogwang 	struct vm_phys_fictitious_seg tmp, *seg;
899a9643ea8Slogwang 	vm_page_t m;
900a9643ea8Slogwang 
901a9643ea8Slogwang 	m = NULL;
902a9643ea8Slogwang 	tmp.start = pa;
903a9643ea8Slogwang 	tmp.end = 0;
904a9643ea8Slogwang 
905a9643ea8Slogwang 	rw_rlock(&vm_phys_fictitious_reg_lock);
906a9643ea8Slogwang 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
907a9643ea8Slogwang 	rw_runlock(&vm_phys_fictitious_reg_lock);
908a9643ea8Slogwang 	if (seg == NULL)
909a9643ea8Slogwang 		return (NULL);
910a9643ea8Slogwang 
911a9643ea8Slogwang 	m = &seg->first_page[atop(pa - seg->start)];
912a9643ea8Slogwang 	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));
913a9643ea8Slogwang 
914a9643ea8Slogwang 	return (m);
915a9643ea8Slogwang }
916a9643ea8Slogwang 
917a9643ea8Slogwang static inline void
918a9643ea8Slogwang vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
919a9643ea8Slogwang     long page_count, vm_memattr_t memattr)
920a9643ea8Slogwang {
921a9643ea8Slogwang 	long i;
922a9643ea8Slogwang 
923*22ce4affSfengbojiang 	bzero(range, page_count * sizeof(*range));
924a9643ea8Slogwang 	for (i = 0; i < page_count; i++) {
925a9643ea8Slogwang 		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
926a9643ea8Slogwang 		range[i].oflags &= ~VPO_UNMANAGED;
927a9643ea8Slogwang 		range[i].busy_lock = VPB_UNBUSIED;
928a9643ea8Slogwang 	}
929a9643ea8Slogwang }
930a9643ea8Slogwang 
931a9643ea8Slogwang int
932a9643ea8Slogwang vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
933a9643ea8Slogwang     vm_memattr_t memattr)
934a9643ea8Slogwang {
935a9643ea8Slogwang 	struct vm_phys_fictitious_seg *seg;
936a9643ea8Slogwang 	vm_page_t fp;
937a9643ea8Slogwang 	long page_count;
938a9643ea8Slogwang #ifdef VM_PHYSSEG_DENSE
939a9643ea8Slogwang 	long pi, pe;
940a9643ea8Slogwang 	long dpage_count;
941a9643ea8Slogwang #endif
942a9643ea8Slogwang 
943a9643ea8Slogwang 	KASSERT(start < end,
944a9643ea8Slogwang 	    ("Start of segment isn't less than end (start: %jx end: %jx)",
945a9643ea8Slogwang 	    (uintmax_t)start, (uintmax_t)end));
946a9643ea8Slogwang 
947a9643ea8Slogwang 	page_count = (end - start) / PAGE_SIZE;
948a9643ea8Slogwang 
949a9643ea8Slogwang #ifdef VM_PHYSSEG_DENSE
950a9643ea8Slogwang 	pi = atop(start);
951a9643ea8Slogwang 	pe = atop(end);
952a9643ea8Slogwang 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
953a9643ea8Slogwang 		fp = &vm_page_array[pi - first_page];
954a9643ea8Slogwang 		if ((pe - first_page) > vm_page_array_size) {
955a9643ea8Slogwang 			/*
956a9643ea8Slogwang 			 * We have a segment that starts inside
957a9643ea8Slogwang 			 * of vm_page_array, but ends outside of it.
958a9643ea8Slogwang 			 *
959a9643ea8Slogwang 			 * Use vm_page_array pages for those that are
960a9643ea8Slogwang 			 * inside of the vm_page_array range, and
961a9643ea8Slogwang 			 * allocate the remaining ones.
962a9643ea8Slogwang 			 */
963a9643ea8Slogwang 			dpage_count = vm_page_array_size - (pi - first_page);
964a9643ea8Slogwang 			vm_phys_fictitious_init_range(fp, start, dpage_count,
965a9643ea8Slogwang 			    memattr);
966a9643ea8Slogwang 			page_count -= dpage_count;
967a9643ea8Slogwang 			start += ptoa(dpage_count);
968a9643ea8Slogwang 			goto alloc;
969a9643ea8Slogwang 		}
970a9643ea8Slogwang 		/*
971a9643ea8Slogwang 		 * We can allocate the full range from vm_page_array,
972a9643ea8Slogwang 		 * so there's no need to register the range in the tree.
973a9643ea8Slogwang 		 */
974a9643ea8Slogwang 		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
975a9643ea8Slogwang 		return (0);
976a9643ea8Slogwang 	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
977a9643ea8Slogwang 		/*
978a9643ea8Slogwang 		 * We have a segment that ends inside of vm_page_array,
979a9643ea8Slogwang 		 * but starts outside of it.
980a9643ea8Slogwang 		 */
981a9643ea8Slogwang 		fp = &vm_page_array[0];
982a9643ea8Slogwang 		dpage_count = pe - first_page;
983a9643ea8Slogwang 		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
984a9643ea8Slogwang 		    memattr);
985a9643ea8Slogwang 		end -= ptoa(dpage_count);
986a9643ea8Slogwang 		page_count -= dpage_count;
987a9643ea8Slogwang 		goto alloc;
988a9643ea8Slogwang 	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
989a9643ea8Slogwang 		/*
990a9643ea8Slogwang 		 * Trying to register a fictitious range that expands before
991a9643ea8Slogwang 		 * and after vm_page_array.
992a9643ea8Slogwang 		 */
993a9643ea8Slogwang 		return (EINVAL);
994a9643ea8Slogwang 	} else {
995a9643ea8Slogwang alloc:
996a9643ea8Slogwang #endif
997a9643ea8Slogwang 		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
998*22ce4affSfengbojiang 		    M_WAITOK);
999a9643ea8Slogwang #ifdef VM_PHYSSEG_DENSE
1000a9643ea8Slogwang 	}
1001a9643ea8Slogwang #endif
1002a9643ea8Slogwang 	vm_phys_fictitious_init_range(fp, start, page_count, memattr);
1003a9643ea8Slogwang 
1004a9643ea8Slogwang 	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
1005a9643ea8Slogwang 	seg->start = start;
1006a9643ea8Slogwang 	seg->end = end;
1007a9643ea8Slogwang 	seg->first_page = fp;
1008a9643ea8Slogwang 
1009a9643ea8Slogwang 	rw_wlock(&vm_phys_fictitious_reg_lock);
1010a9643ea8Slogwang 	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
1011a9643ea8Slogwang 	rw_wunlock(&vm_phys_fictitious_reg_lock);
1012a9643ea8Slogwang 
1013a9643ea8Slogwang 	return (0);
1014a9643ea8Slogwang }
1015a9643ea8Slogwang 
1016a9643ea8Slogwang void
1017a9643ea8Slogwang vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
1018a9643ea8Slogwang {
1019a9643ea8Slogwang 	struct vm_phys_fictitious_seg *seg, tmp;
1020a9643ea8Slogwang #ifdef VM_PHYSSEG_DENSE
1021a9643ea8Slogwang 	long pi, pe;
1022a9643ea8Slogwang #endif
1023a9643ea8Slogwang 
1024a9643ea8Slogwang 	KASSERT(start < end,
1025a9643ea8Slogwang 	    ("Start of segment isn't less than end (start: %jx end: %jx)",
1026a9643ea8Slogwang 	    (uintmax_t)start, (uintmax_t)end));
1027a9643ea8Slogwang 
1028a9643ea8Slogwang #ifdef VM_PHYSSEG_DENSE
1029a9643ea8Slogwang 	pi = atop(start);
1030a9643ea8Slogwang 	pe = atop(end);
1031a9643ea8Slogwang 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
1032a9643ea8Slogwang 		if ((pe - first_page) <= vm_page_array_size) {
1033a9643ea8Slogwang 			/*
1034a9643ea8Slogwang 			 * This segment was allocated using vm_page_array
1035a9643ea8Slogwang 			 * only, there's nothing to do since those pages
1036a9643ea8Slogwang 			 * were never added to the tree.
1037a9643ea8Slogwang 			 */
1038a9643ea8Slogwang 			return;
1039a9643ea8Slogwang 		}
1040a9643ea8Slogwang 		/*
1041a9643ea8Slogwang 		 * We have a segment that starts inside
1042a9643ea8Slogwang 		 * of vm_page_array, but ends outside of it.
1043a9643ea8Slogwang 		 *
1044a9643ea8Slogwang 		 * Calculate how many pages were added to the
1045a9643ea8Slogwang 		 * tree and free them.
1046a9643ea8Slogwang 		 */
1047a9643ea8Slogwang 		start = ptoa(first_page + vm_page_array_size);
1048a9643ea8Slogwang 	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
1049a9643ea8Slogwang 		/*
1050a9643ea8Slogwang 		 * We have a segment that ends inside of vm_page_array,
1051a9643ea8Slogwang 		 * but starts outside of it.
1052a9643ea8Slogwang 		 */
1053a9643ea8Slogwang 		end = ptoa(first_page);
1054a9643ea8Slogwang 	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
1055a9643ea8Slogwang 		/* Since it's not possible to register such a range, panic. */
1056a9643ea8Slogwang 		panic(
1057a9643ea8Slogwang 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
1058a9643ea8Slogwang 		    (uintmax_t)start, (uintmax_t)end);
1059a9643ea8Slogwang 	}
1060a9643ea8Slogwang #endif
1061a9643ea8Slogwang 	tmp.start = start;
1062a9643ea8Slogwang 	tmp.end = 0;
1063a9643ea8Slogwang 
1064a9643ea8Slogwang 	rw_wlock(&vm_phys_fictitious_reg_lock);
1065a9643ea8Slogwang 	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
1066a9643ea8Slogwang 	if (seg->start != start || seg->end != end) {
1067a9643ea8Slogwang 		rw_wunlock(&vm_phys_fictitious_reg_lock);
1068a9643ea8Slogwang 		panic(
1069a9643ea8Slogwang 		    "Unregistering not registered fictitious range [%#jx:%#jx]",
1070a9643ea8Slogwang 		    (uintmax_t)start, (uintmax_t)end);
1071a9643ea8Slogwang 	}
1072a9643ea8Slogwang 	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
1073a9643ea8Slogwang 	rw_wunlock(&vm_phys_fictitious_reg_lock);
1074a9643ea8Slogwang 	free(seg->first_page, M_FICT_PAGES);
1075a9643ea8Slogwang 	free(seg, M_FICT_PAGES);
1076a9643ea8Slogwang }
1077a9643ea8Slogwang 
1078a9643ea8Slogwang /*
1079a9643ea8Slogwang  * Free a contiguous, power of two-sized set of physical pages.
1080a9643ea8Slogwang  *
1081a9643ea8Slogwang  * The free page queues must be locked.
1082a9643ea8Slogwang  */
1083a9643ea8Slogwang void
1084a9643ea8Slogwang vm_phys_free_pages(vm_page_t m, int order)
1085a9643ea8Slogwang {
1086a9643ea8Slogwang 	struct vm_freelist *fl;
1087a9643ea8Slogwang 	struct vm_phys_seg *seg;
1088a9643ea8Slogwang 	vm_paddr_t pa;
1089a9643ea8Slogwang 	vm_page_t m_buddy;
1090a9643ea8Slogwang 
1091a9643ea8Slogwang 	KASSERT(m->order == VM_NFREEORDER,
1092a9643ea8Slogwang 	    ("vm_phys_free_pages: page %p has unexpected order %d",
1093a9643ea8Slogwang 	    m, m->order));
1094a9643ea8Slogwang 	KASSERT(m->pool < VM_NFREEPOOL,
1095a9643ea8Slogwang 	    ("vm_phys_free_pages: page %p has unexpected pool %d",
1096a9643ea8Slogwang 	    m, m->pool));
1097a9643ea8Slogwang 	KASSERT(order < VM_NFREEORDER,
1098a9643ea8Slogwang 	    ("vm_phys_free_pages: order %d is out of range", order));
1099a9643ea8Slogwang 	seg = &vm_phys_segs[m->segind];
1100*22ce4affSfengbojiang 	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
1101a9643ea8Slogwang 	if (order < VM_NFREEORDER - 1) {
1102a9643ea8Slogwang 		pa = VM_PAGE_TO_PHYS(m);
1103a9643ea8Slogwang 		do {
1104a9643ea8Slogwang 			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
1105a9643ea8Slogwang 			if (pa < seg->start || pa >= seg->end)
1106a9643ea8Slogwang 				break;
1107a9643ea8Slogwang 			m_buddy = &seg->first_page[atop(pa - seg->start)];
1108a9643ea8Slogwang 			if (m_buddy->order != order)
1109a9643ea8Slogwang 				break;
1110a9643ea8Slogwang 			fl = (*seg->free_queues)[m_buddy->pool];
1111a9643ea8Slogwang 			vm_freelist_rem(fl, m_buddy, order);
1112a9643ea8Slogwang 			if (m_buddy->pool != m->pool)
1113a9643ea8Slogwang 				vm_phys_set_pool(m->pool, m_buddy, order);
1114a9643ea8Slogwang 			order++;
1115a9643ea8Slogwang 			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
1116a9643ea8Slogwang 			m = &seg->first_page[atop(pa - seg->start)];
1117a9643ea8Slogwang 		} while (order < VM_NFREEORDER - 1);
1118a9643ea8Slogwang 	}
1119a9643ea8Slogwang 	fl = (*seg->free_queues)[m->pool];
1120a9643ea8Slogwang 	vm_freelist_add(fl, m, order, 1);
1121a9643ea8Slogwang }
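/*
 * Worked example (4KB pages): freeing a single (order 0) page at physical
 * address 0x3000 flips bit PAGE_SHIFT to find its buddy at 0x2000.  If
 * that page is free at order 0, the two merge into an order 1 block at
 * 0x2000, whose own buddy is then the order 1 block at 0x0000, and so on
 * until a buddy is missing, still allocated, or outside the segment.
 */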
1122a9643ea8Slogwang 
1123a9643ea8Slogwang /*
1124*22ce4affSfengbojiang  * Return the largest possible order of a set of pages starting at m.
1125a9643ea8Slogwang  */
1126*22ce4affSfengbojiang static int
1127*22ce4affSfengbojiang max_order(vm_page_t m)
1128a9643ea8Slogwang {
1129a9643ea8Slogwang 
1130a9643ea8Slogwang 	/*
1131a9643ea8Slogwang 	 * Unsigned "min" is used here so that "order" is assigned
1132a9643ea8Slogwang 	 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
1133a9643ea8Slogwang 	 * or the low-order bits of its physical address are zero
1134a9643ea8Slogwang 	 * because the size of a physical address exceeds the size of
1135a9643ea8Slogwang 	 * a long.
1136a9643ea8Slogwang 	 */
1137*22ce4affSfengbojiang 	return (min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
1138*22ce4affSfengbojiang 	    VM_NFREEORDER - 1));
1139a9643ea8Slogwang }
1140*22ce4affSfengbojiang 
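Editorial note, not part of the source: the ffsl() expression above counts the trailing zero bits of the page frame number, i.e. the largest order at which the page is naturally aligned. A hedged sketch using the BSD ffsl() from <strings.h>, with a hypothetical cap standing in for VM_NFREEORDER - 1:

#include <strings.h>

/* Largest order at which page frame number "pfn" is naturally aligned,
 * capped at "cap"; pfn == 0 yields the cap, just as the unsigned "min"
 * above turns ffsl(0) - 1 == -1 into a very large value. */
static int
largest_aligned_order(unsigned long pfn, unsigned int cap)
{
	unsigned int o = (unsigned int)(ffsl((long)pfn) - 1);

	return ((int)(o < cap ? o : cap));
}

/* largest_aligned_order(0x240, 12) == 6: 0x240 has six trailing zero bits,
 * so a naturally aligned block of up to 2^6 pages can start there. */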
1141*22ce4affSfengbojiang /*
1142*22ce4affSfengbojiang  * Free a contiguous, arbitrarily sized set of physical pages, without
1143*22ce4affSfengbojiang  * merging across set boundaries.
1144*22ce4affSfengbojiang  *
1145*22ce4affSfengbojiang  * The free page queues must be locked.
1146*22ce4affSfengbojiang  */
1147*22ce4affSfengbojiang void
1148*22ce4affSfengbojiang vm_phys_enqueue_contig(vm_page_t m, u_long npages)
1149*22ce4affSfengbojiang {
1150*22ce4affSfengbojiang 	struct vm_freelist *fl;
1151*22ce4affSfengbojiang 	struct vm_phys_seg *seg;
1152*22ce4affSfengbojiang 	vm_page_t m_end;
1153*22ce4affSfengbojiang 	int order;
1154*22ce4affSfengbojiang 
1155*22ce4affSfengbojiang 	/*
1156*22ce4affSfengbojiang 	 * Avoid unnecessary coalescing by freeing the pages in the largest
1157*22ce4affSfengbojiang 	 * possible power-of-two-sized subsets.
1158*22ce4affSfengbojiang 	 */
1159*22ce4affSfengbojiang 	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
1160*22ce4affSfengbojiang 	seg = &vm_phys_segs[m->segind];
1161*22ce4affSfengbojiang 	fl = (*seg->free_queues)[m->pool];
1162*22ce4affSfengbojiang 	m_end = m + npages;
1163*22ce4affSfengbojiang 	/* Free blocks of increasing size. */
1164*22ce4affSfengbojiang 	while ((order = max_order(m)) < VM_NFREEORDER - 1 &&
1165*22ce4affSfengbojiang 	    m + (1 << order) <= m_end) {
1166*22ce4affSfengbojiang 		KASSERT(seg == &vm_phys_segs[m->segind],
1167*22ce4affSfengbojiang 		    ("%s: page range [%p,%p) spans multiple segments",
1168*22ce4affSfengbojiang 		    __func__, m_end - npages, m));
1169*22ce4affSfengbojiang 		vm_freelist_add(fl, m, order, 1);
1170*22ce4affSfengbojiang 		m += 1 << order;
1171a9643ea8Slogwang 	}
1172*22ce4affSfengbojiang 	/* Free blocks of maximum size. */
1173*22ce4affSfengbojiang 	while (m + (1 << order) <= m_end) {
1174*22ce4affSfengbojiang 		KASSERT(seg == &vm_phys_segs[m->segind],
1175*22ce4affSfengbojiang 		    ("%s: page range [%p,%p) spans multiple segments",
1176*22ce4affSfengbojiang 		    __func__, m_end - npages, m));
1177*22ce4affSfengbojiang 		vm_freelist_add(fl, m, order, 1);
1178*22ce4affSfengbojiang 		m += 1 << order;
1179*22ce4affSfengbojiang 	}
1180*22ce4affSfengbojiang 	/* Free blocks of diminishing size. */
1181*22ce4affSfengbojiang 	while (m < m_end) {
1182*22ce4affSfengbojiang 		KASSERT(seg == &vm_phys_segs[m->segind],
1183*22ce4affSfengbojiang 		    ("%s: page range [%p,%p) spans multiple segments",
1184*22ce4affSfengbojiang 		    __func__, m_end - npages, m));
1185*22ce4affSfengbojiang 		order = flsl(m_end - m) - 1;
1186*22ce4affSfengbojiang 		vm_freelist_add(fl, m, order, 1);
1187*22ce4affSfengbojiang 		m += 1 << order;
1188*22ce4affSfengbojiang 	}
1189*22ce4affSfengbojiang }
1190*22ce4affSfengbojiang 
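Editorial note, not part of the source: the third loop above uses flsl() to pick the largest power-of-two block that still fits in the remaining run. A small sketch of that phase, using the BSD flsl() from <strings.h> and hypothetical page counts:

#include <stdio.h>
#include <strings.h>

/* Print the block orders the "diminishing size" phase would use to cover
 * "n" remaining pages. */
static void
split_diminishing(unsigned long n)
{
	int order;

	while (n > 0) {
		order = flsl((long)n) - 1;	/* largest 2^order <= n */
		printf("order %d (%lu pages)\n", order, 1UL << order);
		n -= 1UL << order;
	}
}

/* split_diminishing(13) prints orders 3, 2 and 0: 8 + 4 + 1 pages. */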
1191*22ce4affSfengbojiang /*
1192*22ce4affSfengbojiang  * Free a contiguous, arbitrarily sized set of physical pages.
1193*22ce4affSfengbojiang  *
1194*22ce4affSfengbojiang  * The free page queues must be locked.
1195*22ce4affSfengbojiang  */
1196*22ce4affSfengbojiang void
1197*22ce4affSfengbojiang vm_phys_free_contig(vm_page_t m, u_long npages)
1198*22ce4affSfengbojiang {
1199*22ce4affSfengbojiang 	int order_start, order_end;
1200*22ce4affSfengbojiang 	vm_page_t m_start, m_end;
1201*22ce4affSfengbojiang 
1202*22ce4affSfengbojiang 	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
1203*22ce4affSfengbojiang 
1204*22ce4affSfengbojiang 	m_start = m;
1205*22ce4affSfengbojiang 	order_start = max_order(m_start);
1206*22ce4affSfengbojiang 	if (order_start < VM_NFREEORDER - 1)
1207*22ce4affSfengbojiang 		m_start += 1 << order_start;
1208*22ce4affSfengbojiang 	m_end = m + npages;
1209*22ce4affSfengbojiang 	order_end = max_order(m_end);
1210*22ce4affSfengbojiang 	if (order_end < VM_NFREEORDER - 1)
1211*22ce4affSfengbojiang 		m_end -= 1 << order_end;
1212*22ce4affSfengbojiang 	/*
1213*22ce4affSfengbojiang 	 * Avoid unnecessary coalescing by freeing the pages at the start and
1214*22ce4affSfengbojiang 	 * end of the range last.
1215*22ce4affSfengbojiang 	 */
1216*22ce4affSfengbojiang 	if (m_start < m_end)
1217*22ce4affSfengbojiang 		vm_phys_enqueue_contig(m_start, m_end - m_start);
1218*22ce4affSfengbojiang 	if (order_start < VM_NFREEORDER - 1)
1219*22ce4affSfengbojiang 		vm_phys_free_pages(m, order_start);
1220*22ce4affSfengbojiang 	if (order_end < VM_NFREEORDER - 1)
1221*22ce4affSfengbojiang 		vm_phys_free_pages(m_end, order_end);
1222a9643ea8Slogwang }
1223a9643ea8Slogwang 
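Editorial note, not part of the source: a sketch of the edge trimming above, using a trailing-zero count in place of max_order() and hypothetical values:

#include <stdint.h>

/* Compute the naturally aligned interior [*startp, *endp) of a page-frame
 * range; "cap" stands in for VM_NFREEORDER - 1. */
static void
trim_range(uint64_t pfn, uint64_t npages, int cap, uint64_t *startp,
    uint64_t *endp)
{
	uint64_t start = pfn, end = pfn + npages;
	int o;

	o = start == 0 ? cap : __builtin_ctzll(start);
	if (o < cap)
		start += (uint64_t)1 << o;
	o = end == 0 ? cap : __builtin_ctzll(end);
	if (o < cap)
		end -= (uint64_t)1 << o;
	*startp = start;
	*endp = end;
}

/* trim_range(3, 10, 12, ...) yields [4, 12): the pages at frames 3 and 12
 * are freed last so they can still coalesce with free neighbors outside
 * the range. */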
1224a9643ea8Slogwang /*
1225a9643ea8Slogwang  * Scan physical memory between the specified addresses "low" and "high" for a
1226a9643ea8Slogwang  * run of contiguous physical pages that satisfy the specified conditions, and
1227a9643ea8Slogwang  * return the lowest page in the run.  The specified "alignment" determines
1228a9643ea8Slogwang  * the alignment of the lowest physical page in the run.  If the specified
1229a9643ea8Slogwang  * "boundary" is non-zero, then the run of physical pages cannot span a
1230a9643ea8Slogwang  * physical address that is a multiple of "boundary".
1231a9643ea8Slogwang  *
1232a9643ea8Slogwang  * "npages" must be greater than zero.  Both "alignment" and "boundary" must
1233a9643ea8Slogwang  * be a power of two.
1234a9643ea8Slogwang  */
1235a9643ea8Slogwang vm_page_t
1236*22ce4affSfengbojiang vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
1237a9643ea8Slogwang     u_long alignment, vm_paddr_t boundary, int options)
1238a9643ea8Slogwang {
1239a9643ea8Slogwang 	vm_paddr_t pa_end;
1240a9643ea8Slogwang 	vm_page_t m_end, m_run, m_start;
1241a9643ea8Slogwang 	struct vm_phys_seg *seg;
1242a9643ea8Slogwang 	int segind;
1243a9643ea8Slogwang 
1244a9643ea8Slogwang 	KASSERT(npages > 0, ("npages is 0"));
1245a9643ea8Slogwang 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
1246a9643ea8Slogwang 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
1247a9643ea8Slogwang 	if (low >= high)
1248a9643ea8Slogwang 		return (NULL);
1249a9643ea8Slogwang 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
1250a9643ea8Slogwang 		seg = &vm_phys_segs[segind];
1251*22ce4affSfengbojiang 		if (seg->domain != domain)
1252*22ce4affSfengbojiang 			continue;
1253a9643ea8Slogwang 		if (seg->start >= high)
1254a9643ea8Slogwang 			break;
1255a9643ea8Slogwang 		if (low >= seg->end)
1256a9643ea8Slogwang 			continue;
1257a9643ea8Slogwang 		if (low <= seg->start)
1258a9643ea8Slogwang 			m_start = seg->first_page;
1259a9643ea8Slogwang 		else
1260a9643ea8Slogwang 			m_start = &seg->first_page[atop(low - seg->start)];
1261a9643ea8Slogwang 		if (high < seg->end)
1262a9643ea8Slogwang 			pa_end = high;
1263a9643ea8Slogwang 		else
1264a9643ea8Slogwang 			pa_end = seg->end;
1265a9643ea8Slogwang 		if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
1266a9643ea8Slogwang 			continue;
1267a9643ea8Slogwang 		m_end = &seg->first_page[atop(pa_end - seg->start)];
1268a9643ea8Slogwang 		m_run = vm_page_scan_contig(npages, m_start, m_end,
1269a9643ea8Slogwang 		    alignment, boundary, options);
1270a9643ea8Slogwang 		if (m_run != NULL)
1271a9643ea8Slogwang 			return (m_run);
1272a9643ea8Slogwang 	}
1273a9643ea8Slogwang 	return (NULL);
1274a9643ea8Slogwang }
1275a9643ea8Slogwang 
1276a9643ea8Slogwang /*
1277a9643ea8Slogwang  * Set the pool for a contiguous, power of two-sized set of physical pages.
1278a9643ea8Slogwang  */
1279a9643ea8Slogwang void
1280a9643ea8Slogwang vm_phys_set_pool(int pool, vm_page_t m, int order)
1281a9643ea8Slogwang {
1282a9643ea8Slogwang 	vm_page_t m_tmp;
1283a9643ea8Slogwang 
1284a9643ea8Slogwang 	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
1285a9643ea8Slogwang 		m_tmp->pool = pool;
1286a9643ea8Slogwang }
1287a9643ea8Slogwang 
1288a9643ea8Slogwang /*
1289a9643ea8Slogwang  * Search for the given physical page "m" in the free lists.  If the search
1290a9643ea8Slogwang  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
1291a9643ea8Slogwang  * FALSE, indicating that "m" is not in the free lists.
1292a9643ea8Slogwang  *
1293a9643ea8Slogwang  * The free page queues must be locked.
1294a9643ea8Slogwang  */
1295a9643ea8Slogwang boolean_t
1296a9643ea8Slogwang vm_phys_unfree_page(vm_page_t m)
1297a9643ea8Slogwang {
1298a9643ea8Slogwang 	struct vm_freelist *fl;
1299a9643ea8Slogwang 	struct vm_phys_seg *seg;
1300a9643ea8Slogwang 	vm_paddr_t pa, pa_half;
1301a9643ea8Slogwang 	vm_page_t m_set, m_tmp;
1302a9643ea8Slogwang 	int order;
1303a9643ea8Slogwang 
1304a9643ea8Slogwang 	/*
1305a9643ea8Slogwang 	 * First, find the contiguous, power of two-sized set of free
1306a9643ea8Slogwang 	 * physical pages containing the given physical page "m" and
1307a9643ea8Slogwang 	 * assign it to "m_set".
1308a9643ea8Slogwang 	 */
1309a9643ea8Slogwang 	seg = &vm_phys_segs[m->segind];
1310*22ce4affSfengbojiang 	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
1311a9643ea8Slogwang 	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
1312a9643ea8Slogwang 	    order < VM_NFREEORDER - 1; ) {
1313a9643ea8Slogwang 		order++;
1314a9643ea8Slogwang 		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
1315a9643ea8Slogwang 		if (pa >= seg->start)
1316a9643ea8Slogwang 			m_set = &seg->first_page[atop(pa - seg->start)];
1317a9643ea8Slogwang 		else
1318a9643ea8Slogwang 			return (FALSE);
1319a9643ea8Slogwang 	}
1320a9643ea8Slogwang 	if (m_set->order < order)
1321a9643ea8Slogwang 		return (FALSE);
1322a9643ea8Slogwang 	if (m_set->order == VM_NFREEORDER)
1323a9643ea8Slogwang 		return (FALSE);
1324a9643ea8Slogwang 	KASSERT(m_set->order < VM_NFREEORDER,
1325a9643ea8Slogwang 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
1326a9643ea8Slogwang 	    m_set, m_set->order));
1327a9643ea8Slogwang 
1328a9643ea8Slogwang 	/*
1329a9643ea8Slogwang 	 * Next, remove "m_set" from the free lists.  Finally, extract
1330a9643ea8Slogwang 	 * "m" from "m_set" using an iterative algorithm: While "m_set"
1331a9643ea8Slogwang 	 * is larger than a page, shrink "m_set" by returning the half
1332a9643ea8Slogwang 	 * of "m_set" that does not contain "m" to the free lists.
1333a9643ea8Slogwang 	 */
1334a9643ea8Slogwang 	fl = (*seg->free_queues)[m_set->pool];
1335a9643ea8Slogwang 	order = m_set->order;
1336a9643ea8Slogwang 	vm_freelist_rem(fl, m_set, order);
1337a9643ea8Slogwang 	while (order > 0) {
1338a9643ea8Slogwang 		order--;
1339a9643ea8Slogwang 		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
1340a9643ea8Slogwang 		if (m->phys_addr < pa_half)
1341a9643ea8Slogwang 			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
1342a9643ea8Slogwang 		else {
1343a9643ea8Slogwang 			m_tmp = m_set;
1344a9643ea8Slogwang 			m_set = &seg->first_page[atop(pa_half - seg->start)];
1345a9643ea8Slogwang 		}
1346a9643ea8Slogwang 		vm_freelist_add(fl, m_tmp, order, 0);
1347a9643ea8Slogwang 	}
1348a9643ea8Slogwang 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
1349a9643ea8Slogwang 	return (TRUE);
1350a9643ea8Slogwang }
1351a9643ea8Slogwang 
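Editorial note, not part of the source: the splitting loop above repeatedly returns the half that does not contain the target page. A sketch that traces those returns, assuming PAGE_SHIFT == 12 and hypothetical addresses:

#include <stdint.h>
#include <stdio.h>

/* Trace the halves returned to the free lists while carving the page at
 * byte address "pa" out of a free block of the given order starting at
 * "block". */
static void
carve_trace(uint64_t block, int order, uint64_t pa)
{
	uint64_t half;

	while (order > 0) {
		order--;
		half = block ^ ((uint64_t)1 << (12 + order));
		if (pa < half) {
			printf("return order %d at %#jx\n", order,
			    (uintmax_t)half);
		} else {
			printf("return order %d at %#jx\n", order,
			    (uintmax_t)block);
			block = half;
		}
	}
}

/* carve_trace(0x0, 3, 0x5000) returns order 2 at 0x0, order 1 at 0x6000 and
 * order 0 at 0x4000, leaving only the page at 0x5000 out of the free lists. */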
1352a9643ea8Slogwang /*
1353a9643ea8Slogwang  * Allocate a contiguous set of physical pages of the given size
1354a9643ea8Slogwang  * "npages" from the free lists.  All of the physical pages must be at
1355a9643ea8Slogwang  * or above the given physical address "low" and below the given
1356a9643ea8Slogwang  * physical address "high".  The given value "alignment" determines the
1357a9643ea8Slogwang  * alignment of the first physical page in the set.  If the given value
1358a9643ea8Slogwang  * "boundary" is non-zero, then the set of physical pages cannot cross
1359a9643ea8Slogwang  * any physical address boundary that is a multiple of that value.  Both
1360a9643ea8Slogwang  * "alignment" and "boundary" must be a power of two.
1361a9643ea8Slogwang  */
1362a9643ea8Slogwang vm_page_t
1363*22ce4affSfengbojiang vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
1364a9643ea8Slogwang     u_long alignment, vm_paddr_t boundary)
1365a9643ea8Slogwang {
1366a9643ea8Slogwang 	vm_paddr_t pa_end, pa_start;
1367a9643ea8Slogwang 	vm_page_t m_run;
1368a9643ea8Slogwang 	struct vm_phys_seg *seg;
1369*22ce4affSfengbojiang 	int segind;
1370a9643ea8Slogwang 
1371a9643ea8Slogwang 	KASSERT(npages > 0, ("npages is 0"));
1372a9643ea8Slogwang 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
1373a9643ea8Slogwang 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
1374*22ce4affSfengbojiang 	vm_domain_free_assert_locked(VM_DOMAIN(domain));
1375a9643ea8Slogwang 	if (low >= high)
1376a9643ea8Slogwang 		return (NULL);
1377a9643ea8Slogwang 	m_run = NULL;
1378a9643ea8Slogwang 	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
1379a9643ea8Slogwang 		seg = &vm_phys_segs[segind];
1380a9643ea8Slogwang 		if (seg->start >= high || seg->domain != domain)
1381a9643ea8Slogwang 			continue;
1382a9643ea8Slogwang 		if (low >= seg->end)
1383a9643ea8Slogwang 			break;
1384a9643ea8Slogwang 		if (low <= seg->start)
1385a9643ea8Slogwang 			pa_start = seg->start;
1386a9643ea8Slogwang 		else
1387a9643ea8Slogwang 			pa_start = low;
1388a9643ea8Slogwang 		if (high < seg->end)
1389a9643ea8Slogwang 			pa_end = high;
1390a9643ea8Slogwang 		else
1391a9643ea8Slogwang 			pa_end = seg->end;
1392a9643ea8Slogwang 		if (pa_end - pa_start < ptoa(npages))
1393a9643ea8Slogwang 			continue;
1394a9643ea8Slogwang 		m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
1395a9643ea8Slogwang 		    alignment, boundary);
1396a9643ea8Slogwang 		if (m_run != NULL)
1397a9643ea8Slogwang 			break;
1398a9643ea8Slogwang 	}
1399a9643ea8Slogwang 	return (m_run);
1400a9643ea8Slogwang }
1401a9643ea8Slogwang 
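Editorial note, not part of the source: each segment is clipped against [low, high) before the per-segment search runs. A minimal sketch of that clipping test, with hypothetical names and values:

#include <stdbool.h>
#include <stdint.h>

/* True if the intersection of [low, high) with [seg_start, seg_end) is at
 * least "len" bytes long. */
static bool
clipped_range_fits(uint64_t low, uint64_t high, uint64_t seg_start,
    uint64_t seg_end, uint64_t len)
{
	uint64_t start = low > seg_start ? low : seg_start;
	uint64_t end = high < seg_end ? high : seg_end;

	return (end > start && end - start >= len);
}

/* clipped_range_fits(0x100000, 0x400000, 0x0, 0x200000, 0x100000) is true:
 * the overlap [0x100000, 0x200000) is exactly one megabyte. */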
1402a9643ea8Slogwang /*
1403a9643ea8Slogwang  * Allocate a run of contiguous physical pages from the free list for the
1404a9643ea8Slogwang  * specified segment.
1405a9643ea8Slogwang  */
1406a9643ea8Slogwang static vm_page_t
1407a9643ea8Slogwang vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
1408a9643ea8Slogwang     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
1409a9643ea8Slogwang {
1410a9643ea8Slogwang 	struct vm_freelist *fl;
1411a9643ea8Slogwang 	vm_paddr_t pa, pa_end, size;
1412a9643ea8Slogwang 	vm_page_t m, m_ret;
1413a9643ea8Slogwang 	u_long npages_end;
1414a9643ea8Slogwang 	int oind, order, pind;
1415a9643ea8Slogwang 
1416a9643ea8Slogwang 	KASSERT(npages > 0, ("npages is 0"));
1417a9643ea8Slogwang 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
1418a9643ea8Slogwang 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
1419*22ce4affSfengbojiang 	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
1420a9643ea8Slogwang 	/* Compute the queue that is the best fit for npages. */
1421*22ce4affSfengbojiang 	order = flsl(npages - 1);
1422a9643ea8Slogwang 	/* Search for a run satisfying the specified conditions. */
1423a9643ea8Slogwang 	size = npages << PAGE_SHIFT;
1424a9643ea8Slogwang 	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
1425a9643ea8Slogwang 	    oind++) {
1426a9643ea8Slogwang 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
1427a9643ea8Slogwang 			fl = (*seg->free_queues)[pind];
1428*22ce4affSfengbojiang 			TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
1429a9643ea8Slogwang 				/*
1430a9643ea8Slogwang 				 * Is the size of this allocation request
1431a9643ea8Slogwang 				 * larger than the largest block size?
1432a9643ea8Slogwang 				 */
1433a9643ea8Slogwang 				if (order >= VM_NFREEORDER) {
1434a9643ea8Slogwang 					/*
1435a9643ea8Slogwang 					 * Determine if a sufficient number of
1436a9643ea8Slogwang 					 * subsequent blocks to satisfy the
1437a9643ea8Slogwang 					 * allocation request are free.
1438a9643ea8Slogwang 					 */
1439a9643ea8Slogwang 					pa = VM_PAGE_TO_PHYS(m_ret);
1440a9643ea8Slogwang 					pa_end = pa + size;
1441*22ce4affSfengbojiang 					if (pa_end < pa)
1442*22ce4affSfengbojiang 						continue;
1443a9643ea8Slogwang 					for (;;) {
1444a9643ea8Slogwang 						pa += 1 << (PAGE_SHIFT +
1445a9643ea8Slogwang 						    VM_NFREEORDER - 1);
1446a9643ea8Slogwang 						if (pa >= pa_end ||
1447a9643ea8Slogwang 						    pa < seg->start ||
1448a9643ea8Slogwang 						    pa >= seg->end)
1449a9643ea8Slogwang 							break;
1450a9643ea8Slogwang 						m = &seg->first_page[atop(pa -
1451a9643ea8Slogwang 						    seg->start)];
1452a9643ea8Slogwang 						if (m->order != VM_NFREEORDER -
1453a9643ea8Slogwang 						    1)
1454a9643ea8Slogwang 							break;
1455a9643ea8Slogwang 					}
1456a9643ea8Slogwang 					/* If not, go to the next block. */
1457a9643ea8Slogwang 					if (pa < pa_end)
1458a9643ea8Slogwang 						continue;
1459a9643ea8Slogwang 				}
1460a9643ea8Slogwang 
1461a9643ea8Slogwang 				/*
1462a9643ea8Slogwang 				 * Determine if the blocks are within the
1463a9643ea8Slogwang 				 * given range, satisfy the given alignment,
1464a9643ea8Slogwang 				 * and do not cross the given boundary.
1465a9643ea8Slogwang 				 */
1466a9643ea8Slogwang 				pa = VM_PAGE_TO_PHYS(m_ret);
1467a9643ea8Slogwang 				pa_end = pa + size;
1468a9643ea8Slogwang 				if (pa >= low && pa_end <= high &&
1469a9643ea8Slogwang 				    (pa & (alignment - 1)) == 0 &&
1470a9643ea8Slogwang 				    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
1471a9643ea8Slogwang 					goto done;
1472a9643ea8Slogwang 			}
1473a9643ea8Slogwang 		}
1474a9643ea8Slogwang 	}
1475a9643ea8Slogwang 	return (NULL);
1476a9643ea8Slogwang done:
1477a9643ea8Slogwang 	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
1478a9643ea8Slogwang 		fl = (*seg->free_queues)[m->pool];
1479*22ce4affSfengbojiang 		vm_freelist_rem(fl, m, oind);
1480*22ce4affSfengbojiang 		if (m->pool != VM_FREEPOOL_DEFAULT)
1481*22ce4affSfengbojiang 			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
1482a9643ea8Slogwang 	}
1483a9643ea8Slogwang 	/* Return excess pages to the free lists. */
1484*22ce4affSfengbojiang 	npages_end = roundup2(npages, 1 << oind);
1485*22ce4affSfengbojiang 	if (npages < npages_end) {
1486*22ce4affSfengbojiang 		fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
1487*22ce4affSfengbojiang 		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
1488*22ce4affSfengbojiang 	}
1489a9643ea8Slogwang 	return (m_ret);
1490a9643ea8Slogwang }
1491a9643ea8Slogwang 
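Editorial note, not part of the source: two pieces of arithmetic above are easy to check in isolation. flsl(npages - 1) picks the smallest order whose block covers npages, and the rounddown2(pa ^ (pa_end - 1), boundary) test verifies that the run stays inside one boundary-aligned window. A sketch with hypothetical values, assuming npages > 0 and a nonzero power-of-two boundary:

#include <stdbool.h>
#include <stdint.h>
#include <strings.h>

/* Smallest order such that (1 << order) pages cover "npages" pages. */
static int
best_fit_order(unsigned long npages)
{
	return (flsl((long)(npages - 1)));
}

/* True if [pa, pa + size) does not cross a "boundary"-aligned address. */
static bool
run_within_boundary(uint64_t pa, uint64_t size, uint64_t boundary)
{
	return (((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0);
}

/* best_fit_order(9) == 4: 16 pages is the smallest power of two >= 9.
 * run_within_boundary(0x3000, 0x2000, 0x4000) is false: the run crosses
 * the 0x4000 boundary. */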
1492*22ce4affSfengbojiang /*
1493*22ce4affSfengbojiang  * Return the index of the first unused slot which may be the terminating
1494*22ce4affSfengbojiang  * entry.
1495*22ce4affSfengbojiang  */
1496*22ce4affSfengbojiang static int
1497*22ce4affSfengbojiang vm_phys_avail_count(void)
1498*22ce4affSfengbojiang {
1499*22ce4affSfengbojiang 	int i;
1500*22ce4affSfengbojiang 
1501*22ce4affSfengbojiang 	for (i = 0; phys_avail[i + 1]; i += 2)
1502*22ce4affSfengbojiang 		continue;
1503*22ce4affSfengbojiang 	if (i > PHYS_AVAIL_ENTRIES)
1504*22ce4affSfengbojiang 		panic("Improperly terminated phys_avail %d entries", i);
1505*22ce4affSfengbojiang 
1506*22ce4affSfengbojiang 	return (i);
1507*22ce4affSfengbojiang }
1508*22ce4affSfengbojiang 
1509*22ce4affSfengbojiang /*
1510*22ce4affSfengbojiang  * Assert that a phys_avail entry is valid.
1511*22ce4affSfengbojiang  */
1512*22ce4affSfengbojiang static void
1513*22ce4affSfengbojiang vm_phys_avail_check(int i)
1514*22ce4affSfengbojiang {
1515*22ce4affSfengbojiang 	if (phys_avail[i] & PAGE_MASK)
1516*22ce4affSfengbojiang 		panic("Unaligned phys_avail[%d]: %#jx", i,
1517*22ce4affSfengbojiang 		    (intmax_t)phys_avail[i]);
1518*22ce4affSfengbojiang 	if (phys_avail[i + 1] & PAGE_MASK)
1519*22ce4affSfengbojiang 		panic("Unaligned phys_avail[%d + 1]: %#jx", i,
1520*22ce4affSfengbojiang 		    (intmax_t)phys_avail[i + 1]);
1521*22ce4affSfengbojiang 	if (phys_avail[i + 1] < phys_avail[i])
1522*22ce4affSfengbojiang 		panic("phys_avail[%d] start %#jx > end %#jx", i,
1523*22ce4affSfengbojiang 		    (intmax_t)phys_avail[i], (intmax_t)phys_avail[i + 1]);
1524*22ce4affSfengbojiang }
1525*22ce4affSfengbojiang 
1526*22ce4affSfengbojiang /*
1527*22ce4affSfengbojiang  * Return the index of an overlapping phys_avail entry or -1.
1528*22ce4affSfengbojiang  */
1529*22ce4affSfengbojiang #ifdef NUMA
1530*22ce4affSfengbojiang static int
1531*22ce4affSfengbojiang vm_phys_avail_find(vm_paddr_t pa)
1532*22ce4affSfengbojiang {
1533*22ce4affSfengbojiang 	int i;
1534*22ce4affSfengbojiang 
1535*22ce4affSfengbojiang 	for (i = 0; phys_avail[i + 1]; i += 2)
1536*22ce4affSfengbojiang 		if (phys_avail[i] <= pa && phys_avail[i + 1] > pa)
1537*22ce4affSfengbojiang 			return (i);
1538*22ce4affSfengbojiang 	return (-1);
1539*22ce4affSfengbojiang }
1540*22ce4affSfengbojiang #endif
1541*22ce4affSfengbojiang 
1542*22ce4affSfengbojiang /*
1543*22ce4affSfengbojiang  * Return the index of the largest entry.
1544*22ce4affSfengbojiang  */
1545*22ce4affSfengbojiang int
1546*22ce4affSfengbojiang vm_phys_avail_largest(void)
1547*22ce4affSfengbojiang {
1548*22ce4affSfengbojiang 	vm_paddr_t sz, largesz;
1549*22ce4affSfengbojiang 	int largest;
1550*22ce4affSfengbojiang 	int i;
1551*22ce4affSfengbojiang 
1552*22ce4affSfengbojiang 	largest = 0;
1553*22ce4affSfengbojiang 	largesz = 0;
1554*22ce4affSfengbojiang 	for (i = 0; phys_avail[i + 1]; i += 2) {
1555*22ce4affSfengbojiang 		sz = vm_phys_avail_size(i);
1556*22ce4affSfengbojiang 		if (sz > largesz) {
1557*22ce4affSfengbojiang 			largesz = sz;
1558*22ce4affSfengbojiang 			largest = i;
1559*22ce4affSfengbojiang 		}
1560*22ce4affSfengbojiang 	}
1561*22ce4affSfengbojiang 
1562*22ce4affSfengbojiang 	return (largest);
1563*22ce4affSfengbojiang }
1564*22ce4affSfengbojiang 
1565*22ce4affSfengbojiang vm_paddr_t
1566*22ce4affSfengbojiang vm_phys_avail_size(int i)
1567*22ce4affSfengbojiang {
1568*22ce4affSfengbojiang 
1569*22ce4affSfengbojiang 	return (phys_avail[i + 1] - phys_avail[i]);
1570*22ce4affSfengbojiang }
1571*22ce4affSfengbojiang 
1572*22ce4affSfengbojiang /*
1573*22ce4affSfengbojiang  * Split an entry at the address 'pa'.  Return zero on success or errno.
1574*22ce4affSfengbojiang  */
1575*22ce4affSfengbojiang static int
1576*22ce4affSfengbojiang vm_phys_avail_split(vm_paddr_t pa, int i)
1577*22ce4affSfengbojiang {
1578*22ce4affSfengbojiang 	int cnt;
1579*22ce4affSfengbojiang 
1580*22ce4affSfengbojiang 	vm_phys_avail_check(i);
1581*22ce4affSfengbojiang 	if (pa <= phys_avail[i] || pa >= phys_avail[i + 1])
1582*22ce4affSfengbojiang 		panic("vm_phys_avail_split: invalid address");
1583*22ce4affSfengbojiang 	cnt = vm_phys_avail_count();
1584*22ce4affSfengbojiang 	if (cnt >= PHYS_AVAIL_ENTRIES)
1585*22ce4affSfengbojiang 		return (ENOSPC);
1586*22ce4affSfengbojiang 	memmove(&phys_avail[i + 2], &phys_avail[i],
1587*22ce4affSfengbojiang 	    (cnt - i) * sizeof(phys_avail[0]));
1588*22ce4affSfengbojiang 	phys_avail[i + 1] = pa;
1589*22ce4affSfengbojiang 	phys_avail[i + 2] = pa;
1590*22ce4affSfengbojiang 	vm_phys_avail_check(i);
1591*22ce4affSfengbojiang 	vm_phys_avail_check(i+2);
1592*22ce4affSfengbojiang 
1593*22ce4affSfengbojiang 	return (0);
1594*22ce4affSfengbojiang }
1595*22ce4affSfengbojiang 
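Editorial note, not part of the source: the memmove above opens a two-slot gap and both new slots receive the split address, turning one start/end pair into two. A userland sketch on a plain array, with hypothetical values and without the capacity check:

#include <stdint.h>
#include <string.h>

/* Split entry "i" of a phys_avail-style array (start/end pairs, terminated
 * by a zero end) at address "pa"; "cnt" is the index of the first unused
 * slot. */
static void
avail_split(uint64_t *avail, int cnt, int i, uint64_t pa)
{
	memmove(&avail[i + 2], &avail[i], (cnt - i) * sizeof(avail[0]));
	avail[i + 1] = pa;
	avail[i + 2] = pa;
}

/* With avail = { 0x1000, 0x9000, 0, 0, 0, 0 } and cnt == 2, splitting entry
 * 0 at 0x6000 yields { 0x1000, 0x6000, 0x6000, 0x9000, 0, 0 }. */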
1596*22ce4affSfengbojiang void
1597*22ce4affSfengbojiang vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end)
1598*22ce4affSfengbojiang {
1599*22ce4affSfengbojiang 	struct vm_phys_seg *seg;
1600*22ce4affSfengbojiang 
1601*22ce4affSfengbojiang 	if (vm_phys_early_nsegs == -1)
1602*22ce4affSfengbojiang 		panic("%s: called after initialization", __func__);
1603*22ce4affSfengbojiang 	if (vm_phys_early_nsegs == nitems(vm_phys_early_segs))
1604*22ce4affSfengbojiang 		panic("%s: ran out of early segments", __func__);
1605*22ce4affSfengbojiang 
1606*22ce4affSfengbojiang 	seg = &vm_phys_early_segs[vm_phys_early_nsegs++];
1607*22ce4affSfengbojiang 	seg->start = start;
1608*22ce4affSfengbojiang 	seg->end = end;
1609*22ce4affSfengbojiang }
1610*22ce4affSfengbojiang 
1611*22ce4affSfengbojiang /*
1612*22ce4affSfengbojiang  * This routine allocates NUMA node specific memory before the page
1613*22ce4affSfengbojiang  * allocator is bootstrapped.
1614*22ce4affSfengbojiang  */
1615*22ce4affSfengbojiang vm_paddr_t
1616*22ce4affSfengbojiang vm_phys_early_alloc(int domain, size_t alloc_size)
1617*22ce4affSfengbojiang {
1618*22ce4affSfengbojiang 	int i, mem_index, biggestone;
1619*22ce4affSfengbojiang 	vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align;
1620*22ce4affSfengbojiang 
1621*22ce4affSfengbojiang 	KASSERT(domain == -1 || (domain >= 0 && domain < vm_ndomains),
1622*22ce4affSfengbojiang 	    ("%s: invalid domain index %d", __func__, domain));
1623*22ce4affSfengbojiang 
1624*22ce4affSfengbojiang 	/*
1625*22ce4affSfengbojiang 	 * Search the mem_affinity array for the biggest address
1626*22ce4affSfengbojiang 	 * range in the desired domain.  This is used to constrain
1627*22ce4affSfengbojiang 	 * the phys_avail selection below.
1628*22ce4affSfengbojiang 	 */
1629*22ce4affSfengbojiang 	biggestsize = 0;
1630*22ce4affSfengbojiang 	mem_index = 0;
1631*22ce4affSfengbojiang 	mem_start = 0;
1632*22ce4affSfengbojiang 	mem_end = -1;
1633*22ce4affSfengbojiang #ifdef NUMA
1634*22ce4affSfengbojiang 	if (mem_affinity != NULL) {
1635*22ce4affSfengbojiang 		for (i = 0;; i++) {
1636*22ce4affSfengbojiang 			size = mem_affinity[i].end - mem_affinity[i].start;
1637*22ce4affSfengbojiang 			if (size == 0)
1638*22ce4affSfengbojiang 				break;
1639*22ce4affSfengbojiang 			if (domain != -1 && mem_affinity[i].domain != domain)
1640*22ce4affSfengbojiang 				continue;
1641*22ce4affSfengbojiang 			if (size > biggestsize) {
1642*22ce4affSfengbojiang 				mem_index = i;
1643*22ce4affSfengbojiang 				biggestsize = size;
1644*22ce4affSfengbojiang 			}
1645*22ce4affSfengbojiang 		}
1646*22ce4affSfengbojiang 		mem_start = mem_affinity[mem_index].start;
1647*22ce4affSfengbojiang 		mem_end = mem_affinity[mem_index].end;
1648*22ce4affSfengbojiang 	}
1649*22ce4affSfengbojiang #endif
1650*22ce4affSfengbojiang 
1651*22ce4affSfengbojiang 	/*
1652*22ce4affSfengbojiang 	 * Now find the biggest physical segment within the desired
1653*22ce4affSfengbojiang 	 * NUMA domain.
1654*22ce4affSfengbojiang 	 */
1655*22ce4affSfengbojiang 	biggestsize = 0;
1656*22ce4affSfengbojiang 	biggestone = 0;
1657*22ce4affSfengbojiang 	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1658*22ce4affSfengbojiang 		/* skip regions that are out of range */
1659*22ce4affSfengbojiang 		if (phys_avail[i+1] - alloc_size < mem_start ||
1660*22ce4affSfengbojiang 		    phys_avail[i+1] > mem_end)
1661*22ce4affSfengbojiang 			continue;
1662*22ce4affSfengbojiang 		size = vm_phys_avail_size(i);
1663*22ce4affSfengbojiang 		if (size > biggestsize) {
1664*22ce4affSfengbojiang 			biggestone = i;
1665*22ce4affSfengbojiang 			biggestsize = size;
1666*22ce4affSfengbojiang 		}
1667*22ce4affSfengbojiang 	}
1668*22ce4affSfengbojiang 	alloc_size = round_page(alloc_size);
1669*22ce4affSfengbojiang 
1670*22ce4affSfengbojiang 	/*
1671*22ce4affSfengbojiang 	 * Grab single pages from the front to reduce fragmentation.
1672*22ce4affSfengbojiang 	 */
1673*22ce4affSfengbojiang 	if (alloc_size == PAGE_SIZE) {
1674*22ce4affSfengbojiang 		pa = phys_avail[biggestone];
1675*22ce4affSfengbojiang 		phys_avail[biggestone] += PAGE_SIZE;
1676*22ce4affSfengbojiang 		vm_phys_avail_check(biggestone);
1677*22ce4affSfengbojiang 		return (pa);
1678*22ce4affSfengbojiang 	}
1679*22ce4affSfengbojiang 
1680*22ce4affSfengbojiang 	/*
1681*22ce4affSfengbojiang 	 * Naturally align large allocations.
1682*22ce4affSfengbojiang 	 */
1683*22ce4affSfengbojiang 	align = phys_avail[biggestone + 1] & (alloc_size - 1);
1684*22ce4affSfengbojiang 	if (alloc_size + align > biggestsize)
1685*22ce4affSfengbojiang 		panic("cannot find a large enough size\n");
1686*22ce4affSfengbojiang 	if (align != 0 &&
1687*22ce4affSfengbojiang 	    vm_phys_avail_split(phys_avail[biggestone + 1] - align,
1688*22ce4affSfengbojiang 	    biggestone) != 0)
1689*22ce4affSfengbojiang 		/* Wasting memory. */
1690*22ce4affSfengbojiang 		phys_avail[biggestone + 1] -= align;
1691*22ce4affSfengbojiang 
1692*22ce4affSfengbojiang 	phys_avail[biggestone + 1] -= alloc_size;
1693*22ce4affSfengbojiang 	vm_phys_avail_check(biggestone);
1694*22ce4affSfengbojiang 	pa = phys_avail[biggestone + 1];
1695*22ce4affSfengbojiang 	return (pa);
1696*22ce4affSfengbojiang }
1697*22ce4affSfengbojiang 
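Editorial note, not part of the source: the "naturally align" step above trims the unaligned tail of the chosen region and then carves the allocation from its (now aligned) end. The resulting address can be checked with simple arithmetic, assuming a power-of-two size and hypothetical values:

#include <stdint.h>

/* Highest "size"-aligned address at which "size" bytes still fit below
 * "end"; size must be a power of two. */
static uint64_t
alloc_top_aligned(uint64_t end, uint64_t size)
{
	uint64_t align = end & (size - 1);	/* bytes above the last aligned end */

	return (end - align - size);
}

/* alloc_top_aligned(0x123000, 0x10000) == 0x110000, which is 64 KB aligned
 * and lies below 0x123000. */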
1698*22ce4affSfengbojiang void
1699*22ce4affSfengbojiang vm_phys_early_startup(void)
1700*22ce4affSfengbojiang {
1701*22ce4affSfengbojiang 	struct vm_phys_seg *seg;
1702*22ce4affSfengbojiang 	int i;
1703*22ce4affSfengbojiang 
1704*22ce4affSfengbojiang 	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1705*22ce4affSfengbojiang 		phys_avail[i] = round_page(phys_avail[i]);
1706*22ce4affSfengbojiang 		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
1707*22ce4affSfengbojiang 	}
1708*22ce4affSfengbojiang 
1709*22ce4affSfengbojiang 	for (i = 0; i < vm_phys_early_nsegs; i++) {
1710*22ce4affSfengbojiang 		seg = &vm_phys_early_segs[i];
1711*22ce4affSfengbojiang 		vm_phys_add_seg(seg->start, seg->end);
1712*22ce4affSfengbojiang 	}
1713*22ce4affSfengbojiang 	vm_phys_early_nsegs = -1;
1714*22ce4affSfengbojiang 
1715*22ce4affSfengbojiang #ifdef NUMA
1716*22ce4affSfengbojiang 	/* Force phys_avail to be split by domain. */
1717*22ce4affSfengbojiang 	if (mem_affinity != NULL) {
1718*22ce4affSfengbojiang 		int idx;
1719*22ce4affSfengbojiang 
1720*22ce4affSfengbojiang 		for (i = 0; mem_affinity[i].end != 0; i++) {
1721*22ce4affSfengbojiang 			idx = vm_phys_avail_find(mem_affinity[i].start);
1722*22ce4affSfengbojiang 			if (idx != -1 &&
1723*22ce4affSfengbojiang 			    phys_avail[idx] != mem_affinity[i].start)
1724*22ce4affSfengbojiang 				vm_phys_avail_split(mem_affinity[i].start, idx);
1725*22ce4affSfengbojiang 			idx = vm_phys_avail_find(mem_affinity[i].end);
1726*22ce4affSfengbojiang 			if (idx != -1 &&
1727*22ce4affSfengbojiang 			    phys_avail[idx] != mem_affinity[i].end)
1728*22ce4affSfengbojiang 				vm_phys_avail_split(mem_affinity[i].end, idx);
1729*22ce4affSfengbojiang 		}
1730*22ce4affSfengbojiang 	}
1731*22ce4affSfengbojiang #endif
1732*22ce4affSfengbojiang }
1733*22ce4affSfengbojiang 
1734a9643ea8Slogwang #ifdef DDB
1735a9643ea8Slogwang /*
1736a9643ea8Slogwang  * Show the number of physical pages in each of the free lists.
1737a9643ea8Slogwang  */
1738a9643ea8Slogwang DB_SHOW_COMMAND(freepages, db_show_freepages)
1739a9643ea8Slogwang {
1740a9643ea8Slogwang 	struct vm_freelist *fl;
1741a9643ea8Slogwang 	int flind, oind, pind, dom;
1742a9643ea8Slogwang 
1743a9643ea8Slogwang 	for (dom = 0; dom < vm_ndomains; dom++) {
1744a9643ea8Slogwang 		db_printf("DOMAIN: %d\n", dom);
1745a9643ea8Slogwang 		for (flind = 0; flind < vm_nfreelists; flind++) {
1746a9643ea8Slogwang 			db_printf("FREE LIST %d:\n"
1747a9643ea8Slogwang 			    "\n  ORDER (SIZE)  |  NUMBER"
1748a9643ea8Slogwang 			    "\n              ", flind);
1749a9643ea8Slogwang 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
1750a9643ea8Slogwang 				db_printf("  |  POOL %d", pind);
1751a9643ea8Slogwang 			db_printf("\n--            ");
1752a9643ea8Slogwang 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
1753a9643ea8Slogwang 				db_printf("-- --      ");
1754a9643ea8Slogwang 			db_printf("--\n");
1755a9643ea8Slogwang 			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
1756a9643ea8Slogwang 				db_printf("  %2.2d (%6.6dK)", oind,
1757a9643ea8Slogwang 				    1 << (PAGE_SHIFT - 10 + oind));
1758a9643ea8Slogwang 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
1759a9643ea8Slogwang 				fl = vm_phys_free_queues[dom][flind][pind];
1760a9643ea8Slogwang 					db_printf("  |  %6.6d", fl[oind].lcnt);
1761a9643ea8Slogwang 				}
1762a9643ea8Slogwang 				db_printf("\n");
1763a9643ea8Slogwang 			}
1764a9643ea8Slogwang 			db_printf("\n");
1765a9643ea8Slogwang 		}
1766a9643ea8Slogwang 		db_printf("\n");
1767a9643ea8Slogwang 	}
1768a9643ea8Slogwang }
1769a9643ea8Slogwang #endif
1770