/*-
 * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
 *
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  [email protected]
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Virtual memory object module.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/blockcount.h>
#include <sys/cpuset.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/pctrie.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>		/* for curproc, pageproc */
#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

static int old_msync;
SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
    "Use old (insecure) msync behavior");

static int	vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
		    int pagerflags, int flags, boolean_t *allclean,
		    boolean_t *eio);
static boolean_t vm_object_page_remove_write(vm_page_t p, int flags,
		    boolean_t *allclean);
static void	vm_object_backing_remove(vm_object_t object);

/*
 * Virtual memory objects maintain the actual data
 * associated with allocated virtual memory.  A given
 * page of memory exists within exactly one object.
 *
 * An object is only deallocated when all "references"
 * are given up.  Only one "reference" to a given
 * region of an object should be writeable.
 *
 * Associated with each object is a list of all resident
 * memory pages belonging to that object; this list is
 * maintained by the "vm_page" module, and locked by the object's
 * lock.
 *
 * Each object also records a "pager" routine which is
 * used to retrieve (and store) pages to the proper backing
 * storage.  In addition, objects may be backed by other
 * objects from which they were virtual-copied.
 *
 * The only items within the object structure which are
 * modified after time of creation are:
 *	reference count		locked by object's lock
 *	pager routine		locked by object's lock
 *
 */
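
/*
 * Illustrative sketch (not part of this file's code): the typical life
 * cycle of an object as seen through the public routines defined below.
 * "len" is a hypothetical byte length; mapping of the object and all
 * error handling are omitted.
 *
 *	vm_object_t obj;
 *
 *	obj = vm_object_allocate(OBJT_DEFAULT, atop(len));
 *	vm_object_reference(obj);	// extra reference for a second consumer
 *	...
 *	vm_object_deallocate(obj);	// drop the extra reference
 *	vm_object_deallocate(obj);	// last reference: the object is terminated
 */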

struct object_q vm_object_list;
struct mtx vm_object_list_mtx;	/* lock for object list and count */

struct vm_object kernel_object_store;

static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "VM object stats");

static COUNTER_U64_DEFINE_EARLY(object_collapses);
SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
    &object_collapses,
    "VM object collapses");

static COUNTER_U64_DEFINE_EARLY(object_bypasses);
SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
    &object_bypasses,
    "VM object bypasses");

static COUNTER_U64_DEFINE_EARLY(object_collapse_waits);
SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapse_waits, CTLFLAG_RD,
    &object_collapse_waits,
    "Number of sleeps for collapse");

static uma_zone_t obj_zone;

static int vm_object_zinit(void *mem, int size, int flags);

#ifdef INVARIANTS
static void vm_object_zdtor(void *mem, int size, void *arg);

static void
vm_object_zdtor(void *mem, int size, void *arg)
{
	vm_object_t object;

	object = (vm_object_t)mem;
	KASSERT(object->ref_count == 0,
	    ("object %p ref_count = %d", object, object->ref_count));
	KASSERT(TAILQ_EMPTY(&object->memq),
	    ("object %p has resident pages in its memq", object));
	KASSERT(vm_radix_is_empty(&object->rtree),
	    ("object %p has resident pages in its trie", object));
#if VM_NRESERVLEVEL > 0
	KASSERT(LIST_EMPTY(&object->rvq),
	    ("object %p has reservations",
	    object));
#endif
	KASSERT(!vm_object_busied(object),
	    ("object %p busy = %d", object, blockcount_read(&object->busy)));
	KASSERT(object->resident_page_count == 0,
	    ("object %p resident_page_count = %d",
	    object, object->resident_page_count));
	KASSERT(object->shadow_count == 0,
	    ("object %p shadow_count = %d",
	    object, object->shadow_count));
	KASSERT(object->type == OBJT_DEAD,
	    ("object %p has non-dead type %d",
	    object, object->type));
}
#endif

static int
vm_object_zinit(void *mem, int size, int flags)
{
	vm_object_t object;

	object = (vm_object_t)mem;
	rw_init_flags(&object->lock, "vm object", RW_DUPOK | RW_NEW);

	/* These are true for any object that has been freed */
	object->type = OBJT_DEAD;
	vm_radix_init(&object->rtree);
	refcount_init(&object->ref_count, 0);
	blockcount_init(&object->paging_in_progress);
	blockcount_init(&object->busy);
	object->resident_page_count = 0;
	object->shadow_count = 0;
	object->flags = OBJ_DEAD;

	mtx_lock(&vm_object_list_mtx);
	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	mtx_unlock(&vm_object_list_mtx);
	return (0);
}

static void
_vm_object_allocate(objtype_t type, vm_pindex_t size, u_short flags,
    vm_object_t object, void *handle)
{

	TAILQ_INIT(&object->memq);
	LIST_INIT(&object->shadow_head);

	object->type = type;
	if (type == OBJT_SWAP)
		pctrie_init(&object->un_pager.swp.swp_blks);

	/*
	 * Ensure that swap_pager_swapoff() iteration over object_list
	 * sees up to date type and pctrie head if it observed
	 * non-dead object.
	 */
	atomic_thread_fence_rel();

	object->pg_color = 0;
	object->flags = flags;
	object->size = size;
	object->domain.dr_policy = NULL;
	object->generation = 1;
	object->cleangeneration = 1;
	refcount_init(&object->ref_count, 1);
	object->memattr = VM_MEMATTR_DEFAULT;
	object->cred = NULL;
	object->charge = 0;
	object->handle = handle;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_ooffset_t) 0;
#if VM_NRESERVLEVEL > 0
	LIST_INIT(&object->rvq);
#endif
	umtx_shm_object_init(object);
}

/*
 * vm_object_init:
 *
 * Initialize the VM objects module.
 */
void
vm_object_init(void)
{
	TAILQ_INIT(&vm_object_list);
	mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);

	rw_init(&kernel_object->lock, "kernel vm object");
	_vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS -
	    VM_MIN_KERNEL_ADDRESS), OBJ_UNMANAGED, kernel_object, NULL);
#if VM_NRESERVLEVEL > 0
	kernel_object->flags |= OBJ_COLORED;
	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
#endif
	kernel_object->un_pager.phys.ops = &default_phys_pg_ops;

	/*
	 * The lock portion of struct vm_object must be type stable due
	 * to vm_pageout_fallback_object_lock locking a vm object
	 * without holding any references to it.
	 *
	 * paging_in_progress is valid always.  Lockless references to
	 * the objects may acquire pip and then check OBJ_DEAD.
	 */
	obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
#ifdef INVARIANTS
	    vm_object_zdtor,
#else
	    NULL,
#endif
	    vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);

	vm_radix_zinit();
}

void
vm_object_clear_flag(vm_object_t object, u_short bits)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	object->flags &= ~bits;
}

/*
 * Sets the default memory attribute for the specified object.  Pages
 * that are allocated to this object are by default assigned this memory
 * attribute.
 *
 * Presently, this function must be called before any pages are allocated
 * to the object.  In the future, this requirement may be relaxed for
 * "default" and "swap" objects.
 */
int
vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	switch (object->type) {
	case OBJT_DEFAULT:
	case OBJT_DEVICE:
	case OBJT_MGTDEVICE:
	case OBJT_PHYS:
	case OBJT_SG:
	case OBJT_SWAP:
	case OBJT_VNODE:
		if (!TAILQ_EMPTY(&object->memq))
			return (KERN_FAILURE);
		break;
	case OBJT_DEAD:
		return (KERN_INVALID_ARGUMENT);
	default:
		panic("vm_object_set_memattr: object %p is of undefined type",
		    object);
	}
	object->memattr = memattr;
	return (KERN_SUCCESS);
}
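
/*
 * Illustrative sketch (not part of this file's code): a non-default memory
 * attribute has to be set right after allocation, while the object still
 * has no resident pages; otherwise the routine above returns KERN_FAILURE.
 * VM_MEMATTR_UNCACHEABLE is only an example attribute and is
 * machine-dependent.
 *
 *	obj = vm_object_allocate(OBJT_PHYS, atop(len));
 *	VM_OBJECT_WLOCK(obj);
 *	error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE);
 *	VM_OBJECT_WUNLOCK(obj);
 */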

void
vm_object_pip_add(vm_object_t object, short i)
{

	if (i > 0)
		blockcount_acquire(&object->paging_in_progress, i);
}

void
vm_object_pip_wakeup(vm_object_t object)
{

	vm_object_pip_wakeupn(object, 1);
}

void
vm_object_pip_wakeupn(vm_object_t object, short i)
{

	if (i > 0)
		blockcount_release(&object->paging_in_progress, i);
}

/*
 * Atomically drop the object lock and wait for pip to drain.  This protects
 * from sleep/wakeup races due to identity changes.  The lock is not
 * re-acquired on return.
 */
static void
vm_object_pip_sleep(vm_object_t object, const char *waitid)
{

	(void)blockcount_sleep(&object->paging_in_progress, &object->lock,
	    waitid, PVM | PDROP);
}

void
vm_object_pip_wait(vm_object_t object, const char *waitid)
{

	VM_OBJECT_ASSERT_WLOCKED(object);

	blockcount_wait(&object->paging_in_progress, &object->lock, waitid,
	    PVM);
}

void
vm_object_pip_wait_unlocked(vm_object_t object, const char *waitid)
{

	VM_OBJECT_ASSERT_UNLOCKED(object);

	blockcount_wait(&object->paging_in_progress, NULL, waitid, PVM);
}
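
/*
 * Illustrative sketch (not part of this file's code): paging-in-progress
 * (pip) holds off vm_object_terminate() while work proceeds without the
 * object lock.  A typical bracket, using only the routines above, looks
 * like this; the I/O step is a placeholder:
 *
 *	VM_OBJECT_WLOCK(object);
 *	vm_object_pip_add(object, 1);
 *	VM_OBJECT_WUNLOCK(object);
 *	... perform paging I/O without the object lock ...
 *	vm_object_pip_wakeup(object);
 *
 * Lockless users instead acquire pip and then re-check OBJ_DEAD, as noted
 * in vm_object_init().
 */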

/*
 * vm_object_allocate:
 *
 * Returns a new object with the given size.
 */
vm_object_t
vm_object_allocate(objtype_t type, vm_pindex_t size)
{
	vm_object_t object;
	u_short flags;

	switch (type) {
	case OBJT_DEAD:
		panic("vm_object_allocate: can't create OBJT_DEAD");
	case OBJT_DEFAULT:
	case OBJT_SWAP:
		flags = OBJ_COLORED;
		break;
	case OBJT_DEVICE:
	case OBJT_SG:
		flags = OBJ_FICTITIOUS | OBJ_UNMANAGED;
		break;
	case OBJT_MGTDEVICE:
		flags = OBJ_FICTITIOUS;
		break;
	case OBJT_PHYS:
		flags = OBJ_UNMANAGED;
		break;
	case OBJT_VNODE:
		flags = 0;
		break;
	default:
		panic("vm_object_allocate: type %d is undefined", type);
	}
	object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
	_vm_object_allocate(type, size, flags, object, NULL);

	return (object);
}

/*
 * vm_object_allocate_anon:
 *
 * Returns a new default object of the given size and marked as
 * anonymous memory for special split/collapse handling.  Color
 * to be initialized by the caller.
 */
vm_object_t
vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object,
    struct ucred *cred, vm_size_t charge)
{
	vm_object_t handle, object;

	if (backing_object == NULL)
		handle = NULL;
	else if ((backing_object->flags & OBJ_ANON) != 0)
		handle = backing_object->handle;
	else
		handle = backing_object;
	object = uma_zalloc(obj_zone, M_WAITOK);
	_vm_object_allocate(OBJT_DEFAULT, size, OBJ_ANON | OBJ_ONEMAPPING,
	    object, handle);
	object->cred = cred;
	object->charge = cred != NULL ? charge : 0;
	return (object);
}

static void
vm_object_reference_vnode(vm_object_t object)
{
	u_int old;

	/*
	 * vnode objects need the lock for the first reference
	 * to serialize with vnode_object_deallocate().
	 */
	if (!refcount_acquire_if_gt(&object->ref_count, 0)) {
		VM_OBJECT_RLOCK(object);
		old = refcount_acquire(&object->ref_count);
		if (object->type == OBJT_VNODE && old == 0)
			vref(object->handle);
		VM_OBJECT_RUNLOCK(object);
	}
}

/*
 * vm_object_reference:
 *
 * Acquires a reference to the given object.
 */
void
vm_object_reference(vm_object_t object)
{

	if (object == NULL)
		return;

	if (object->type == OBJT_VNODE)
		vm_object_reference_vnode(object);
	else
		refcount_acquire(&object->ref_count);
	KASSERT((object->flags & OBJ_DEAD) == 0,
	    ("vm_object_reference: Referenced dead object."));
}

/*
 * vm_object_reference_locked:
 *
 * Gets another reference to the given object.
 *
 * The object must be locked.
 */
void
vm_object_reference_locked(vm_object_t object)
{
	u_int old;

	VM_OBJECT_ASSERT_LOCKED(object);
	old = refcount_acquire(&object->ref_count);
	if (object->type == OBJT_VNODE && old == 0)
		vref(object->handle);
	KASSERT((object->flags & OBJ_DEAD) == 0,
	    ("vm_object_reference: Referenced dead object."));
}

/*
 * Handle deallocating an object of type OBJT_VNODE.
 */
static void
vm_object_deallocate_vnode(vm_object_t object)
{
	struct vnode *vp = (struct vnode *) object->handle;
	bool last;

	KASSERT(object->type == OBJT_VNODE,
	    ("vm_object_deallocate_vnode: not a vnode object"));
	KASSERT(vp != NULL, ("vm_object_deallocate_vnode: missing vp"));

	/* Object lock to protect handle lookup. */
	last = refcount_release(&object->ref_count);
	VM_OBJECT_RUNLOCK(object);

	if (!last)
		return;

	if (!umtx_shm_vnobj_persistent)
		umtx_shm_object_terminated(object);

	/* vrele may need the vnode lock. */
	vrele(vp);
}

/*
 * We dropped a reference on an object and discovered that it had a
 * single remaining shadow.  This is a sibling of the reference we
 * dropped.  Attempt to collapse the sibling and backing object.
 */
static vm_object_t
vm_object_deallocate_anon(vm_object_t backing_object)
{
	vm_object_t object;

	/* Fetch the final shadow. */
	object = LIST_FIRST(&backing_object->shadow_head);
	KASSERT(object != NULL && backing_object->shadow_count == 1,
	    ("vm_object_anon_deallocate: ref_count: %d, shadow_count: %d",
	    backing_object->ref_count, backing_object->shadow_count));
	KASSERT((object->flags & (OBJ_TMPFS_NODE | OBJ_ANON)) == OBJ_ANON,
	    ("invalid shadow object %p", object));

	if (!VM_OBJECT_TRYWLOCK(object)) {
		/*
		 * Prevent object from disappearing since we do not have a
		 * reference.
		 */
		vm_object_pip_add(object, 1);
		VM_OBJECT_WUNLOCK(backing_object);
		VM_OBJECT_WLOCK(object);
		vm_object_pip_wakeup(object);
	} else
		VM_OBJECT_WUNLOCK(backing_object);

	/*
	 * Check for a collapse/terminate race with the last reference holder.
	 */
	if ((object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) != 0 ||
	    !refcount_acquire_if_not_zero(&object->ref_count)) {
		VM_OBJECT_WUNLOCK(object);
		return (NULL);
	}
	backing_object = object->backing_object;
	if (backing_object != NULL && (backing_object->flags & OBJ_ANON) != 0)
		vm_object_collapse(object);
	VM_OBJECT_WUNLOCK(object);

	return (object);
}

/*
 * vm_object_deallocate:
 *
 * Release a reference to the specified object,
 * gained either through a vm_object_allocate
 * or a vm_object_reference call.  When all references
 * are gone, storage associated with this object
 * may be relinquished.
 *
 * No object may be locked.
 */
void
vm_object_deallocate(vm_object_t object)
{
	vm_object_t temp;
	bool released;

	while (object != NULL) {
		/*
		 * If the reference count goes to 0 we start calling
		 * vm_object_terminate() on the object chain.  A ref count
		 * of 1 may be a special case depending on the shadow count
		 * being 0 or 1.  These cases require a write lock on the
		 * object.
		 */
		if ((object->flags & OBJ_ANON) == 0)
			released = refcount_release_if_gt(&object->ref_count, 1);
		else
			released = refcount_release_if_gt(&object->ref_count, 2);
		if (released)
			return;

		if (object->type == OBJT_VNODE) {
			VM_OBJECT_RLOCK(object);
			if (object->type == OBJT_VNODE) {
				vm_object_deallocate_vnode(object);
				return;
			}
			VM_OBJECT_RUNLOCK(object);
		}

		VM_OBJECT_WLOCK(object);
		KASSERT(object->ref_count > 0,
		    ("vm_object_deallocate: object deallocated too many times: %d",
		    object->type));

		/*
		 * If this is not the final reference to an anonymous
		 * object we may need to collapse the shadow chain.
		 */
		if (!refcount_release(&object->ref_count)) {
			if (object->ref_count > 1 ||
			    object->shadow_count == 0) {
				if ((object->flags & OBJ_ANON) != 0 &&
				    object->ref_count == 1)
					vm_object_set_flag(object,
					    OBJ_ONEMAPPING);
				VM_OBJECT_WUNLOCK(object);
				return;
			}

			/* Handle collapsing last ref on anonymous objects. */
			object = vm_object_deallocate_anon(object);
			continue;
		}

		/*
		 * Handle the final reference to an object.  We restart
		 * the loop with the backing object to avoid recursion.
		 */
		umtx_shm_object_terminated(object);
		temp = object->backing_object;
		if (temp != NULL) {
			KASSERT((object->flags & OBJ_TMPFS_NODE) == 0,
			    ("shadowed tmpfs v_object 2 %p", object));
			vm_object_backing_remove(object);
		}

		KASSERT((object->flags & OBJ_DEAD) == 0,
		    ("vm_object_deallocate: Terminating dead object."));
		vm_object_set_flag(object, OBJ_DEAD);
		vm_object_terminate(object);
		object = temp;
	}
}

/*
 * vm_object_destroy removes the object from the global object list
 * and frees the space for the object.
 */
void
vm_object_destroy(vm_object_t object)
{

	/*
	 * Release the allocation charge.
	 */
	if (object->cred != NULL) {
		swap_release_by_cred(object->charge, object->cred);
		object->charge = 0;
		crfree(object->cred);
		object->cred = NULL;
	}

	/*
	 * Free the space for the object.
	 */
	uma_zfree(obj_zone, object);
}

static void
vm_object_backing_remove_locked(vm_object_t object)
{
	vm_object_t backing_object;

	backing_object = object->backing_object;
	VM_OBJECT_ASSERT_WLOCKED(object);
	VM_OBJECT_ASSERT_WLOCKED(backing_object);

	KASSERT((object->flags & OBJ_COLLAPSING) == 0,
	    ("vm_object_backing_remove: Removing collapsing object."));

	if ((object->flags & OBJ_SHADOWLIST) != 0) {
		LIST_REMOVE(object, shadow_list);
		backing_object->shadow_count--;
		object->flags &= ~OBJ_SHADOWLIST;
	}
	object->backing_object = NULL;
}

static void
vm_object_backing_remove(vm_object_t object)
{
	vm_object_t backing_object;

	VM_OBJECT_ASSERT_WLOCKED(object);

	if ((object->flags & OBJ_SHADOWLIST) != 0) {
		backing_object = object->backing_object;
		VM_OBJECT_WLOCK(backing_object);
		vm_object_backing_remove_locked(object);
		VM_OBJECT_WUNLOCK(backing_object);
	} else
		object->backing_object = NULL;
}

static void
vm_object_backing_insert_locked(vm_object_t object, vm_object_t backing_object)
{

	VM_OBJECT_ASSERT_WLOCKED(object);

	if ((backing_object->flags & OBJ_ANON) != 0) {
		VM_OBJECT_ASSERT_WLOCKED(backing_object);
		LIST_INSERT_HEAD(&backing_object->shadow_head, object,
		    shadow_list);
		backing_object->shadow_count++;
		object->flags |= OBJ_SHADOWLIST;
	}
	object->backing_object = backing_object;
}

static void
vm_object_backing_insert(vm_object_t object, vm_object_t backing_object)
{

	VM_OBJECT_ASSERT_WLOCKED(object);

	if ((backing_object->flags & OBJ_ANON) != 0) {
		VM_OBJECT_WLOCK(backing_object);
		vm_object_backing_insert_locked(object, backing_object);
		VM_OBJECT_WUNLOCK(backing_object);
	} else
		object->backing_object = backing_object;
}

/*
 * Insert an object into a backing_object's shadow list with an additional
 * reference to the backing_object added.
 */
static void
vm_object_backing_insert_ref(vm_object_t object, vm_object_t backing_object)
{

	VM_OBJECT_ASSERT_WLOCKED(object);

	if ((backing_object->flags & OBJ_ANON) != 0) {
		VM_OBJECT_WLOCK(backing_object);
		KASSERT((backing_object->flags & OBJ_DEAD) == 0,
		    ("shadowing dead anonymous object"));
		vm_object_reference_locked(backing_object);
		vm_object_backing_insert_locked(object, backing_object);
		vm_object_clear_flag(backing_object, OBJ_ONEMAPPING);
		VM_OBJECT_WUNLOCK(backing_object);
	} else {
		vm_object_reference(backing_object);
		object->backing_object = backing_object;
	}
}

/*
 * Transfer a backing reference from backing_object to object.
 */
static void
vm_object_backing_transfer(vm_object_t object, vm_object_t backing_object)
{
	vm_object_t new_backing_object;

	/*
	 * Note that the reference to backing_object->backing_object
	 * moves from within backing_object to within object.
	 */
	vm_object_backing_remove_locked(object);
	new_backing_object = backing_object->backing_object;
	if (new_backing_object == NULL)
		return;
	if ((new_backing_object->flags & OBJ_ANON) != 0) {
		VM_OBJECT_WLOCK(new_backing_object);
		vm_object_backing_remove_locked(backing_object);
		vm_object_backing_insert_locked(object, new_backing_object);
		VM_OBJECT_WUNLOCK(new_backing_object);
	} else {
		object->backing_object = new_backing_object;
		backing_object->backing_object = NULL;
	}
}

/*
 * Wait for a concurrent collapse to settle.
 */
static void
vm_object_collapse_wait(vm_object_t object)
{

	VM_OBJECT_ASSERT_WLOCKED(object);

	while ((object->flags & OBJ_COLLAPSING) != 0) {
		vm_object_pip_wait(object, "vmcolwait");
		counter_u64_add(object_collapse_waits, 1);
	}
}

/*
 * Waits for a backing object to clear a pending collapse and returns
 * it locked if it is an ANON object.
 */
static vm_object_t
vm_object_backing_collapse_wait(vm_object_t object)
{
	vm_object_t backing_object;

	VM_OBJECT_ASSERT_WLOCKED(object);

	for (;;) {
		backing_object = object->backing_object;
		if (backing_object == NULL ||
		    (backing_object->flags & OBJ_ANON) == 0)
			return (NULL);
		VM_OBJECT_WLOCK(backing_object);
		if ((backing_object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) == 0)
			break;
		VM_OBJECT_WUNLOCK(object);
		vm_object_pip_sleep(backing_object, "vmbckwait");
		counter_u64_add(object_collapse_waits, 1);
		VM_OBJECT_WLOCK(object);
	}
	return (backing_object);
}

/*
 * vm_object_terminate_pages removes any remaining pageable pages
 * from the object and resets the object to an empty state.
 */
static void
vm_object_terminate_pages(vm_object_t object)
{
	vm_page_t p, p_next;

	VM_OBJECT_ASSERT_WLOCKED(object);

	/*
	 * Free any remaining pageable pages.  This also removes them from the
	 * paging queues.  However, don't free wired pages, just remove them
	 * from the object.  Rather than incrementally removing each page from
	 * the object, the page and object are reset to an empty state.
	 */
	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
		vm_page_assert_unbusied(p);
		KASSERT(p->object == object &&
		    (p->ref_count & VPRC_OBJREF) != 0,
		    ("vm_object_terminate_pages: page %p is inconsistent", p));

		p->object = NULL;
		if (vm_page_drop(p, VPRC_OBJREF) == VPRC_OBJREF) {
			VM_CNT_INC(v_pfree);
			vm_page_free(p);
		}
	}

	/*
	 * If the object contained any pages, then reset it to an empty state.
	 * None of the object's fields, including "resident_page_count", were
	 * modified by the preceding loop.
	 */
	if (object->resident_page_count != 0) {
		vm_radix_reclaim_allnodes(&object->rtree);
		TAILQ_INIT(&object->memq);
		object->resident_page_count = 0;
		if (object->type == OBJT_VNODE)
			vdrop(object->handle);
	}
}

/*
 * vm_object_terminate actually destroys the specified object, freeing
 * up all previously used resources.
 *
 * The object must be locked.
 * This routine may block.
 */
void
vm_object_terminate(vm_object_t object)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT((object->flags & OBJ_DEAD) != 0,
	    ("terminating non-dead obj %p", object));
	KASSERT((object->flags & OBJ_COLLAPSING) == 0,
	    ("terminating collapsing obj %p", object));
	KASSERT(object->backing_object == NULL,
	    ("terminating shadow obj %p", object));

	/*
	 * Wait for the pageout daemon and other current users to be
	 * done with the object.  Note that new paging_in_progress
	 * users can come after this wait, but they must check
	 * OBJ_DEAD flag set (without unlocking the object), and avoid
	 * the object being terminated.
	 */
	vm_object_pip_wait(object, "objtrm");

	KASSERT(object->ref_count == 0,
	    ("vm_object_terminate: object with references, ref_count=%d",
	    object->ref_count));

	if ((object->flags & OBJ_PG_DTOR) == 0)
		vm_object_terminate_pages(object);

#if VM_NRESERVLEVEL > 0
	if (__predict_false(!LIST_EMPTY(&object->rvq)))
		vm_reserv_break_all(object);
#endif

	KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT ||
	    object->type == OBJT_SWAP,
	    ("%s: non-swap obj %p has cred", __func__, object));

	/*
	 * Let the pager know object is dead.
	 */
	vm_pager_deallocate(object);
	VM_OBJECT_WUNLOCK(object);

	vm_object_destroy(object);
}

/*
 * Make the page read-only so that we can clear the object flags.  However, if
 * this is a nosync mmap then the object is likely to stay dirty so do not
 * mess with the page and do not clear the object flags.  Returns TRUE if the
 * page should be flushed, and FALSE otherwise.
 */
static boolean_t
vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *allclean)
{

	vm_page_assert_busied(p);

	/*
	 * If we have been asked to skip nosync pages and this is a
	 * nosync page, skip it.  Note that the object flags were not
	 * cleared in this case so we do not have to set them.
	 */
	if ((flags & OBJPC_NOSYNC) != 0 && (p->a.flags & PGA_NOSYNC) != 0) {
		*allclean = FALSE;
		return (FALSE);
	} else {
		pmap_remove_write(p);
		return (p->dirty != 0);
	}
}

/*
 * vm_object_page_clean
 *
 * Clean all dirty pages in the specified range of object.  Leaves page
 * on whatever queue it is currently on.  If NOSYNC is set then do not
 * write out pages with PGA_NOSYNC set (originally comes from MAP_NOSYNC),
 * leaving the object dirty.
 *
 * For swap objects backing tmpfs regular files, do not flush anything,
 * but remove write protection on the mapped pages to update mtime through
 * mmaped writes.
 *
 * When stuffing pages asynchronously, allow clustering.  XXX we need a
 * synchronous clustering mode implementation.
 *
 * Odd semantics: if start == end, we clean everything.
 *
 * The object must be locked.
 *
 * Returns FALSE if some page from the range was not written, as
 * reported by the pager, and TRUE otherwise.
 */
boolean_t
vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
    int flags)
{
	vm_page_t np, p;
	vm_pindex_t pi, tend, tstart;
	int curgeneration, n, pagerflags;
	boolean_t eio, res, allclean;

	VM_OBJECT_ASSERT_WLOCKED(object);

	if (!vm_object_mightbedirty(object) || object->resident_page_count == 0)
		return (TRUE);

	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ?
	    VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
	pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0;

	tstart = OFF_TO_IDX(start);
	tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK);
	allclean = tstart == 0 && tend >= object->size;
	res = TRUE;

rescan:
	curgeneration = object->generation;

	for (p = vm_page_find_least(object, tstart); p != NULL; p = np) {
		pi = p->pindex;
		if (pi >= tend)
			break;
		np = TAILQ_NEXT(p, listq);
		if (vm_page_none_valid(p))
			continue;
		if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) {
			if (object->generation != curgeneration &&
			    (flags & OBJPC_SYNC) != 0)
				goto rescan;
			np = vm_page_find_least(object, pi);
			continue;
		}
		if (!vm_object_page_remove_write(p, flags, &allclean)) {
			vm_page_xunbusy(p);
			continue;
		}
		if (object->type == OBJT_VNODE) {
			n = vm_object_page_collect_flush(object, p, pagerflags,
			    flags, &allclean, &eio);
			if (eio) {
				res = FALSE;
				allclean = FALSE;
			}
			if (object->generation != curgeneration &&
			    (flags & OBJPC_SYNC) != 0)
				goto rescan;

			/*
			 * If the VOP_PUTPAGES() did a truncated write, so
			 * that even the first page of the run is not fully
			 * written, vm_pageout_flush() returns 0 as the run
			 * length.  Since the condition that caused truncated
			 * write may be permanent, e.g. exhausted free space,
			 * accepting n == 0 would cause an infinite loop.
			 *
			 * Forwarding the iterator leaves the unwritten page
			 * behind, but there is not much we can do there if
			 * filesystem refuses to write it.
			 */
			if (n == 0) {
				n = 1;
				allclean = FALSE;
			}
		} else {
			n = 1;
			vm_page_xunbusy(p);
		}
		np = vm_page_find_least(object, pi + n);
	}
#if 0
	VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0);
#endif

	/*
	 * Leave updating cleangeneration for tmpfs objects to tmpfs
	 * scan.  It needs to update mtime, which happens for other
	 * filesystems during page writeouts.
	 */
	if (allclean && object->type == OBJT_VNODE)
		object->cleangeneration = curgeneration;
	return (res);
}
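
/*
 * Illustrative sketch (not part of this file's code): an msync()-style
 * caller flushes a byte range synchronously.  "start" and "end" are byte
 * offsets into the object; per the odd semantics documented above,
 * passing 0 for both cleans the entire object.
 *
 *	VM_OBJECT_WLOCK(object);
 *	res = vm_object_page_clean(object, start, end, OBJPC_SYNC);
 *	VM_OBJECT_WUNLOCK(object);
 */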
1110a9643ea8Slogwang
1111a9643ea8Slogwang static int
vm_object_page_collect_flush(vm_object_t object,vm_page_t p,int pagerflags,int flags,boolean_t * allclean,boolean_t * eio)1112a9643ea8Slogwang vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
1113*22ce4affSfengbojiang int flags, boolean_t *allclean, boolean_t *eio)
1114a9643ea8Slogwang {
1115a9643ea8Slogwang vm_page_t ma[vm_pageout_page_count], p_first, tp;
1116a9643ea8Slogwang int count, i, mreq, runlen;
1117a9643ea8Slogwang
1118a9643ea8Slogwang vm_page_lock_assert(p, MA_NOTOWNED);
1119*22ce4affSfengbojiang vm_page_assert_xbusied(p);
1120a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(object);
1121a9643ea8Slogwang
1122a9643ea8Slogwang count = 1;
1123a9643ea8Slogwang mreq = 0;
1124a9643ea8Slogwang
1125a9643ea8Slogwang for (tp = p; count < vm_pageout_page_count; count++) {
1126a9643ea8Slogwang tp = vm_page_next(tp);
1127*22ce4affSfengbojiang if (tp == NULL || vm_page_tryxbusy(tp) == 0)
1128a9643ea8Slogwang break;
1129*22ce4affSfengbojiang if (!vm_object_page_remove_write(tp, flags, allclean)) {
1130*22ce4affSfengbojiang vm_page_xunbusy(tp);
1131a9643ea8Slogwang break;
1132a9643ea8Slogwang }
1133*22ce4affSfengbojiang }
1134a9643ea8Slogwang
1135a9643ea8Slogwang for (p_first = p; count < vm_pageout_page_count; count++) {
1136a9643ea8Slogwang tp = vm_page_prev(p_first);
1137*22ce4affSfengbojiang if (tp == NULL || vm_page_tryxbusy(tp) == 0)
1138a9643ea8Slogwang break;
1139*22ce4affSfengbojiang if (!vm_object_page_remove_write(tp, flags, allclean)) {
1140*22ce4affSfengbojiang vm_page_xunbusy(tp);
1141a9643ea8Slogwang break;
1142*22ce4affSfengbojiang }
1143a9643ea8Slogwang p_first = tp;
1144a9643ea8Slogwang mreq++;
1145a9643ea8Slogwang }
1146a9643ea8Slogwang
1147a9643ea8Slogwang for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++)
1148a9643ea8Slogwang ma[i] = tp;
1149a9643ea8Slogwang
1150a9643ea8Slogwang vm_pageout_flush(ma, count, pagerflags, mreq, &runlen, eio);
1151a9643ea8Slogwang return (runlen);
1152a9643ea8Slogwang }
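
/*
 * The run assembly above can be pictured with a small, self-contained
 * model: scan forward from a pivot slot, then backward, bounded by a
 * fixed maximum, and report where the pivot ended up inside the run
 * (the "mreq" handed to vm_pageout_flush()).  RUN_MAX, the bool array
 * and the function name below are illustrative assumptions, not part of
 * the kernel interface.
 */
#if 0
#include <stdbool.h>
#include <stddef.h>

#define	RUN_MAX	16		/* stands in for vm_pageout_page_count */

static size_t
collect_run(const bool *eligible, size_t n, size_t pivot, size_t *pivot_off)
{
	size_t count, first;

	count = 1;
	first = pivot;
	/* Extend the run forward over consecutive eligible slots. */
	while (count < RUN_MAX && pivot + count < n && eligible[pivot + count])
		count++;
	/* Then extend it backward. */
	while (count < RUN_MAX && first > 0 && eligible[first - 1]) {
		first--;
		count++;
	}
	*pivot_off = pivot - first;	/* offset of the pivot within the run */
	return (count);
}
#endif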
1153a9643ea8Slogwang
1154a9643ea8Slogwang /*
1155a9643ea8Slogwang * Note that there is no sense in writing out anonymous
1156a9643ea8Slogwang * objects, so we track down the backing vnode object and
1157a9643ea8Slogwang * write that out instead.
1158a9643ea8Slogwang * When invalidation is requested, we remove all pages from
1159a9643ea8Slogwang * the address space for semantic correctness.
1160a9643ea8Slogwang *
1161a9643ea8Slogwang * If the backing object is a device object with unmanaged pages, then any
1162a9643ea8Slogwang * mappings to the specified range of pages must be removed before this
1163a9643ea8Slogwang * function is called.
1164a9643ea8Slogwang *
1165a9643ea8Slogwang * Note: certain anonymous maps, such as MAP_NOSYNC maps,
1166a9643ea8Slogwang * may start out with a NULL object.
1167a9643ea8Slogwang */
1168a9643ea8Slogwang boolean_t
1169a9643ea8Slogwang vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
1170a9643ea8Slogwang boolean_t syncio, boolean_t invalidate)
1171a9643ea8Slogwang {
1172a9643ea8Slogwang vm_object_t backing_object;
1173a9643ea8Slogwang struct vnode *vp;
1174a9643ea8Slogwang struct mount *mp;
1175a9643ea8Slogwang int error, flags, fsync_after;
1176a9643ea8Slogwang boolean_t res;
1177a9643ea8Slogwang
1178a9643ea8Slogwang if (object == NULL)
1179a9643ea8Slogwang return (TRUE);
1180a9643ea8Slogwang res = TRUE;
1181a9643ea8Slogwang error = 0;
1182a9643ea8Slogwang VM_OBJECT_WLOCK(object);
1183a9643ea8Slogwang while ((backing_object = object->backing_object) != NULL) {
1184a9643ea8Slogwang VM_OBJECT_WLOCK(backing_object);
1185a9643ea8Slogwang offset += object->backing_object_offset;
1186a9643ea8Slogwang VM_OBJECT_WUNLOCK(object);
1187a9643ea8Slogwang object = backing_object;
1188a9643ea8Slogwang if (object->size < OFF_TO_IDX(offset + size))
1189a9643ea8Slogwang size = IDX_TO_OFF(object->size) - offset;
1190a9643ea8Slogwang }
1191a9643ea8Slogwang /*
1192a9643ea8Slogwang * Flush pages if writing is allowed, invalidate them
1193a9643ea8Slogwang * if invalidation requested. Pages undergoing I/O
1194a9643ea8Slogwang * will be ignored by vm_object_page_remove().
1195a9643ea8Slogwang *
1196a9643ea8Slogwang * We cannot lock the vnode and then wait for paging
1197a9643ea8Slogwang * to complete without deadlocking against vm_fault.
1198a9643ea8Slogwang * Instead we simply call vm_object_page_remove() and
1199a9643ea8Slogwang * allow it to block internally on a page-by-page
1200a9643ea8Slogwang * basis when it encounters pages undergoing async
1201a9643ea8Slogwang * I/O.
1202a9643ea8Slogwang */
1203a9643ea8Slogwang if (object->type == OBJT_VNODE &&
1204*22ce4affSfengbojiang vm_object_mightbedirty(object) != 0 &&
1205*22ce4affSfengbojiang ((vp = object->handle)->v_vflag & VV_NOSYNC) == 0) {
1206a9643ea8Slogwang VM_OBJECT_WUNLOCK(object);
1207a9643ea8Slogwang (void) vn_start_write(vp, &mp, V_WAIT);
1208a9643ea8Slogwang vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1209a9643ea8Slogwang if (syncio && !invalidate && offset == 0 &&
1210*22ce4affSfengbojiang atop(size) == object->size) {
1211a9643ea8Slogwang /*
1212a9643ea8Slogwang * If syncing the whole mapping of the file,
1213a9643ea8Slogwang * it is faster to schedule all the writes in
1214a9643ea8Slogwang * async mode, which also allows clustering,
1215a9643ea8Slogwang * and then wait for the I/O to complete.
1216a9643ea8Slogwang */
1217a9643ea8Slogwang flags = 0;
1218a9643ea8Slogwang fsync_after = TRUE;
1219a9643ea8Slogwang } else {
1220a9643ea8Slogwang flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1221a9643ea8Slogwang flags |= invalidate ? (OBJPC_SYNC | OBJPC_INVAL) : 0;
1222a9643ea8Slogwang fsync_after = FALSE;
1223a9643ea8Slogwang }
1224a9643ea8Slogwang VM_OBJECT_WLOCK(object);
1225a9643ea8Slogwang res = vm_object_page_clean(object, offset, offset + size,
1226a9643ea8Slogwang flags);
1227a9643ea8Slogwang VM_OBJECT_WUNLOCK(object);
1228a9643ea8Slogwang if (fsync_after)
1229a9643ea8Slogwang error = VOP_FSYNC(vp, MNT_WAIT, curthread);
1230*22ce4affSfengbojiang VOP_UNLOCK(vp);
1231a9643ea8Slogwang vn_finished_write(mp);
1232a9643ea8Slogwang if (error != 0)
1233a9643ea8Slogwang res = FALSE;
1234a9643ea8Slogwang VM_OBJECT_WLOCK(object);
1235a9643ea8Slogwang }
1236a9643ea8Slogwang if ((object->type == OBJT_VNODE ||
1237a9643ea8Slogwang object->type == OBJT_DEVICE) && invalidate) {
1238a9643ea8Slogwang if (object->type == OBJT_DEVICE)
1239a9643ea8Slogwang /*
1240a9643ea8Slogwang * The option OBJPR_NOTMAPPED must be passed here
1241a9643ea8Slogwang * because vm_object_page_remove() cannot remove
1242a9643ea8Slogwang * unmanaged mappings.
1243a9643ea8Slogwang */
1244a9643ea8Slogwang flags = OBJPR_NOTMAPPED;
1245a9643ea8Slogwang else if (old_msync)
1246a9643ea8Slogwang flags = 0;
1247a9643ea8Slogwang else
1248a9643ea8Slogwang flags = OBJPR_CLEANONLY;
1249a9643ea8Slogwang vm_object_page_remove(object, OFF_TO_IDX(offset),
1250a9643ea8Slogwang OFF_TO_IDX(offset + size + PAGE_MASK), flags);
1251a9643ea8Slogwang }
1252a9643ea8Slogwang VM_OBJECT_WUNLOCK(object);
1253a9643ea8Slogwang return (res);
1254a9643ea8Slogwang }
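
/*
 * vm_object_sync() is the object-level half of msync(2): broadly, the
 * MS_SYNC/MS_ASYNC choice drives "syncio" and MS_INVALIDATE drives
 * "invalidate".  A minimal userland sketch of the corresponding system
 * calls; the file name and the 4096-byte size are illustrative only.
 */
#if 0
#include <sys/mman.h>

#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char *p;
	int fd;

	fd = open("example.dat", O_RDWR | O_CREAT, 0644);
	if (fd == -1)
		err(1, "open");
	if (ftruncate(fd, 4096) == -1)
		err(1, "ftruncate");
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memset(p, 'x', 4096);

	/* Synchronously write back the dirty pages backing the mapping. */
	if (msync(p, 4096, MS_SYNC) == -1)
		err(1, "msync");
	/* Also discard cached pages so later faults reread the file. */
	if (msync(p, 4096, MS_INVALIDATE) == -1)
		err(1, "msync MS_INVALIDATE");
	return (0);
}
#endif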
1255a9643ea8Slogwang
1256a9643ea8Slogwang /*
1257*22ce4affSfengbojiang * Determine whether the given advice can be applied to the object. Advice is
1258*22ce4affSfengbojiang * not applied to unmanaged pages since they never belong to page queues, and
1259*22ce4affSfengbojiang * since MADV_FREE is destructive, it can apply only to anonymous pages that
1260*22ce4affSfengbojiang * have been mapped at most once.
1261*22ce4affSfengbojiang */
1262*22ce4affSfengbojiang static bool
1263*22ce4affSfengbojiang vm_object_advice_applies(vm_object_t object, int advice)
1264*22ce4affSfengbojiang {
1265*22ce4affSfengbojiang
1266*22ce4affSfengbojiang if ((object->flags & OBJ_UNMANAGED) != 0)
1267*22ce4affSfengbojiang return (false);
1268*22ce4affSfengbojiang if (advice != MADV_FREE)
1269*22ce4affSfengbojiang return (true);
1270*22ce4affSfengbojiang return ((object->flags & (OBJ_ONEMAPPING | OBJ_ANON)) ==
1271*22ce4affSfengbojiang (OBJ_ONEMAPPING | OBJ_ANON));
1272*22ce4affSfengbojiang }
1273*22ce4affSfengbojiang
1274*22ce4affSfengbojiang static void
1275*22ce4affSfengbojiang vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex,
1276*22ce4affSfengbojiang vm_size_t size)
1277*22ce4affSfengbojiang {
1278*22ce4affSfengbojiang
1279*22ce4affSfengbojiang if (advice == MADV_FREE && object->type == OBJT_SWAP)
1280*22ce4affSfengbojiang swap_pager_freespace(object, pindex, size);
1281*22ce4affSfengbojiang }
1282*22ce4affSfengbojiang
1283*22ce4affSfengbojiang /*
1284a9643ea8Slogwang * vm_object_madvise:
1285a9643ea8Slogwang *
1286a9643ea8Slogwang * Implements the madvise function at the object/page level.
1287a9643ea8Slogwang *
1288a9643ea8Slogwang * MADV_WILLNEED (any object)
1289a9643ea8Slogwang *
1290a9643ea8Slogwang * Activate the specified pages if they are resident.
1291a9643ea8Slogwang *
1292a9643ea8Slogwang * MADV_DONTNEED (any object)
1293a9643ea8Slogwang *
1294a9643ea8Slogwang * Deactivate the specified pages if they are resident.
1295a9643ea8Slogwang *
1296a9643ea8Slogwang * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects,
1297a9643ea8Slogwang * OBJ_ONEMAPPING only)
1298a9643ea8Slogwang *
1299a9643ea8Slogwang * Deactivate and clean the specified pages if they are
1300a9643ea8Slogwang * resident. This permits the process to reuse the pages
1301a9643ea8Slogwang * without faulting or the kernel to reclaim the pages
1302a9643ea8Slogwang * without I/O.
1303a9643ea8Slogwang */
1304a9643ea8Slogwang void
1305a9643ea8Slogwang vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
1306*22ce4affSfengbojiang int advice)
1307a9643ea8Slogwang {
1308a9643ea8Slogwang vm_pindex_t tpindex;
1309a9643ea8Slogwang vm_object_t backing_object, tobject;
1310*22ce4affSfengbojiang vm_page_t m, tm;
1311a9643ea8Slogwang
1312a9643ea8Slogwang if (object == NULL)
1313a9643ea8Slogwang return;
1314*22ce4affSfengbojiang
1315a9643ea8Slogwang relookup:
1316*22ce4affSfengbojiang VM_OBJECT_WLOCK(object);
1317*22ce4affSfengbojiang if (!vm_object_advice_applies(object, advice)) {
1318*22ce4affSfengbojiang VM_OBJECT_WUNLOCK(object);
1319*22ce4affSfengbojiang return;
1320*22ce4affSfengbojiang }
1321*22ce4affSfengbojiang for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) {
1322a9643ea8Slogwang tobject = object;
1323*22ce4affSfengbojiang
1324*22ce4affSfengbojiang /*
1325*22ce4affSfengbojiang * If the next page isn't resident in the top-level object, we
1326*22ce4affSfengbojiang * need to search the shadow chain. When applying MADV_FREE, we
1327*22ce4affSfengbojiang * take care to release any swap space used to store
1328*22ce4affSfengbojiang * non-resident pages.
1329*22ce4affSfengbojiang */
1330*22ce4affSfengbojiang if (m == NULL || pindex < m->pindex) {
1331*22ce4affSfengbojiang /*
1332*22ce4affSfengbojiang * Optimize a common case: if the top-level object has
1333*22ce4affSfengbojiang * no backing object, we can skip over the non-resident
1334*22ce4affSfengbojiang * range in constant time.
1335*22ce4affSfengbojiang */
1336*22ce4affSfengbojiang if (object->backing_object == NULL) {
1337*22ce4affSfengbojiang tpindex = (m != NULL && m->pindex < end) ?
1338*22ce4affSfengbojiang m->pindex : end;
1339*22ce4affSfengbojiang vm_object_madvise_freespace(object, advice,
1340*22ce4affSfengbojiang pindex, tpindex - pindex);
1341*22ce4affSfengbojiang if ((pindex = tpindex) == end)
1342*22ce4affSfengbojiang break;
1343*22ce4affSfengbojiang goto next_page;
1344*22ce4affSfengbojiang }
1345*22ce4affSfengbojiang
1346a9643ea8Slogwang tpindex = pindex;
1347*22ce4affSfengbojiang do {
1348*22ce4affSfengbojiang vm_object_madvise_freespace(tobject, advice,
1349*22ce4affSfengbojiang tpindex, 1);
1350a9643ea8Slogwang /*
1351*22ce4affSfengbojiang * Prepare to search the next object in the
1352*22ce4affSfengbojiang * chain.
1353a9643ea8Slogwang */
1354a9643ea8Slogwang backing_object = tobject->backing_object;
1355a9643ea8Slogwang if (backing_object == NULL)
1356*22ce4affSfengbojiang goto next_pindex;
1357a9643ea8Slogwang VM_OBJECT_WLOCK(backing_object);
1358*22ce4affSfengbojiang tpindex +=
1359*22ce4affSfengbojiang OFF_TO_IDX(tobject->backing_object_offset);
1360a9643ea8Slogwang if (tobject != object)
1361a9643ea8Slogwang VM_OBJECT_WUNLOCK(tobject);
1362a9643ea8Slogwang tobject = backing_object;
1363*22ce4affSfengbojiang if (!vm_object_advice_applies(tobject, advice))
1364*22ce4affSfengbojiang goto next_pindex;
1365*22ce4affSfengbojiang } while ((tm = vm_page_lookup(tobject, tpindex)) ==
1366*22ce4affSfengbojiang NULL);
1367*22ce4affSfengbojiang } else {
1368*22ce4affSfengbojiang next_page:
1369*22ce4affSfengbojiang tm = m;
1370*22ce4affSfengbojiang m = TAILQ_NEXT(m, listq);
1371a9643ea8Slogwang }
1372*22ce4affSfengbojiang
1373*22ce4affSfengbojiang /*
1374*22ce4affSfengbojiang * If the page is not in a normal state, skip it. The page
1375*22ce4affSfengbojiang * can not be invalidated while the object lock is held.
1376*22ce4affSfengbojiang */
1377*22ce4affSfengbojiang if (!vm_page_all_valid(tm) || vm_page_wired(tm))
1378*22ce4affSfengbojiang goto next_pindex;
1379*22ce4affSfengbojiang KASSERT((tm->flags & PG_FICTITIOUS) == 0,
1380*22ce4affSfengbojiang ("vm_object_madvise: page %p is fictitious", tm));
1381*22ce4affSfengbojiang KASSERT((tm->oflags & VPO_UNMANAGED) == 0,
1382*22ce4affSfengbojiang ("vm_object_madvise: page %p is not managed", tm));
1383*22ce4affSfengbojiang if (vm_page_tryxbusy(tm) == 0) {
1384*22ce4affSfengbojiang if (object != tobject)
1385*22ce4affSfengbojiang VM_OBJECT_WUNLOCK(object);
1386*22ce4affSfengbojiang if (advice == MADV_WILLNEED) {
1387a9643ea8Slogwang /*
1388a9643ea8Slogwang * Reference the page before unlocking and
1389a9643ea8Slogwang * sleeping so that the page daemon is less
1390a9643ea8Slogwang * likely to reclaim it.
1391a9643ea8Slogwang */
1392*22ce4affSfengbojiang vm_page_aflag_set(tm, PGA_REFERENCED);
1393a9643ea8Slogwang }
1394*22ce4affSfengbojiang vm_page_busy_sleep(tm, "madvpo", false);
1395a9643ea8Slogwang goto relookup;
1396a9643ea8Slogwang }
1397*22ce4affSfengbojiang vm_page_advise(tm, advice);
1398*22ce4affSfengbojiang vm_page_xunbusy(tm);
1399*22ce4affSfengbojiang vm_object_madvise_freespace(tobject, advice, tm->pindex, 1);
1400*22ce4affSfengbojiang next_pindex:
1401a9643ea8Slogwang if (tobject != object)
1402a9643ea8Slogwang VM_OBJECT_WUNLOCK(tobject);
1403a9643ea8Slogwang }
1404a9643ea8Slogwang VM_OBJECT_WUNLOCK(object);
1405a9643ea8Slogwang }
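
/*
 * The object-level advice above is what madvise(2) ultimately requests.
 * A minimal userland sketch: MADV_FREE is applied to a private anonymous
 * mapping, the anonymous, singly mapped case that
 * vm_object_advice_applies() insists on; the 1 MiB size is an
 * illustrative assumption.
 */
#if 0
#include <sys/mman.h>

#include <err.h>
#include <string.h>

int
main(void)
{
	size_t len = 1024 * 1024;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
	    -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memset(p, 1, len);

	/* The contents are disposable; the kernel may reclaim the pages. */
	if (madvise(p, len, MADV_FREE) == -1)
		err(1, "madvise(MADV_FREE)");

	/* Activation / deactivation hints work on any mapping. */
	(void)madvise(p, len, MADV_WILLNEED);
	(void)madvise(p, len, MADV_DONTNEED);
	return (0);
}
#endif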
1406a9643ea8Slogwang
1407a9643ea8Slogwang /*
1408a9643ea8Slogwang * vm_object_shadow:
1409a9643ea8Slogwang *
1410a9643ea8Slogwang * Create a new object which is backed by the
1411a9643ea8Slogwang * specified existing object range. The source
1412a9643ea8Slogwang * object reference is deallocated.
1413a9643ea8Slogwang *
1414a9643ea8Slogwang * The new object and offset into that object
1415a9643ea8Slogwang * are returned in the source parameters.
1416a9643ea8Slogwang */
1417a9643ea8Slogwang void
1418*22ce4affSfengbojiang vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length,
1419*22ce4affSfengbojiang struct ucred *cred, bool shared)
1420a9643ea8Slogwang {
1421a9643ea8Slogwang vm_object_t source;
1422a9643ea8Slogwang vm_object_t result;
1423a9643ea8Slogwang
1424a9643ea8Slogwang source = *object;
1425a9643ea8Slogwang
1426a9643ea8Slogwang /*
1427a9643ea8Slogwang * Don't create the new object if the old object isn't shared.
1428*22ce4affSfengbojiang *
1429*22ce4affSfengbojiang * If we hold the only reference we can guarantee that it won't
1430*22ce4affSfengbojiang * increase while we have the map locked. Otherwise the race is
1431*22ce4affSfengbojiang * harmless and we will end up with an extra shadow object that
1432*22ce4affSfengbojiang * will be collapsed later.
1433a9643ea8Slogwang */
1434*22ce4affSfengbojiang if (source != NULL && source->ref_count == 1 &&
1435*22ce4affSfengbojiang (source->flags & OBJ_ANON) != 0)
1436a9643ea8Slogwang return;
1437a9643ea8Slogwang
1438a9643ea8Slogwang /*
1439a9643ea8Slogwang * Allocate a new object with the given length.
1440a9643ea8Slogwang */
1441*22ce4affSfengbojiang result = vm_object_allocate_anon(atop(length), source, cred, length);
1442a9643ea8Slogwang
1443a9643ea8Slogwang /*
1444a9643ea8Slogwang * Store the offset into the source object, and fix up the offset into
1445a9643ea8Slogwang * the new object.
1446a9643ea8Slogwang */
1447a9643ea8Slogwang result->backing_object_offset = *offset;
1448*22ce4affSfengbojiang
1449*22ce4affSfengbojiang if (shared || source != NULL) {
1450*22ce4affSfengbojiang VM_OBJECT_WLOCK(result);
1451*22ce4affSfengbojiang
1452*22ce4affSfengbojiang /*
1453*22ce4affSfengbojiang * The new object shadows the source object, adding a
1454*22ce4affSfengbojiang * reference to it. Our caller changes its reference
1455*22ce4affSfengbojiang * to point to the new object, removing a reference to
1456*22ce4affSfengbojiang * the source object. Net result: no change of
1457*22ce4affSfengbojiang * reference count, unless the caller needs to add one
1458*22ce4affSfengbojiang * more reference due to forking a shared map entry.
1459*22ce4affSfengbojiang */
1460*22ce4affSfengbojiang if (shared) {
1461*22ce4affSfengbojiang vm_object_reference_locked(result);
1462*22ce4affSfengbojiang vm_object_clear_flag(result, OBJ_ONEMAPPING);
1463a9643ea8Slogwang }
1464a9643ea8Slogwang
1465*22ce4affSfengbojiang /*
1466*22ce4affSfengbojiang * Try to optimize the result object's page color when
1467*22ce4affSfengbojiang * shadowing in order to maintain page coloring
1468*22ce4affSfengbojiang * consistency in the combined shadowed object.
1469*22ce4affSfengbojiang */
1470*22ce4affSfengbojiang if (source != NULL) {
1471*22ce4affSfengbojiang vm_object_backing_insert(result, source);
1472*22ce4affSfengbojiang result->domain = source->domain;
1473*22ce4affSfengbojiang #if VM_NRESERVLEVEL > 0
1474*22ce4affSfengbojiang result->flags |= source->flags & OBJ_COLORED;
1475*22ce4affSfengbojiang result->pg_color = (source->pg_color +
1476*22ce4affSfengbojiang OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER -
1477*22ce4affSfengbojiang 1)) - 1);
1478*22ce4affSfengbojiang #endif
1479*22ce4affSfengbojiang }
1480*22ce4affSfengbojiang VM_OBJECT_WUNLOCK(result);
1481*22ce4affSfengbojiang }
1482a9643ea8Slogwang
1483a9643ea8Slogwang /*
1484a9643ea8Slogwang * Return the new object and offset to the caller.
1485a9643ea8Slogwang */
1486a9643ea8Slogwang *offset = 0;
1487a9643ea8Slogwang *object = result;
1488a9643ea8Slogwang }
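
/*
 * A toy model of the shadow operation described above, using a
 * simplified object type.  The structure, field names and toy_shadow()
 * are illustrative; only the documented semantics are modeled: the new
 * object takes over the caller's reference to the source, and the
 * caller's offset becomes relative to the new object.
 */
#if 0
#include <stdlib.h>

struct toy_object {
	struct toy_object *backing;	/* object we shadow, if any */
	long long backing_offset;	/* offset of our window into it */
	int ref_count;
};

static void
toy_shadow(struct toy_object **objp, long long *offsetp)
{
	struct toy_object *result;

	result = calloc(1, sizeof(*result));
	if (result == NULL)
		abort();
	result->backing = *objp;	/* inherits the caller's reference */
	result->backing_offset = *offsetp;
	result->ref_count = 1;

	*offsetp = 0;
	*objp = result;
}
#endif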
1489a9643ea8Slogwang
1490a9643ea8Slogwang /*
1491a9643ea8Slogwang * vm_object_split:
1492a9643ea8Slogwang *
1493a9643ea8Slogwang * Split the pages in a map entry into a new object. This affords
1494a9643ea8Slogwang * easier removal of unused pages, and keeps object inheritance from
1495a9643ea8Slogwang * being a negative impact on memory usage.
1496a9643ea8Slogwang */
1497a9643ea8Slogwang void
1498a9643ea8Slogwang vm_object_split(vm_map_entry_t entry)
1499a9643ea8Slogwang {
1500*22ce4affSfengbojiang vm_page_t m, m_busy, m_next;
1501*22ce4affSfengbojiang vm_object_t orig_object, new_object, backing_object;
1502a9643ea8Slogwang vm_pindex_t idx, offidxstart;
1503a9643ea8Slogwang vm_size_t size;
1504a9643ea8Slogwang
1505a9643ea8Slogwang orig_object = entry->object.vm_object;
1506*22ce4affSfengbojiang KASSERT((orig_object->flags & OBJ_ONEMAPPING) != 0,
1507*22ce4affSfengbojiang ("vm_object_split: Splitting object with multiple mappings."));
1508*22ce4affSfengbojiang if ((orig_object->flags & OBJ_ANON) == 0)
1509a9643ea8Slogwang return;
1510a9643ea8Slogwang if (orig_object->ref_count <= 1)
1511a9643ea8Slogwang return;
1512a9643ea8Slogwang VM_OBJECT_WUNLOCK(orig_object);
1513a9643ea8Slogwang
1514a9643ea8Slogwang offidxstart = OFF_TO_IDX(entry->offset);
1515a9643ea8Slogwang size = atop(entry->end - entry->start);
1516a9643ea8Slogwang
1517a9643ea8Slogwang /*
1518a9643ea8Slogwang * If swap_pager_copy() is later called, it will convert new_object
1519a9643ea8Slogwang * into a swap object.
1520a9643ea8Slogwang */
1521*22ce4affSfengbojiang new_object = vm_object_allocate_anon(size, orig_object,
1522*22ce4affSfengbojiang orig_object->cred, ptoa(size));
1523*22ce4affSfengbojiang
1524*22ce4affSfengbojiang /*
1525*22ce4affSfengbojiang * We must wait for the orig_object to complete any in-progress
1526*22ce4affSfengbojiang * collapse so that the swap blocks are stable below. The
1527*22ce4affSfengbojiang * additional reference on backing_object by new object will
1528*22ce4affSfengbojiang * prevent further collapse operations until split completes.
1529*22ce4affSfengbojiang */
1530*22ce4affSfengbojiang VM_OBJECT_WLOCK(orig_object);
1531*22ce4affSfengbojiang vm_object_collapse_wait(orig_object);
1532a9643ea8Slogwang
1533a9643ea8Slogwang /*
1534a9643ea8Slogwang * At this point, the new object is still private, so the order in
1535a9643ea8Slogwang * which the original and new objects are locked does not matter.
1536a9643ea8Slogwang */
1537a9643ea8Slogwang VM_OBJECT_WLOCK(new_object);
1538*22ce4affSfengbojiang new_object->domain = orig_object->domain;
1539*22ce4affSfengbojiang backing_object = orig_object->backing_object;
1540*22ce4affSfengbojiang if (backing_object != NULL) {
1541*22ce4affSfengbojiang vm_object_backing_insert_ref(new_object, backing_object);
1542a9643ea8Slogwang new_object->backing_object_offset =
1543a9643ea8Slogwang orig_object->backing_object_offset + entry->offset;
1544a9643ea8Slogwang }
1545a9643ea8Slogwang if (orig_object->cred != NULL) {
1546a9643ea8Slogwang crhold(orig_object->cred);
1547a9643ea8Slogwang KASSERT(orig_object->charge >= ptoa(size),
1548a9643ea8Slogwang ("orig_object->charge < 0"));
1549a9643ea8Slogwang orig_object->charge -= ptoa(size);
1550a9643ea8Slogwang }
1551*22ce4affSfengbojiang
1552*22ce4affSfengbojiang /*
1553*22ce4affSfengbojiang * Mark the split operation so that swap_pager_getpages() knows
1554*22ce4affSfengbojiang * that the object is in transition.
1555*22ce4affSfengbojiang */
1556*22ce4affSfengbojiang vm_object_set_flag(orig_object, OBJ_SPLIT);
1557*22ce4affSfengbojiang m_busy = NULL;
1558*22ce4affSfengbojiang #ifdef INVARIANTS
1559*22ce4affSfengbojiang idx = 0;
1560*22ce4affSfengbojiang #endif
1561a9643ea8Slogwang retry:
1562a9643ea8Slogwang m = vm_page_find_least(orig_object, offidxstart);
1563*22ce4affSfengbojiang KASSERT(m == NULL || idx <= m->pindex - offidxstart,
1564*22ce4affSfengbojiang ("%s: object %p was repopulated", __func__, orig_object));
1565a9643ea8Slogwang for (; m != NULL && (idx = m->pindex - offidxstart) < size;
1566a9643ea8Slogwang m = m_next) {
1567a9643ea8Slogwang m_next = TAILQ_NEXT(m, listq);
1568a9643ea8Slogwang
1569a9643ea8Slogwang /*
1570a9643ea8Slogwang * We must wait for pending I/O to complete before we can
1571a9643ea8Slogwang * rename the page.
1572a9643ea8Slogwang *
1573a9643ea8Slogwang * We do not have to VM_PROT_NONE the page as mappings should
1574a9643ea8Slogwang * not be changed by this operation.
1575a9643ea8Slogwang */
1576*22ce4affSfengbojiang if (vm_page_tryxbusy(m) == 0) {
1577a9643ea8Slogwang VM_OBJECT_WUNLOCK(new_object);
1578*22ce4affSfengbojiang vm_page_sleep_if_busy(m, "spltwt");
1579*22ce4affSfengbojiang VM_OBJECT_WLOCK(new_object);
1580*22ce4affSfengbojiang goto retry;
1581*22ce4affSfengbojiang }
1582*22ce4affSfengbojiang
1583*22ce4affSfengbojiang /*
1584*22ce4affSfengbojiang * The page was left invalid. Likely placed there by
1585*22ce4affSfengbojiang * an incomplete fault. Just remove and ignore.
1586*22ce4affSfengbojiang */
1587*22ce4affSfengbojiang if (vm_page_none_valid(m)) {
1588*22ce4affSfengbojiang if (vm_page_remove(m))
1589*22ce4affSfengbojiang vm_page_free(m);
1590*22ce4affSfengbojiang continue;
1591*22ce4affSfengbojiang }
1592*22ce4affSfengbojiang
1593*22ce4affSfengbojiang /* vm_page_rename() will dirty the page. */
1594*22ce4affSfengbojiang if (vm_page_rename(m, new_object, idx)) {
1595*22ce4affSfengbojiang vm_page_xunbusy(m);
1596*22ce4affSfengbojiang VM_OBJECT_WUNLOCK(new_object);
1597a9643ea8Slogwang VM_OBJECT_WUNLOCK(orig_object);
1598*22ce4affSfengbojiang vm_radix_wait();
1599a9643ea8Slogwang VM_OBJECT_WLOCK(orig_object);
1600a9643ea8Slogwang VM_OBJECT_WLOCK(new_object);
1601a9643ea8Slogwang goto retry;
1602a9643ea8Slogwang }
1603a9643ea8Slogwang
1604a9643ea8Slogwang #if VM_NRESERVLEVEL > 0
1605a9643ea8Slogwang /*
1606a9643ea8Slogwang * If some of the reservation's allocated pages remain with
1607a9643ea8Slogwang * the original object, then transferring the reservation to
1608a9643ea8Slogwang * the new object is neither particularly beneficial nor
1609a9643ea8Slogwang * particularly harmful as compared to leaving the reservation
1610a9643ea8Slogwang * with the original object. If, however, all of the
1611a9643ea8Slogwang * reservation's allocated pages are transferred to the new
1612a9643ea8Slogwang * object, then transferring the reservation is typically
1613a9643ea8Slogwang * beneficial. Determining which of these two cases applies
1614a9643ea8Slogwang * would be more costly than unconditionally renaming the
1615a9643ea8Slogwang * reservation.
1616a9643ea8Slogwang */
1617a9643ea8Slogwang vm_reserv_rename(m, new_object, orig_object, offidxstart);
1618a9643ea8Slogwang #endif
1619*22ce4affSfengbojiang
1620*22ce4affSfengbojiang /*
1621*22ce4affSfengbojiang * orig_object's type may change while sleeping, so keep track
1622*22ce4affSfengbojiang * of the beginning of the busied range.
1623*22ce4affSfengbojiang */
1624*22ce4affSfengbojiang if (orig_object->type != OBJT_SWAP)
1625*22ce4affSfengbojiang vm_page_xunbusy(m);
1626*22ce4affSfengbojiang else if (m_busy == NULL)
1627*22ce4affSfengbojiang m_busy = m;
1628a9643ea8Slogwang }
1629a9643ea8Slogwang if (orig_object->type == OBJT_SWAP) {
1630a9643ea8Slogwang /*
1631a9643ea8Slogwang * swap_pager_copy() can sleep, in which case the orig_object's
1632a9643ea8Slogwang * and new_object's locks are released and reacquired.
1633a9643ea8Slogwang */
1634a9643ea8Slogwang swap_pager_copy(orig_object, new_object, offidxstart, 0);
1635*22ce4affSfengbojiang if (m_busy != NULL)
1636*22ce4affSfengbojiang TAILQ_FOREACH_FROM(m_busy, &new_object->memq, listq)
1637*22ce4affSfengbojiang vm_page_xunbusy(m_busy);
1638a9643ea8Slogwang }
1639*22ce4affSfengbojiang vm_object_clear_flag(orig_object, OBJ_SPLIT);
1640a9643ea8Slogwang VM_OBJECT_WUNLOCK(orig_object);
1641a9643ea8Slogwang VM_OBJECT_WUNLOCK(new_object);
1642a9643ea8Slogwang entry->object.vm_object = new_object;
1643a9643ea8Slogwang entry->offset = 0LL;
1644a9643ea8Slogwang vm_object_deallocate(orig_object);
1645a9643ea8Slogwang VM_OBJECT_WLOCK(new_object);
1646a9643ea8Slogwang }
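
/*
 * The page transfer performed by vm_object_split() can be modeled with
 * plain arrays: every resident slot of the original object whose index
 * falls inside the entry's window moves to the new object, re-indexed
 * relative to the window start.  The bool arrays and toy_split() below
 * are illustrative stand-ins for the objects' page queues.
 */
#if 0
#include <stdbool.h>
#include <stddef.h>

static void
toy_split(bool *orig_resident, size_t orig_size, bool *new_resident,
    size_t start, size_t size)
{
	size_t idx;

	for (idx = start; idx < start + size && idx < orig_size; idx++) {
		if (!orig_resident[idx])
			continue;
		/* vm_page_rename() analogue: move, do not copy. */
		new_resident[idx - start] = true;
		orig_resident[idx] = false;
	}
}
#endif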
1647a9643ea8Slogwang
1648a9643ea8Slogwang static vm_page_t
1649*22ce4affSfengbojiang vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p)
1650a9643ea8Slogwang {
1651a9643ea8Slogwang vm_object_t backing_object;
1652a9643ea8Slogwang
1653a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(object);
1654a9643ea8Slogwang backing_object = object->backing_object;
1655a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(backing_object);
1656a9643ea8Slogwang
1657a9643ea8Slogwang KASSERT(p == NULL || p->object == object || p->object == backing_object,
1658a9643ea8Slogwang ("invalid ownership %p %p %p", p, object, backing_object));
1659*22ce4affSfengbojiang /* The page is only NULL when rename fails. */
1660*22ce4affSfengbojiang if (p == NULL) {
1661a9643ea8Slogwang VM_OBJECT_WUNLOCK(object);
1662a9643ea8Slogwang VM_OBJECT_WUNLOCK(backing_object);
1663*22ce4affSfengbojiang vm_radix_wait();
1664*22ce4affSfengbojiang } else {
1665*22ce4affSfengbojiang if (p->object == object)
1666*22ce4affSfengbojiang VM_OBJECT_WUNLOCK(backing_object);
1667a9643ea8Slogwang else
1668*22ce4affSfengbojiang VM_OBJECT_WUNLOCK(object);
1669*22ce4affSfengbojiang vm_page_busy_sleep(p, "vmocol", false);
1670*22ce4affSfengbojiang }
1671a9643ea8Slogwang VM_OBJECT_WLOCK(object);
1672a9643ea8Slogwang VM_OBJECT_WLOCK(backing_object);
1673a9643ea8Slogwang return (TAILQ_FIRST(&backing_object->memq));
1674a9643ea8Slogwang }
1675a9643ea8Slogwang
1676a9643ea8Slogwang static bool
1677a9643ea8Slogwang vm_object_scan_all_shadowed(vm_object_t object)
1678a9643ea8Slogwang {
1679a9643ea8Slogwang vm_object_t backing_object;
1680a9643ea8Slogwang vm_page_t p, pp;
1681*22ce4affSfengbojiang vm_pindex_t backing_offset_index, new_pindex, pi, ps;
1682a9643ea8Slogwang
1683a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(object);
1684a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(object->backing_object);
1685a9643ea8Slogwang
1686a9643ea8Slogwang backing_object = object->backing_object;
1687a9643ea8Slogwang
1688*22ce4affSfengbojiang if ((backing_object->flags & OBJ_ANON) == 0)
1689a9643ea8Slogwang return (false);
1690a9643ea8Slogwang
1691*22ce4affSfengbojiang pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1692*22ce4affSfengbojiang p = vm_page_find_least(backing_object, pi);
1693*22ce4affSfengbojiang ps = swap_pager_find_least(backing_object, pi);
1694a9643ea8Slogwang
1695a9643ea8Slogwang /*
1696*22ce4affSfengbojiang * Only check pages inside the parent object's range and
1697*22ce4affSfengbojiang * inside the parent object's mapping of the backing object.
1698a9643ea8Slogwang */
1699*22ce4affSfengbojiang for (;; pi++) {
1700*22ce4affSfengbojiang if (p != NULL && p->pindex < pi)
1701*22ce4affSfengbojiang p = TAILQ_NEXT(p, listq);
1702*22ce4affSfengbojiang if (ps < pi)
1703*22ce4affSfengbojiang ps = swap_pager_find_least(backing_object, pi);
1704*22ce4affSfengbojiang if (p == NULL && ps >= backing_object->size)
1705*22ce4affSfengbojiang break;
1706*22ce4affSfengbojiang else if (p == NULL)
1707*22ce4affSfengbojiang pi = ps;
1708*22ce4affSfengbojiang else
1709*22ce4affSfengbojiang pi = MIN(p->pindex, ps);
1710*22ce4affSfengbojiang
1711*22ce4affSfengbojiang new_pindex = pi - backing_offset_index;
1712*22ce4affSfengbojiang if (new_pindex >= object->size)
1713*22ce4affSfengbojiang break;
1714*22ce4affSfengbojiang
1715*22ce4affSfengbojiang if (p != NULL) {
1716*22ce4affSfengbojiang /*
1717*22ce4affSfengbojiang * If the backing object page is busy, a
1718*22ce4affSfengbojiang * grandparent or older page may still be
1719*22ce4affSfengbojiang * undergoing CoW. It is not safe to collapse
1720*22ce4affSfengbojiang * the backing object until it is quiesced.
1721*22ce4affSfengbojiang */
1722*22ce4affSfengbojiang if (vm_page_tryxbusy(p) == 0)
1723*22ce4affSfengbojiang return (false);
1724*22ce4affSfengbojiang
1725*22ce4affSfengbojiang /*
1726*22ce4affSfengbojiang * We raced with the fault handler that left
1727*22ce4affSfengbojiang * newly allocated invalid page on the object
1728*22ce4affSfengbojiang * queue and retried.
1729*22ce4affSfengbojiang */
1730*22ce4affSfengbojiang if (!vm_page_all_valid(p))
1731*22ce4affSfengbojiang goto unbusy_ret;
1732*22ce4affSfengbojiang }
1733a9643ea8Slogwang
1734a9643ea8Slogwang /*
1735a9643ea8Slogwang * See if the parent has the page or if the parent's object
1736a9643ea8Slogwang * pager has the page. If the parent has the page but the page
1737a9643ea8Slogwang * is not valid, the parent's object pager must have the page.
1738a9643ea8Slogwang *
1739a9643ea8Slogwang * If this fails, the parent does not completely shadow the
1740a9643ea8Slogwang * object and we might as well give up now.
1741a9643ea8Slogwang */
1742a9643ea8Slogwang pp = vm_page_lookup(object, new_pindex);
1743*22ce4affSfengbojiang
1744*22ce4affSfengbojiang /*
1745*22ce4affSfengbojiang * The valid check here is stable due to object lock
1746*22ce4affSfengbojiang * being required to clear valid and initiate paging.
1747*22ce4affSfengbojiang * Busy of p disallows fault handler to validate pp.
1748*22ce4affSfengbojiang */
1749*22ce4affSfengbojiang if ((pp == NULL || vm_page_none_valid(pp)) &&
1750a9643ea8Slogwang !vm_pager_has_page(object, new_pindex, NULL, NULL))
1751*22ce4affSfengbojiang goto unbusy_ret;
1752*22ce4affSfengbojiang if (p != NULL)
1753*22ce4affSfengbojiang vm_page_xunbusy(p);
1754a9643ea8Slogwang }
1755a9643ea8Slogwang return (true);
1756*22ce4affSfengbojiang
1757*22ce4affSfengbojiang unbusy_ret:
1758*22ce4affSfengbojiang if (p != NULL)
1759*22ce4affSfengbojiang vm_page_xunbusy(p);
1760*22ce4affSfengbojiang return (false);
1761a9643ea8Slogwang }
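
/*
 * The "fully shadowed" test can be restated with a small model: for
 * every index the backing object can supply (resident page or swap)
 * inside the parent's window, the parent itself must be able to supply
 * that index.  The bool arrays below stand in for the combined
 * resident-page and pager lookups; the names are illustrative.
 */
#if 0
#include <stdbool.h>
#include <stddef.h>

static bool
toy_all_shadowed(const bool *backing_has, size_t backing_size,
    const bool *parent_has, size_t parent_size, size_t off)
{
	size_t pi;

	for (pi = off; pi < backing_size && pi - off < parent_size; pi++) {
		if (backing_has[pi] && !parent_has[pi - off])
			return (false);
	}
	return (true);
}
#endif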
1762a9643ea8Slogwang
1763*22ce4affSfengbojiang static void
1764*22ce4affSfengbojiang vm_object_collapse_scan(vm_object_t object)
1765a9643ea8Slogwang {
1766a9643ea8Slogwang vm_object_t backing_object;
1767a9643ea8Slogwang vm_page_t next, p, pp;
1768a9643ea8Slogwang vm_pindex_t backing_offset_index, new_pindex;
1769a9643ea8Slogwang
1770a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(object);
1771a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(object->backing_object);
1772a9643ea8Slogwang
1773a9643ea8Slogwang backing_object = object->backing_object;
1774a9643ea8Slogwang backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1775a9643ea8Slogwang
1776a9643ea8Slogwang /*
1777a9643ea8Slogwang * Scan the backing object's resident pages.
1778a9643ea8Slogwang */
1779a9643ea8Slogwang for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) {
1780a9643ea8Slogwang next = TAILQ_NEXT(p, listq);
1781a9643ea8Slogwang new_pindex = p->pindex - backing_offset_index;
1782a9643ea8Slogwang
1783a9643ea8Slogwang /*
1784a9643ea8Slogwang * Check for busy page
1785a9643ea8Slogwang */
1786*22ce4affSfengbojiang if (vm_page_tryxbusy(p) == 0) {
1787*22ce4affSfengbojiang next = vm_object_collapse_scan_wait(object, p);
1788a9643ea8Slogwang continue;
1789a9643ea8Slogwang }
1790a9643ea8Slogwang
1791*22ce4affSfengbojiang KASSERT(object->backing_object == backing_object,
1792*22ce4affSfengbojiang ("vm_object_collapse_scan: backing object mismatch %p != %p",
1793*22ce4affSfengbojiang object->backing_object, backing_object));
1794a9643ea8Slogwang KASSERT(p->object == backing_object,
1795*22ce4affSfengbojiang ("vm_object_collapse_scan: object mismatch %p != %p",
1796*22ce4affSfengbojiang p->object, backing_object));
1797a9643ea8Slogwang
1798a9643ea8Slogwang if (p->pindex < backing_offset_index ||
1799a9643ea8Slogwang new_pindex >= object->size) {
1800a9643ea8Slogwang if (backing_object->type == OBJT_SWAP)
1801a9643ea8Slogwang swap_pager_freespace(backing_object, p->pindex,
1802a9643ea8Slogwang 1);
1803a9643ea8Slogwang
1804a9643ea8Slogwang KASSERT(!pmap_page_is_mapped(p),
1805a9643ea8Slogwang ("freeing mapped page %p", p));
1806*22ce4affSfengbojiang if (vm_page_remove(p))
1807a9643ea8Slogwang vm_page_free(p);
1808*22ce4affSfengbojiang continue;
1809*22ce4affSfengbojiang }
1810*22ce4affSfengbojiang
1811*22ce4affSfengbojiang if (!vm_page_all_valid(p)) {
1812*22ce4affSfengbojiang KASSERT(!pmap_page_is_mapped(p),
1813*22ce4affSfengbojiang ("freeing mapped page %p", p));
1814*22ce4affSfengbojiang if (vm_page_remove(p))
1815*22ce4affSfengbojiang vm_page_free(p);
1816a9643ea8Slogwang continue;
1817a9643ea8Slogwang }
1818a9643ea8Slogwang
1819a9643ea8Slogwang pp = vm_page_lookup(object, new_pindex);
1820*22ce4affSfengbojiang if (pp != NULL && vm_page_tryxbusy(pp) == 0) {
1821*22ce4affSfengbojiang vm_page_xunbusy(p);
1822a9643ea8Slogwang /*
1823a9643ea8Slogwang * The page in the parent is busy and possibly not
1824a9643ea8Slogwang * (yet) valid. Until its state is finalized by the
1825a9643ea8Slogwang * busy bit owner, we can't tell whether it shadows the
1826*22ce4affSfengbojiang * original page.
1827a9643ea8Slogwang */
1828*22ce4affSfengbojiang next = vm_object_collapse_scan_wait(object, pp);
1829a9643ea8Slogwang continue;
1830a9643ea8Slogwang }
1831a9643ea8Slogwang
1832*22ce4affSfengbojiang if (pp != NULL && vm_page_none_valid(pp)) {
1833*22ce4affSfengbojiang /*
1834*22ce4affSfengbojiang * The page was invalid in the parent. Likely placed
1835*22ce4affSfengbojiang * there by an incomplete fault. Just remove and
1836*22ce4affSfengbojiang * ignore. p can replace it.
1837*22ce4affSfengbojiang */
1838*22ce4affSfengbojiang if (vm_page_remove(pp))
1839*22ce4affSfengbojiang vm_page_free(pp);
1840*22ce4affSfengbojiang pp = NULL;
1841*22ce4affSfengbojiang }
1842a9643ea8Slogwang
1843a9643ea8Slogwang if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL,
1844a9643ea8Slogwang NULL)) {
1845a9643ea8Slogwang /*
1846a9643ea8Slogwang * The page already exists in the parent OR swap exists
1847a9643ea8Slogwang * for this location in the parent. Leave the parent's
1848a9643ea8Slogwang * page alone. Destroy the original page from the
1849a9643ea8Slogwang * backing object.
1850a9643ea8Slogwang */
1851a9643ea8Slogwang if (backing_object->type == OBJT_SWAP)
1852a9643ea8Slogwang swap_pager_freespace(backing_object, p->pindex,
1853a9643ea8Slogwang 1);
1854a9643ea8Slogwang KASSERT(!pmap_page_is_mapped(p),
1855a9643ea8Slogwang ("freeing mapped page %p", p));
1856*22ce4affSfengbojiang if (vm_page_remove(p))
1857a9643ea8Slogwang vm_page_free(p);
1858*22ce4affSfengbojiang if (pp != NULL)
1859*22ce4affSfengbojiang vm_page_xunbusy(pp);
1860a9643ea8Slogwang continue;
1861a9643ea8Slogwang }
1862a9643ea8Slogwang
1863a9643ea8Slogwang /*
1864a9643ea8Slogwang * Page does not exist in parent, rename the page from the
1865a9643ea8Slogwang * backing object to the main object.
1866a9643ea8Slogwang *
1867a9643ea8Slogwang * If the page was mapped to a process, it can remain mapped
1868*22ce4affSfengbojiang * through the rename. vm_page_rename() will dirty the page.
1869a9643ea8Slogwang */
1870a9643ea8Slogwang if (vm_page_rename(p, object, new_pindex)) {
1871*22ce4affSfengbojiang vm_page_xunbusy(p);
1872*22ce4affSfengbojiang next = vm_object_collapse_scan_wait(object, NULL);
1873a9643ea8Slogwang continue;
1874a9643ea8Slogwang }
1875a9643ea8Slogwang
1876a9643ea8Slogwang /* Use the old pindex to free the right page. */
1877a9643ea8Slogwang if (backing_object->type == OBJT_SWAP)
1878a9643ea8Slogwang swap_pager_freespace(backing_object,
1879a9643ea8Slogwang new_pindex + backing_offset_index, 1);
1880a9643ea8Slogwang
1881a9643ea8Slogwang #if VM_NRESERVLEVEL > 0
1882a9643ea8Slogwang /*
1883a9643ea8Slogwang * Rename the reservation.
1884a9643ea8Slogwang */
1885a9643ea8Slogwang vm_reserv_rename(p, object, backing_object,
1886a9643ea8Slogwang backing_offset_index);
1887a9643ea8Slogwang #endif
1888*22ce4affSfengbojiang vm_page_xunbusy(p);
1889a9643ea8Slogwang }
1890a9643ea8Slogwang return;
1891a9643ea8Slogwang }
1892a9643ea8Slogwang
1893a9643ea8Slogwang /*
1894a9643ea8Slogwang * vm_object_collapse:
1895a9643ea8Slogwang *
1896a9643ea8Slogwang * Collapse an object with the object backing it.
1897a9643ea8Slogwang * Pages in the backing object are moved into the
1898a9643ea8Slogwang * parent, and the backing object is deallocated.
1899a9643ea8Slogwang */
1900a9643ea8Slogwang void
1901a9643ea8Slogwang vm_object_collapse(vm_object_t object)
1902a9643ea8Slogwang {
1903a9643ea8Slogwang vm_object_t backing_object, new_backing_object;
1904a9643ea8Slogwang
1905a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(object);
1906a9643ea8Slogwang
1907a9643ea8Slogwang while (TRUE) {
1908*22ce4affSfengbojiang KASSERT((object->flags & (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON,
1909*22ce4affSfengbojiang ("collapsing invalid object"));
1910*22ce4affSfengbojiang
1911a9643ea8Slogwang /*
1912*22ce4affSfengbojiang * Wait for the backing_object to finish any pending
1913*22ce4affSfengbojiang * collapse so that the caller sees the shortest possible
1914*22ce4affSfengbojiang * shadow chain.
1915a9643ea8Slogwang */
1916*22ce4affSfengbojiang backing_object = vm_object_backing_collapse_wait(object);
1917*22ce4affSfengbojiang if (backing_object == NULL)
1918*22ce4affSfengbojiang return;
1919*22ce4affSfengbojiang
1920*22ce4affSfengbojiang KASSERT(object->ref_count > 0 &&
1921*22ce4affSfengbojiang object->ref_count > object->shadow_count,
1922*22ce4affSfengbojiang ("collapse with invalid ref %d or shadow %d count.",
1923*22ce4affSfengbojiang object->ref_count, object->shadow_count));
1924*22ce4affSfengbojiang KASSERT((backing_object->flags &
1925*22ce4affSfengbojiang (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
1926*22ce4affSfengbojiang ("vm_object_collapse: Backing object already collapsing."));
1927*22ce4affSfengbojiang KASSERT((object->flags & (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
1928*22ce4affSfengbojiang ("vm_object_collapse: object is already collapsing."));
1929a9643ea8Slogwang
1930a9643ea8Slogwang /*
1931*22ce4affSfengbojiang * We know that we can either collapse the backing object if
1932*22ce4affSfengbojiang * the parent is the only reference to it, or (perhaps) have
1933a9643ea8Slogwang * the parent bypass the object if the parent happens to shadow
1934a9643ea8Slogwang * all the resident pages in the entire backing object.
1935a9643ea8Slogwang */
1936a9643ea8Slogwang if (backing_object->ref_count == 1) {
1937*22ce4affSfengbojiang KASSERT(backing_object->shadow_count == 1,
1938*22ce4affSfengbojiang ("vm_object_collapse: shadow_count: %d",
1939*22ce4affSfengbojiang backing_object->shadow_count));
1940a9643ea8Slogwang vm_object_pip_add(object, 1);
1941*22ce4affSfengbojiang vm_object_set_flag(object, OBJ_COLLAPSING);
1942a9643ea8Slogwang vm_object_pip_add(backing_object, 1);
1943*22ce4affSfengbojiang vm_object_set_flag(backing_object, OBJ_DEAD);
1944a9643ea8Slogwang
1945a9643ea8Slogwang /*
1946a9643ea8Slogwang * If there is exactly one reference to the backing
1947a9643ea8Slogwang * object, we can collapse it into the parent.
1948a9643ea8Slogwang */
1949*22ce4affSfengbojiang vm_object_collapse_scan(object);
1950a9643ea8Slogwang
1951a9643ea8Slogwang #if VM_NRESERVLEVEL > 0
1952a9643ea8Slogwang /*
1953a9643ea8Slogwang * Break any reservations from backing_object.
1954a9643ea8Slogwang */
1955a9643ea8Slogwang if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
1956a9643ea8Slogwang vm_reserv_break_all(backing_object);
1957a9643ea8Slogwang #endif
1958a9643ea8Slogwang
1959a9643ea8Slogwang /*
1960a9643ea8Slogwang * Move the pager from backing_object to object.
1961a9643ea8Slogwang */
1962a9643ea8Slogwang if (backing_object->type == OBJT_SWAP) {
1963a9643ea8Slogwang /*
1964a9643ea8Slogwang * swap_pager_copy() can sleep, in which case
1965a9643ea8Slogwang * the backing_object's and object's locks are
1966a9643ea8Slogwang * released and reacquired.
1967a9643ea8Slogwang * Since swap_pager_copy() is being asked to
1968*22ce4affSfengbojiang * destroy backing_object, it will change the
1969*22ce4affSfengbojiang * type to OBJT_DEFAULT.
1970a9643ea8Slogwang */
1971a9643ea8Slogwang swap_pager_copy(
1972a9643ea8Slogwang backing_object,
1973a9643ea8Slogwang object,
1974a9643ea8Slogwang OFF_TO_IDX(object->backing_object_offset), TRUE);
1975*22ce4affSfengbojiang }
1976a9643ea8Slogwang
1977a9643ea8Slogwang /*
1978a9643ea8Slogwang * Object now shadows whatever backing_object did.
1979a9643ea8Slogwang */
1980*22ce4affSfengbojiang vm_object_clear_flag(object, OBJ_COLLAPSING);
1981*22ce4affSfengbojiang vm_object_backing_transfer(object, backing_object);
1982a9643ea8Slogwang object->backing_object_offset +=
1983a9643ea8Slogwang backing_object->backing_object_offset;
1984*22ce4affSfengbojiang VM_OBJECT_WUNLOCK(object);
1985*22ce4affSfengbojiang vm_object_pip_wakeup(object);
1986a9643ea8Slogwang
1987a9643ea8Slogwang /*
1988a9643ea8Slogwang * Discard backing_object.
1989a9643ea8Slogwang *
1990a9643ea8Slogwang * Since the backing object has no pages, no pager left,
1991a9643ea8Slogwang * and no object references within it, all that is
1992a9643ea8Slogwang * necessary is to dispose of it.
1993a9643ea8Slogwang */
1994a9643ea8Slogwang KASSERT(backing_object->ref_count == 1, (
1995a9643ea8Slogwang "backing_object %p was somehow re-referenced during collapse!",
1996a9643ea8Slogwang backing_object));
1997a9643ea8Slogwang vm_object_pip_wakeup(backing_object);
1998*22ce4affSfengbojiang (void)refcount_release(&backing_object->ref_count);
1999*22ce4affSfengbojiang vm_object_terminate(backing_object);
2000*22ce4affSfengbojiang counter_u64_add(object_collapses, 1);
2001*22ce4affSfengbojiang VM_OBJECT_WLOCK(object);
2002a9643ea8Slogwang } else {
2003a9643ea8Slogwang /*
2004a9643ea8Slogwang * If we do not entirely shadow the backing object,
2005a9643ea8Slogwang * there is nothing we can do so we give up.
2006*22ce4affSfengbojiang *
2007*22ce4affSfengbojiang * The object lock and backing_object lock must not
2008*22ce4affSfengbojiang * be dropped during this sequence.
2009a9643ea8Slogwang */
2010*22ce4affSfengbojiang if (!vm_object_scan_all_shadowed(object)) {
2011a9643ea8Slogwang VM_OBJECT_WUNLOCK(backing_object);
2012a9643ea8Slogwang break;
2013a9643ea8Slogwang }
2014a9643ea8Slogwang
2015a9643ea8Slogwang /*
2016a9643ea8Slogwang * Make the parent shadow the next object in the
2017a9643ea8Slogwang * chain. Deallocating backing_object will not remove
2018a9643ea8Slogwang * it, since its reference count is at least 2.
2019a9643ea8Slogwang */
2020*22ce4affSfengbojiang vm_object_backing_remove_locked(object);
2021a9643ea8Slogwang new_backing_object = backing_object->backing_object;
2022*22ce4affSfengbojiang if (new_backing_object != NULL) {
2023*22ce4affSfengbojiang vm_object_backing_insert_ref(object,
2024*22ce4affSfengbojiang new_backing_object);
2025a9643ea8Slogwang object->backing_object_offset +=
2026a9643ea8Slogwang backing_object->backing_object_offset;
2027a9643ea8Slogwang }
2028a9643ea8Slogwang
2029a9643ea8Slogwang /*
2030a9643ea8Slogwang * Drop the reference count on backing_object. Since
2031a9643ea8Slogwang * its ref_count was at least 2, it will not vanish.
2032a9643ea8Slogwang */
2033*22ce4affSfengbojiang (void)refcount_release(&backing_object->ref_count);
2034*22ce4affSfengbojiang KASSERT(backing_object->ref_count >= 1, (
2035*22ce4affSfengbojiang "backing_object %p was somehow dereferenced during collapse!",
2036*22ce4affSfengbojiang backing_object));
2037a9643ea8Slogwang VM_OBJECT_WUNLOCK(backing_object);
2038*22ce4affSfengbojiang counter_u64_add(object_bypasses, 1);
2039a9643ea8Slogwang }
2040a9643ea8Slogwang
2041a9643ea8Slogwang /*
2042a9643ea8Slogwang * Try again with this object's new backing object.
2043a9643ea8Slogwang */
2044a9643ea8Slogwang }
2045a9643ea8Slogwang }
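
/*
 * The "bypass" branch above amounts to a pointer-and-offset rewrite:
 * when the parent fully shadows its backing object, it may skip it and
 * shadow the grandparent directly, and the two window offsets add up.
 * A sketch of that step with a simplified object type; reference
 * counting is deliberately omitted, and the names are illustrative.
 */
#if 0
#include <stddef.h>

struct toy_obj {
	struct toy_obj *backing;
	long long backing_offset;
};

static void
toy_bypass(struct toy_obj *parent)
{
	struct toy_obj *backing;

	backing = parent->backing;
	if (backing == NULL)
		return;
	parent->backing = backing->backing;
	parent->backing_offset += backing->backing_offset;
}
#endif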
2046a9643ea8Slogwang
2047a9643ea8Slogwang /*
2048a9643ea8Slogwang * vm_object_page_remove:
2049a9643ea8Slogwang *
2050a9643ea8Slogwang * For the given object, either frees or invalidates each of the
2051a9643ea8Slogwang * specified pages. In general, a page is freed. However, if a page is
2052a9643ea8Slogwang * wired for any reason other than the existence of a managed, wired
2053a9643ea8Slogwang * mapping, then it may be invalidated but not removed from the object.
2054a9643ea8Slogwang * Pages are specified by the given range ["start", "end") and the option
2055a9643ea8Slogwang * OBJPR_CLEANONLY. As a special case, if "end" is zero, then the range
2056a9643ea8Slogwang * extends from "start" to the end of the object. If the option
2057a9643ea8Slogwang * OBJPR_CLEANONLY is specified, then only the non-dirty pages within the
2058a9643ea8Slogwang * specified range are affected. If the option OBJPR_NOTMAPPED is
2059a9643ea8Slogwang * specified, then the pages within the specified range must have no
2060a9643ea8Slogwang * mappings. Otherwise, if this option is not specified, any mappings to
2061a9643ea8Slogwang * the specified pages are removed before the pages are freed or
2062a9643ea8Slogwang * invalidated.
2063a9643ea8Slogwang *
2064a9643ea8Slogwang * In general, this operation should only be performed on objects that
2065a9643ea8Slogwang * contain managed pages. There are, however, two exceptions. First, it
2066a9643ea8Slogwang * is performed on the kernel and kmem objects by vm_map_entry_delete().
2067a9643ea8Slogwang * Second, it is used by msync(..., MS_INVALIDATE) to invalidate device-
2068a9643ea8Slogwang * backed pages. In both of these cases, the option OBJPR_CLEANONLY must
2069a9643ea8Slogwang * not be specified and the option OBJPR_NOTMAPPED must be specified.
2070a9643ea8Slogwang *
2071a9643ea8Slogwang * The object must be locked.
2072a9643ea8Slogwang */
2073a9643ea8Slogwang void
2074a9643ea8Slogwang vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
2075a9643ea8Slogwang int options)
2076a9643ea8Slogwang {
2077a9643ea8Slogwang vm_page_t p, next;
2078a9643ea8Slogwang
2079a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(object);
2080a9643ea8Slogwang KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
2081a9643ea8Slogwang (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED,
2082a9643ea8Slogwang ("vm_object_page_remove: illegal options for object %p", object));
2083a9643ea8Slogwang if (object->resident_page_count == 0)
2084*22ce4affSfengbojiang return;
2085a9643ea8Slogwang vm_object_pip_add(object, 1);
2086a9643ea8Slogwang again:
2087a9643ea8Slogwang p = vm_page_find_least(object, start);
2088a9643ea8Slogwang
2089a9643ea8Slogwang /*
2090a9643ea8Slogwang * Here, the variable "p" is either (1) the page with the least pindex
2091a9643ea8Slogwang * greater than or equal to the parameter "start" or (2) NULL.
2092a9643ea8Slogwang */
2093a9643ea8Slogwang for (; p != NULL && (p->pindex < end || end == 0); p = next) {
2094a9643ea8Slogwang next = TAILQ_NEXT(p, listq);
2095a9643ea8Slogwang
2096a9643ea8Slogwang /*
2097a9643ea8Slogwang * If the page is wired for any reason besides the existence
2098a9643ea8Slogwang * of managed, wired mappings, then it cannot be freed. For
2099a9643ea8Slogwang * example, fictitious pages, which represent device memory,
2100a9643ea8Slogwang * are inherently wired and cannot be freed. They can,
2101a9643ea8Slogwang * however, be invalidated if the option OBJPR_CLEANONLY is
2102a9643ea8Slogwang * not specified.
2103a9643ea8Slogwang */
2104*22ce4affSfengbojiang if (vm_page_tryxbusy(p) == 0) {
2105*22ce4affSfengbojiang vm_page_sleep_if_busy(p, "vmopar");
2106a9643ea8Slogwang goto again;
2107a9643ea8Slogwang }
2108*22ce4affSfengbojiang if (vm_page_wired(p)) {
2109*22ce4affSfengbojiang wired:
2110*22ce4affSfengbojiang if ((options & OBJPR_NOTMAPPED) == 0 &&
2111*22ce4affSfengbojiang object->ref_count != 0)
2112a9643ea8Slogwang pmap_remove_all(p);
2113a9643ea8Slogwang if ((options & OBJPR_CLEANONLY) == 0) {
2114*22ce4affSfengbojiang vm_page_invalid(p);
2115a9643ea8Slogwang vm_page_undirty(p);
2116a9643ea8Slogwang }
2117*22ce4affSfengbojiang vm_page_xunbusy(p);
2118*22ce4affSfengbojiang continue;
2119a9643ea8Slogwang }
2120a9643ea8Slogwang KASSERT((p->flags & PG_FICTITIOUS) == 0,
2121a9643ea8Slogwang ("vm_object_page_remove: page %p is fictitious", p));
2122*22ce4affSfengbojiang if ((options & OBJPR_CLEANONLY) != 0 &&
2123*22ce4affSfengbojiang !vm_page_none_valid(p)) {
2124*22ce4affSfengbojiang if ((options & OBJPR_NOTMAPPED) == 0 &&
2125*22ce4affSfengbojiang object->ref_count != 0 &&
2126*22ce4affSfengbojiang !vm_page_try_remove_write(p))
2127*22ce4affSfengbojiang goto wired;
2128*22ce4affSfengbojiang if (p->dirty != 0) {
2129*22ce4affSfengbojiang vm_page_xunbusy(p);
2130*22ce4affSfengbojiang continue;
2131a9643ea8Slogwang }
2132*22ce4affSfengbojiang }
2133*22ce4affSfengbojiang if ((options & OBJPR_NOTMAPPED) == 0 &&
2134*22ce4affSfengbojiang object->ref_count != 0 && !vm_page_try_remove_all(p))
2135*22ce4affSfengbojiang goto wired;
2136a9643ea8Slogwang vm_page_free(p);
2137a9643ea8Slogwang }
2138a9643ea8Slogwang vm_object_pip_wakeup(object);
2139*22ce4affSfengbojiang
2140*22ce4affSfengbojiang if (object->type == OBJT_SWAP) {
2141*22ce4affSfengbojiang if (end == 0)
2142*22ce4affSfengbojiang end = object->size;
2143*22ce4affSfengbojiang swap_pager_freespace(object, start, end - start);
2144*22ce4affSfengbojiang }
2145a9643ea8Slogwang }
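
/*
 * The per-page decision made above, ignoring busying, mapping removal
 * and the wired-retry path, reduces to a small rule.  The enum, flag
 * name and toy_page_remove_one() below are illustrative only.
 */
#if 0
#include <stdbool.h>

#define	TOY_CLEANONLY	0x1

enum toy_action { TOY_KEEP, TOY_INVALIDATE, TOY_FREE };

static enum toy_action
toy_page_remove_one(bool wired, bool dirty, int options)
{

	if (wired) {
		/* Wired pages may be invalidated but never freed here. */
		return ((options & TOY_CLEANONLY) != 0 ?
		    TOY_KEEP : TOY_INVALIDATE);
	}
	if ((options & TOY_CLEANONLY) != 0 && dirty)
		return (TOY_KEEP);	/* Dirty pages survive a clean-only pass. */
	return (TOY_FREE);
}
#endif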
2146a9643ea8Slogwang
2147a9643ea8Slogwang /*
2148a9643ea8Slogwang * vm_object_page_noreuse:
2149a9643ea8Slogwang *
2150a9643ea8Slogwang * For the given object, attempt to move the specified pages to
2151a9643ea8Slogwang * the head of the inactive queue. This bypasses regular LRU
2152a9643ea8Slogwang * operation and allows the pages to be reused quickly under memory
2153a9643ea8Slogwang * pressure. If a page is wired for any reason, then it will not
2154a9643ea8Slogwang * be queued. Pages are specified by the range ["start", "end").
2155a9643ea8Slogwang * As a special case, if "end" is zero, then the range extends from
2156a9643ea8Slogwang * "start" to the end of the object.
2157a9643ea8Slogwang *
2158a9643ea8Slogwang * This operation should only be performed on objects that
2159a9643ea8Slogwang * contain non-fictitious, managed pages.
2160a9643ea8Slogwang *
2161a9643ea8Slogwang * The object must be locked.
2162a9643ea8Slogwang */
2163a9643ea8Slogwang void
2164a9643ea8Slogwang vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
2165a9643ea8Slogwang {
2166a9643ea8Slogwang vm_page_t p, next;
2167a9643ea8Slogwang
2168*22ce4affSfengbojiang VM_OBJECT_ASSERT_LOCKED(object);
2169a9643ea8Slogwang KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0,
2170a9643ea8Slogwang ("vm_object_page_noreuse: illegal object %p", object));
2171a9643ea8Slogwang if (object->resident_page_count == 0)
2172a9643ea8Slogwang return;
2173a9643ea8Slogwang p = vm_page_find_least(object, start);
2174a9643ea8Slogwang
2175a9643ea8Slogwang /*
2176a9643ea8Slogwang * Here, the variable "p" is either (1) the page with the least pindex
2177a9643ea8Slogwang * greater than or equal to the parameter "start" or (2) NULL.
2178a9643ea8Slogwang */
2179a9643ea8Slogwang for (; p != NULL && (p->pindex < end || end == 0); p = next) {
2180a9643ea8Slogwang next = TAILQ_NEXT(p, listq);
2181a9643ea8Slogwang vm_page_deactivate_noreuse(p);
2182a9643ea8Slogwang }
2183a9643ea8Slogwang }
2184a9643ea8Slogwang
2185a9643ea8Slogwang /*
2186a9643ea8Slogwang * Populate the specified range of the object with valid pages. Returns
2187a9643ea8Slogwang * TRUE if the range is successfully populated and FALSE otherwise.
2188a9643ea8Slogwang *
2189a9643ea8Slogwang * Note: This function should be optimized to pass a larger array of
2190a9643ea8Slogwang * pages to vm_pager_get_pages() before it is applied to a non-
2191a9643ea8Slogwang * OBJT_DEVICE object.
2192a9643ea8Slogwang *
2193a9643ea8Slogwang * The object must be locked.
2194a9643ea8Slogwang */
2195a9643ea8Slogwang boolean_t
2196a9643ea8Slogwang vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
2197a9643ea8Slogwang {
2198a9643ea8Slogwang vm_page_t m;
2199a9643ea8Slogwang vm_pindex_t pindex;
2200a9643ea8Slogwang int rv;
2201a9643ea8Slogwang
2202a9643ea8Slogwang VM_OBJECT_ASSERT_WLOCKED(object);
2203a9643ea8Slogwang for (pindex = start; pindex < end; pindex++) {
2204*22ce4affSfengbojiang rv = vm_page_grab_valid(&m, object, pindex, VM_ALLOC_NORMAL);
2205*22ce4affSfengbojiang if (rv != VM_PAGER_OK)
2206a9643ea8Slogwang break;
2207*22ce4affSfengbojiang
2208a9643ea8Slogwang /*
2209a9643ea8Slogwang * Keep "m" busy because a subsequent iteration may unlock
2210a9643ea8Slogwang * the object.
2211a9643ea8Slogwang */
2212a9643ea8Slogwang }
2213a9643ea8Slogwang if (pindex > start) {
2214a9643ea8Slogwang m = vm_page_lookup(object, start);
2215a9643ea8Slogwang while (m != NULL && m->pindex < pindex) {
2216a9643ea8Slogwang vm_page_xunbusy(m);
2217a9643ea8Slogwang m = TAILQ_NEXT(m, listq);
2218a9643ea8Slogwang }
2219a9643ea8Slogwang }
2220a9643ea8Slogwang return (pindex == end);
2221a9643ea8Slogwang }
2222a9643ea8Slogwang
2223a9643ea8Slogwang /*
2224a9643ea8Slogwang * Routine: vm_object_coalesce
2225a9643ea8Slogwang * Function: Coalesces two objects backing up adjoining
2226a9643ea8Slogwang * regions of memory into a single object.
2227a9643ea8Slogwang *
2228a9643ea8Slogwang * Returns TRUE if the objects were combined.
2229a9643ea8Slogwang *
2230a9643ea8Slogwang * NOTE: Only works at the moment if the second object is NULL -
2231a9643ea8Slogwang * if it's not, which object do we lock first?
2232a9643ea8Slogwang *
2233a9643ea8Slogwang * Parameters:
2234a9643ea8Slogwang * prev_object First object to coalesce
2235a9643ea8Slogwang * prev_offset Offset into prev_object
2236a9643ea8Slogwang * prev_size Size of reference to prev_object
2237a9643ea8Slogwang * next_size Size of reference to the second object
2238a9643ea8Slogwang * reserved Indicator that extension region has
2239a9643ea8Slogwang * swap accounted for
2240a9643ea8Slogwang *
2241a9643ea8Slogwang * Conditions:
2242a9643ea8Slogwang * The object must *not* be locked.
2243a9643ea8Slogwang */
2244a9643ea8Slogwang boolean_t
2245a9643ea8Slogwang vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
2246a9643ea8Slogwang vm_size_t prev_size, vm_size_t next_size, boolean_t reserved)
2247a9643ea8Slogwang {
2248a9643ea8Slogwang vm_pindex_t next_pindex;
2249a9643ea8Slogwang
2250a9643ea8Slogwang if (prev_object == NULL)
2251a9643ea8Slogwang return (TRUE);
2252*22ce4affSfengbojiang if ((prev_object->flags & OBJ_ANON) == 0)
2253a9643ea8Slogwang return (FALSE);
2254a9643ea8Slogwang
2255*22ce4affSfengbojiang VM_OBJECT_WLOCK(prev_object);
2256a9643ea8Slogwang /*
2257*22ce4affSfengbojiang * Try to collapse the object first.
2258a9643ea8Slogwang */
2259a9643ea8Slogwang vm_object_collapse(prev_object);
2260a9643ea8Slogwang
2261a9643ea8Slogwang /*
2262a9643ea8Slogwang * Can't coalesce if: more than one reference; paged out; shadows
2263a9643ea8Slogwang * another object; or has a copy elsewhere (any of which mean that the
2264a9643ea8Slogwang * pages not mapped to prev_entry may be in use anyway).
2265a9643ea8Slogwang */
2266a9643ea8Slogwang if (prev_object->backing_object != NULL) {
2267a9643ea8Slogwang VM_OBJECT_WUNLOCK(prev_object);
2268a9643ea8Slogwang return (FALSE);
2269a9643ea8Slogwang }
2270a9643ea8Slogwang
2271a9643ea8Slogwang prev_size >>= PAGE_SHIFT;
2272a9643ea8Slogwang next_size >>= PAGE_SHIFT;
2273a9643ea8Slogwang next_pindex = OFF_TO_IDX(prev_offset) + prev_size;
2274a9643ea8Slogwang
2275*22ce4affSfengbojiang if (prev_object->ref_count > 1 &&
2276*22ce4affSfengbojiang prev_object->size != next_pindex &&
2277*22ce4affSfengbojiang (prev_object->flags & OBJ_ONEMAPPING) == 0) {
2278a9643ea8Slogwang VM_OBJECT_WUNLOCK(prev_object);
2279a9643ea8Slogwang return (FALSE);
2280a9643ea8Slogwang }
2281a9643ea8Slogwang
2282a9643ea8Slogwang /*
2283a9643ea8Slogwang * Account for the charge.
2284a9643ea8Slogwang */
2285a9643ea8Slogwang if (prev_object->cred != NULL) {
2286a9643ea8Slogwang /*
2287a9643ea8Slogwang * If prev_object was charged, then this mapping,
2288a9643ea8Slogwang * although not charged now, may become writable
2289a9643ea8Slogwang * later. A non-NULL cred in the object would prevent
2290a9643ea8Slogwang * swap reservation when write access is enabled,
2291a9643ea8Slogwang * so reserve swap now. A failed reservation
2292a9643ea8Slogwang * causes allocation of a separate object for the map
2293a9643ea8Slogwang * entry, and swap reservation for that entry is
2294a9643ea8Slogwang * then managed at the appropriate time.
2295a9643ea8Slogwang */
2296a9643ea8Slogwang if (!reserved && !swap_reserve_by_cred(ptoa(next_size),
2297a9643ea8Slogwang prev_object->cred)) {
2298a9643ea8Slogwang VM_OBJECT_WUNLOCK(prev_object);
2299a9643ea8Slogwang return (FALSE);
2300a9643ea8Slogwang }
2301a9643ea8Slogwang prev_object->charge += ptoa(next_size);
2302a9643ea8Slogwang }
2303a9643ea8Slogwang
2304a9643ea8Slogwang /*
2305a9643ea8Slogwang * Remove any pages that may still be in the object from a previous
2306a9643ea8Slogwang * deallocation.
2307a9643ea8Slogwang */
2308a9643ea8Slogwang if (next_pindex < prev_object->size) {
2309a9643ea8Slogwang vm_object_page_remove(prev_object, next_pindex, next_pindex +
2310a9643ea8Slogwang next_size, 0);
2311a9643ea8Slogwang #if 0
2312a9643ea8Slogwang if (prev_object->cred != NULL) {
2313a9643ea8Slogwang KASSERT(prev_object->charge >=
2314a9643ea8Slogwang ptoa(prev_object->size - next_pindex),
2315a9643ea8Slogwang ("object %p overcharged 1 %jx %jx", prev_object,
2316a9643ea8Slogwang (uintmax_t)next_pindex, (uintmax_t)next_size));
2317a9643ea8Slogwang prev_object->charge -= ptoa(prev_object->size -
2318a9643ea8Slogwang next_pindex);
2319a9643ea8Slogwang }
2320a9643ea8Slogwang #endif
2321a9643ea8Slogwang }
2322a9643ea8Slogwang
2323a9643ea8Slogwang /*
2324a9643ea8Slogwang * Extend the object if necessary.
2325a9643ea8Slogwang */
2326a9643ea8Slogwang if (next_pindex + next_size > prev_object->size)
2327a9643ea8Slogwang prev_object->size = next_pindex + next_size;
2328a9643ea8Slogwang
2329a9643ea8Slogwang VM_OBJECT_WUNLOCK(prev_object);
2330a9643ea8Slogwang return (TRUE);
2331a9643ea8Slogwang }
2332a9643ea8Slogwang
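/*
 * Illustrative sketch, not part of the original source: how a map-entry
 * extension path might try to reuse the previous entry's anonymous object
 * rather than allocating a new one.  The prev_entry and addend_size names
 * are assumptions made for this example only.
 */
#if 0
	if (vm_object_coalesce(prev_entry->object.vm_object,
	    prev_entry->offset,
	    (vm_size_t)(prev_entry->end - prev_entry->start),
	    (vm_size_t)addend_size, FALSE)) {
		/* The existing object now also covers the added range. */
	} else {
		/* Fall back to allocating a fresh anonymous object. */
	}
#endif
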
2333a9643ea8Slogwang void
2334a9643ea8Slogwang vm_object_set_writeable_dirty(vm_object_t object)
2335a9643ea8Slogwang {
2336a9643ea8Slogwang
2337*22ce4affSfengbojiang /* Only set for vnodes & tmpfs */
2338*22ce4affSfengbojiang if (object->type != OBJT_VNODE &&
2339*22ce4affSfengbojiang (object->flags & OBJ_TMPFS_NODE) == 0)
2340a9643ea8Slogwang return;
2341*22ce4affSfengbojiang atomic_add_int(&object->generation, 1);
2342a9643ea8Slogwang }
2343a9643ea8Slogwang
2344a9643ea8Slogwang /*
2345a9643ea8Slogwang * vm_object_unwire:
2346a9643ea8Slogwang *
2347a9643ea8Slogwang * For each page offset within the specified range of the given object,
2348a9643ea8Slogwang * find the highest-level page in the shadow chain and unwire it. A page
2349a9643ea8Slogwang * must exist at every page offset, and the highest-level page must be
2350a9643ea8Slogwang * wired.
2351a9643ea8Slogwang */
2352a9643ea8Slogwang void
2353a9643ea8Slogwang vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length,
2354a9643ea8Slogwang uint8_t queue)
2355a9643ea8Slogwang {
2356*22ce4affSfengbojiang vm_object_t tobject, t1object;
2357a9643ea8Slogwang vm_page_t m, tm;
2358a9643ea8Slogwang vm_pindex_t end_pindex, pindex, tpindex;
2359a9643ea8Slogwang int depth, locked_depth;
2360a9643ea8Slogwang
2361a9643ea8Slogwang KASSERT((offset & PAGE_MASK) == 0,
2362a9643ea8Slogwang ("vm_object_unwire: offset is not page aligned"));
2363a9643ea8Slogwang KASSERT((length & PAGE_MASK) == 0,
2364a9643ea8Slogwang ("vm_object_unwire: length is not a multiple of PAGE_SIZE"));
2365a9643ea8Slogwang /* The wired count of a fictitious page never changes. */
2366a9643ea8Slogwang if ((object->flags & OBJ_FICTITIOUS) != 0)
2367a9643ea8Slogwang return;
2368a9643ea8Slogwang pindex = OFF_TO_IDX(offset);
2369a9643ea8Slogwang end_pindex = pindex + atop(length);
2370*22ce4affSfengbojiang again:
2371a9643ea8Slogwang locked_depth = 1;
2372a9643ea8Slogwang VM_OBJECT_RLOCK(object);
2373a9643ea8Slogwang m = vm_page_find_least(object, pindex);
2374a9643ea8Slogwang while (pindex < end_pindex) {
2375a9643ea8Slogwang if (m == NULL || pindex < m->pindex) {
2376a9643ea8Slogwang /*
2377a9643ea8Slogwang * The first object in the shadow chain doesn't
2378a9643ea8Slogwang * contain a page at the current index. Therefore,
2379a9643ea8Slogwang * the page must exist in a backing object.
2380a9643ea8Slogwang */
2381a9643ea8Slogwang tobject = object;
2382a9643ea8Slogwang tpindex = pindex;
2383a9643ea8Slogwang depth = 0;
2384a9643ea8Slogwang do {
2385a9643ea8Slogwang tpindex +=
2386a9643ea8Slogwang OFF_TO_IDX(tobject->backing_object_offset);
2387a9643ea8Slogwang tobject = tobject->backing_object;
2388a9643ea8Slogwang KASSERT(tobject != NULL,
2389a9643ea8Slogwang ("vm_object_unwire: missing page"));
2390a9643ea8Slogwang if ((tobject->flags & OBJ_FICTITIOUS) != 0)
2391a9643ea8Slogwang goto next_page;
2392a9643ea8Slogwang depth++;
2393a9643ea8Slogwang if (depth == locked_depth) {
2394a9643ea8Slogwang locked_depth++;
2395a9643ea8Slogwang VM_OBJECT_RLOCK(tobject);
2396a9643ea8Slogwang }
2397a9643ea8Slogwang } while ((tm = vm_page_lookup(tobject, tpindex)) ==
2398a9643ea8Slogwang NULL);
2399a9643ea8Slogwang } else {
2400a9643ea8Slogwang tm = m;
2401a9643ea8Slogwang m = TAILQ_NEXT(m, listq);
2402a9643ea8Slogwang }
2403*22ce4affSfengbojiang if (vm_page_trysbusy(tm) == 0) {
2404*22ce4affSfengbojiang for (tobject = object; locked_depth >= 1;
2405*22ce4affSfengbojiang locked_depth--) {
2406*22ce4affSfengbojiang t1object = tobject->backing_object;
2407*22ce4affSfengbojiang if (tm->object != tobject)
2408*22ce4affSfengbojiang VM_OBJECT_RUNLOCK(tobject);
2409*22ce4affSfengbojiang tobject = t1object;
2410*22ce4affSfengbojiang }
2411*22ce4affSfengbojiang vm_page_busy_sleep(tm, "unwbo", true);
2412*22ce4affSfengbojiang goto again;
2413*22ce4affSfengbojiang }
2414a9643ea8Slogwang vm_page_unwire(tm, queue);
2415*22ce4affSfengbojiang vm_page_sunbusy(tm);
2416a9643ea8Slogwang next_page:
2417a9643ea8Slogwang pindex++;
2418a9643ea8Slogwang }
2419a9643ea8Slogwang /* Release the accumulated object locks. */
2420*22ce4affSfengbojiang for (tobject = object; locked_depth >= 1; locked_depth--) {
2421*22ce4affSfengbojiang t1object = tobject->backing_object;
2422*22ce4affSfengbojiang VM_OBJECT_RUNLOCK(tobject);
2423*22ce4affSfengbojiang tobject = t1object;
2424a9643ea8Slogwang }
2425a9643ea8Slogwang }
2426a9643ea8Slogwang
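/*
 * Illustrative sketch, not part of the original source: unwiring the pages
 * that back a map entry once its wired count drops to zero.  The entry
 * variable and the PQ_ACTIVE queue choice are assumptions made for this
 * example; offset and length must be page aligned and every page in the
 * range must currently be wired.
 */
#if 0
	vm_object_unwire(entry->object.vm_object, entry->offset,
	    entry->end - entry->start, PQ_ACTIVE);
#endif
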
2427*22ce4affSfengbojiang /*
2428*22ce4affSfengbojiang * Return the vnode for the given object, or NULL if none exists.
2429*22ce4affSfengbojiang * For tmpfs objects, the function may return NULL if there is
2430*22ce4affSfengbojiang * no vnode allocated at the time of the call.
2431*22ce4affSfengbojiang */
2432a9643ea8Slogwang struct vnode *
2433a9643ea8Slogwang vm_object_vnode(vm_object_t object)
2434a9643ea8Slogwang {
2435*22ce4affSfengbojiang struct vnode *vp;
2436a9643ea8Slogwang
2437a9643ea8Slogwang VM_OBJECT_ASSERT_LOCKED(object);
2438*22ce4affSfengbojiang if (object->type == OBJT_VNODE) {
2439*22ce4affSfengbojiang vp = object->handle;
2440*22ce4affSfengbojiang KASSERT(vp != NULL, ("%s: OBJT_VNODE has no vnode", __func__));
2441*22ce4affSfengbojiang } else if (object->type == OBJT_SWAP &&
2442*22ce4affSfengbojiang (object->flags & OBJ_TMPFS) != 0) {
2443*22ce4affSfengbojiang vp = object->un_pager.swp.swp_tmpfs;
2444*22ce4affSfengbojiang KASSERT(vp != NULL, ("%s: OBJT_TMPFS has no vnode", __func__));
2445*22ce4affSfengbojiang } else {
2446*22ce4affSfengbojiang vp = NULL;
2447*22ce4affSfengbojiang }
2448*22ce4affSfengbojiang return (vp);
2449*22ce4affSfengbojiang }
2450*22ce4affSfengbojiang
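/*
 * Illustrative sketch, not part of the original source: taking a reference
 * on an object's backing vnode while the object lock is held, much as the
 * sysctl handler below does.  The local variable names are assumptions made
 * for this example only.
 */
#if 0
	struct vnode *vp;

	VM_OBJECT_RLOCK(object);
	vp = vm_object_vnode(object);
	if (vp != NULL)
		vref(vp);	/* Keep the vnode alive after unlocking. */
	VM_OBJECT_RUNLOCK(object);
#endif
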
2451*22ce4affSfengbojiang /*
2452*22ce4affSfengbojiang * Busy the vm object. This prevents new pages belonging to the object from
2453*22ce4affSfengbojiang * becoming busy. Existing pages persist as busy. Callers are responsible
2454*22ce4affSfengbojiang * for checking page state before proceeding.
2455*22ce4affSfengbojiang */
2456*22ce4affSfengbojiang void
2457*22ce4affSfengbojiang vm_object_busy(vm_object_t obj)
2458*22ce4affSfengbojiang {
2459*22ce4affSfengbojiang
2460*22ce4affSfengbojiang VM_OBJECT_ASSERT_LOCKED(obj);
2461*22ce4affSfengbojiang
2462*22ce4affSfengbojiang blockcount_acquire(&obj->busy, 1);
2463*22ce4affSfengbojiang /* The fence is required to order loads of page busy. */
2464*22ce4affSfengbojiang atomic_thread_fence_acq_rel();
2465*22ce4affSfengbojiang }
2466*22ce4affSfengbojiang
2467*22ce4affSfengbojiang void
2468*22ce4affSfengbojiang vm_object_unbusy(vm_object_t obj)
2469*22ce4affSfengbojiang {
2470*22ce4affSfengbojiang
2471*22ce4affSfengbojiang blockcount_release(&obj->busy, 1);
2472*22ce4affSfengbojiang }
2473*22ce4affSfengbojiang
2474*22ce4affSfengbojiang void
2475*22ce4affSfengbojiang vm_object_busy_wait(vm_object_t obj, const char *wmesg)
2476*22ce4affSfengbojiang {
2477*22ce4affSfengbojiang
2478*22ce4affSfengbojiang VM_OBJECT_ASSERT_UNLOCKED(obj);
2479*22ce4affSfengbojiang
2480*22ce4affSfengbojiang (void)blockcount_sleep(&obj->busy, NULL, wmesg, PVM);
2481*22ce4affSfengbojiang }
2482*22ce4affSfengbojiang
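/*
 * Illustrative sketch, not part of the original source: a reader that
 * busies the object so no page in it can newly become busy, drops the
 * object lock, re-checks the page it cares about, and then releases the
 * object busy count.  The fallback logic is an assumption made for this
 * example only.
 */
#if 0
	VM_OBJECT_RLOCK(object);
	vm_object_busy(object);
	VM_OBJECT_RUNLOCK(object);
	if (vm_page_busied(m)) {
		/* Someone busied the page earlier; take the slow path. */
	}
	vm_object_unbusy(object);
#endif
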
2483*22ce4affSfengbojiang /*
2484*22ce4affSfengbojiang * Return the kvme type of the given object.
2485*22ce4affSfengbojiang * If vpp is not NULL, set it to the object's vm_object_vnode() or NULL.
2486*22ce4affSfengbojiang */
2487*22ce4affSfengbojiang int
2488*22ce4affSfengbojiang vm_object_kvme_type(vm_object_t object, struct vnode **vpp)
2489*22ce4affSfengbojiang {
2490*22ce4affSfengbojiang
2491*22ce4affSfengbojiang VM_OBJECT_ASSERT_LOCKED(object);
2492*22ce4affSfengbojiang if (vpp != NULL)
2493*22ce4affSfengbojiang *vpp = vm_object_vnode(object);
2494*22ce4affSfengbojiang switch (object->type) {
2495*22ce4affSfengbojiang case OBJT_DEFAULT:
2496*22ce4affSfengbojiang return (KVME_TYPE_DEFAULT);
2497*22ce4affSfengbojiang case OBJT_VNODE:
2498*22ce4affSfengbojiang return (KVME_TYPE_VNODE);
2499*22ce4affSfengbojiang case OBJT_SWAP:
2500*22ce4affSfengbojiang if ((object->flags & OBJ_TMPFS_NODE) != 0)
2501*22ce4affSfengbojiang return (KVME_TYPE_VNODE);
2502*22ce4affSfengbojiang return (KVME_TYPE_SWAP);
2503*22ce4affSfengbojiang case OBJT_DEVICE:
2504*22ce4affSfengbojiang return (KVME_TYPE_DEVICE);
2505*22ce4affSfengbojiang case OBJT_PHYS:
2506*22ce4affSfengbojiang return (KVME_TYPE_PHYS);
2507*22ce4affSfengbojiang case OBJT_DEAD:
2508*22ce4affSfengbojiang return (KVME_TYPE_DEAD);
2509*22ce4affSfengbojiang case OBJT_SG:
2510*22ce4affSfengbojiang return (KVME_TYPE_SG);
2511*22ce4affSfengbojiang case OBJT_MGTDEVICE:
2512*22ce4affSfengbojiang return (KVME_TYPE_MGTDEVICE);
2513*22ce4affSfengbojiang default:
2514*22ce4affSfengbojiang return (KVME_TYPE_UNKNOWN);
2515*22ce4affSfengbojiang }
2516a9643ea8Slogwang }
2517a9643ea8Slogwang
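/*
 * Illustrative sketch, not part of the original source: classifying a
 * locked object for reporting and remembering its vnode, mirroring the
 * sysctl handler below.  Variable names are assumptions made for this
 * example only.
 */
#if 0
	struct vnode *vp;
	int kvme_type;

	VM_OBJECT_RLOCK(obj);
	kvme_type = vm_object_kvme_type(obj, &vp);
	VM_OBJECT_RUNLOCK(obj);
#endif
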
2518a9643ea8Slogwang static int
2519a9643ea8Slogwang sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
2520a9643ea8Slogwang {
2521*22ce4affSfengbojiang struct kinfo_vmobject *kvo;
2522a9643ea8Slogwang char *fullpath, *freepath;
2523a9643ea8Slogwang struct vnode *vp;
2524a9643ea8Slogwang struct vattr va;
2525a9643ea8Slogwang vm_object_t obj;
2526a9643ea8Slogwang vm_page_t m;
2527a9643ea8Slogwang int count, error;
2528a9643ea8Slogwang
2529a9643ea8Slogwang if (req->oldptr == NULL) {
2530a9643ea8Slogwang /*
2531a9643ea8Slogwang * If an old buffer has not been provided, generate an
2532a9643ea8Slogwang * estimate of the space needed for a subsequent call.
2533a9643ea8Slogwang */
2534a9643ea8Slogwang mtx_lock(&vm_object_list_mtx);
2535a9643ea8Slogwang count = 0;
2536a9643ea8Slogwang TAILQ_FOREACH(obj, &vm_object_list, object_list) {
2537a9643ea8Slogwang if (obj->type == OBJT_DEAD)
2538a9643ea8Slogwang continue;
2539a9643ea8Slogwang count++;
2540a9643ea8Slogwang }
2541a9643ea8Slogwang mtx_unlock(&vm_object_list_mtx);
2542a9643ea8Slogwang return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) *
2543a9643ea8Slogwang count * 11 / 10));
2544a9643ea8Slogwang }
2545a9643ea8Slogwang
2546*22ce4affSfengbojiang kvo = malloc(sizeof(*kvo), M_TEMP, M_WAITOK);
2547a9643ea8Slogwang error = 0;
2548a9643ea8Slogwang
2549a9643ea8Slogwang /*
2550a9643ea8Slogwang * VM objects are type stable and are never removed from the
2551a9643ea8Slogwang * list once added. This allows us to safely read obj->object_list
2552a9643ea8Slogwang * after reacquiring the VM object lock.
2553a9643ea8Slogwang */
2554a9643ea8Slogwang mtx_lock(&vm_object_list_mtx);
2555a9643ea8Slogwang TAILQ_FOREACH(obj, &vm_object_list, object_list) {
2556a9643ea8Slogwang if (obj->type == OBJT_DEAD)
2557a9643ea8Slogwang continue;
2558a9643ea8Slogwang VM_OBJECT_RLOCK(obj);
2559a9643ea8Slogwang if (obj->type == OBJT_DEAD) {
2560a9643ea8Slogwang VM_OBJECT_RUNLOCK(obj);
2561a9643ea8Slogwang continue;
2562a9643ea8Slogwang }
2563a9643ea8Slogwang mtx_unlock(&vm_object_list_mtx);
2564*22ce4affSfengbojiang kvo->kvo_size = ptoa(obj->size);
2565*22ce4affSfengbojiang kvo->kvo_resident = obj->resident_page_count;
2566*22ce4affSfengbojiang kvo->kvo_ref_count = obj->ref_count;
2567*22ce4affSfengbojiang kvo->kvo_shadow_count = obj->shadow_count;
2568*22ce4affSfengbojiang kvo->kvo_memattr = obj->memattr;
2569*22ce4affSfengbojiang kvo->kvo_active = 0;
2570*22ce4affSfengbojiang kvo->kvo_inactive = 0;
2571a9643ea8Slogwang TAILQ_FOREACH(m, &obj->memq, listq) {
2572a9643ea8Slogwang /*
2573a9643ea8Slogwang * A page may belong to the object but be
2574a9643ea8Slogwang * dequeued and set to PQ_NONE while the
2575a9643ea8Slogwang * object lock is not held. This makes the
2576a9643ea8Slogwang * reads of m->a.queue below racy, and we do not
2577a9643ea8Slogwang * count pages set to PQ_NONE. However, this
2578a9643ea8Slogwang * sysctl is only meant to give an
2579a9643ea8Slogwang * approximation of the system anyway.
2580a9643ea8Slogwang */
2581*22ce4affSfengbojiang if (m->a.queue == PQ_ACTIVE)
2582*22ce4affSfengbojiang kvo->kvo_active++;
2583*22ce4affSfengbojiang else if (m->a.queue == PQ_INACTIVE)
2584*22ce4affSfengbojiang kvo->kvo_inactive++;
2585a9643ea8Slogwang }
2586a9643ea8Slogwang
2587*22ce4affSfengbojiang kvo->kvo_vn_fileid = 0;
2588*22ce4affSfengbojiang kvo->kvo_vn_fsid = 0;
2589*22ce4affSfengbojiang kvo->kvo_vn_fsid_freebsd11 = 0;
2590a9643ea8Slogwang freepath = NULL;
2591a9643ea8Slogwang fullpath = "";
2592*22ce4affSfengbojiang kvo->kvo_type = vm_object_kvme_type(obj, &vp);
2593*22ce4affSfengbojiang if (vp != NULL)
2594a9643ea8Slogwang vref(vp);
2595a9643ea8Slogwang VM_OBJECT_RUNLOCK(obj);
2596a9643ea8Slogwang if (vp != NULL) {
2597*22ce4affSfengbojiang vn_fullpath(vp, &fullpath, &freepath);
2598a9643ea8Slogwang vn_lock(vp, LK_SHARED | LK_RETRY);
2599a9643ea8Slogwang if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
2600*22ce4affSfengbojiang kvo->kvo_vn_fileid = va.va_fileid;
2601*22ce4affSfengbojiang kvo->kvo_vn_fsid = va.va_fsid;
2602*22ce4affSfengbojiang kvo->kvo_vn_fsid_freebsd11 = va.va_fsid;
2603*22ce4affSfengbojiang /* truncate */
2604a9643ea8Slogwang }
2605a9643ea8Slogwang vput(vp);
2606a9643ea8Slogwang }
2607a9643ea8Slogwang
2608*22ce4affSfengbojiang strlcpy(kvo->kvo_path, fullpath, sizeof(kvo->kvo_path));
2609a9643ea8Slogwang if (freepath != NULL)
2610a9643ea8Slogwang free(freepath, M_TEMP);
2611a9643ea8Slogwang
2612a9643ea8Slogwang /* Pack record size down */
2613*22ce4affSfengbojiang kvo->kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path)
2614*22ce4affSfengbojiang + strlen(kvo->kvo_path) + 1;
2615*22ce4affSfengbojiang kvo->kvo_structsize = roundup(kvo->kvo_structsize,
2616a9643ea8Slogwang sizeof(uint64_t));
2617*22ce4affSfengbojiang error = SYSCTL_OUT(req, kvo, kvo->kvo_structsize);
2618a9643ea8Slogwang mtx_lock(&vm_object_list_mtx);
2619a9643ea8Slogwang if (error)
2620a9643ea8Slogwang break;
2621a9643ea8Slogwang }
2622a9643ea8Slogwang mtx_unlock(&vm_object_list_mtx);
2623*22ce4affSfengbojiang free(kvo, M_TEMP);
2624a9643ea8Slogwang return (error);
2625a9643ea8Slogwang }
2626a9643ea8Slogwang SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP |
2627a9643ea8Slogwang CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject",
2628a9643ea8Slogwang "List of VM objects");
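
/*
 * Illustrative sketch, not part of the original source: how a userland
 * consumer might read the list produced by the handler above, using the
 * usual two-pass sysctl pattern (size probe, then data).  Error handling is
 * omitted and the buffer handling is an assumption made for this example.
 *
 *	size_t len = 0;
 *	sysctlbyname("vm.objects", NULL, &len, NULL, 0);
 *	char *buf = malloc(len);
 *	sysctlbyname("vm.objects", buf, &len, NULL, 0);
 *	for (char *p = buf; p < buf + len; ) {
 *		struct kinfo_vmobject *kvo = (struct kinfo_vmobject *)p;
 *		// kvo->kvo_type, kvo->kvo_resident, kvo->kvo_path, ...
 *		p += kvo->kvo_structsize;
 *	}
 */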
2629a9643ea8Slogwang
2630a9643ea8Slogwang #include "opt_ddb.h"
2631a9643ea8Slogwang #ifdef DDB
2632a9643ea8Slogwang #include <sys/kernel.h>
2633a9643ea8Slogwang
2634a9643ea8Slogwang #include <sys/cons.h>
2635a9643ea8Slogwang
2636a9643ea8Slogwang #include <ddb/ddb.h>
2637a9643ea8Slogwang
2638a9643ea8Slogwang static int
2639a9643ea8Slogwang _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
2640a9643ea8Slogwang {
2641a9643ea8Slogwang vm_map_t tmpm;
2642a9643ea8Slogwang vm_map_entry_t tmpe;
2643a9643ea8Slogwang vm_object_t obj;
2644a9643ea8Slogwang
2645a9643ea8Slogwang if (map == 0)
2646a9643ea8Slogwang return 0;
2647a9643ea8Slogwang
2648a9643ea8Slogwang if (entry == 0) {
2649*22ce4affSfengbojiang VM_MAP_ENTRY_FOREACH(tmpe, map) {
2650a9643ea8Slogwang if (_vm_object_in_map(map, object, tmpe)) {
2651a9643ea8Slogwang return 1;
2652a9643ea8Slogwang }
2653a9643ea8Slogwang }
2654a9643ea8Slogwang } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2655a9643ea8Slogwang tmpm = entry->object.sub_map;
2656*22ce4affSfengbojiang VM_MAP_ENTRY_FOREACH(tmpe, tmpm) {
2657a9643ea8Slogwang if (_vm_object_in_map(tmpm, object, tmpe)) {
2658a9643ea8Slogwang return 1;
2659a9643ea8Slogwang }
2660a9643ea8Slogwang }
2661a9643ea8Slogwang } else if ((obj = entry->object.vm_object) != NULL) {
2662a9643ea8Slogwang for (; obj; obj = obj->backing_object)
2663a9643ea8Slogwang if (obj == object) {
2664a9643ea8Slogwang return 1;
2665a9643ea8Slogwang }
2666a9643ea8Slogwang }
2667a9643ea8Slogwang return 0;
2668a9643ea8Slogwang }
2669a9643ea8Slogwang
2670a9643ea8Slogwang static int
2671a9643ea8Slogwang vm_object_in_map(vm_object_t object)
2672a9643ea8Slogwang {
2673a9643ea8Slogwang struct proc *p;
2674a9643ea8Slogwang
2675a9643ea8Slogwang /* sx_slock(&allproc_lock); */
2676a9643ea8Slogwang FOREACH_PROC_IN_SYSTEM(p) {
2677a9643ea8Slogwang if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
2678a9643ea8Slogwang continue;
2679a9643ea8Slogwang if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
2680a9643ea8Slogwang /* sx_sunlock(&allproc_lock); */
2681a9643ea8Slogwang return 1;
2682a9643ea8Slogwang }
2683a9643ea8Slogwang }
2684a9643ea8Slogwang /* sx_sunlock(&allproc_lock); */
2685a9643ea8Slogwang if (_vm_object_in_map(kernel_map, object, 0))
2686a9643ea8Slogwang return 1;
2687a9643ea8Slogwang return 0;
2688a9643ea8Slogwang }
2689a9643ea8Slogwang
2690a9643ea8Slogwang DB_SHOW_COMMAND(vmochk, vm_object_check)
2691a9643ea8Slogwang {
2692a9643ea8Slogwang vm_object_t object;
2693a9643ea8Slogwang
2694a9643ea8Slogwang /*
2695a9643ea8Slogwang * make sure that internal objs are in a map somewhere
2696a9643ea8Slogwang * and none have zero ref counts.
2697a9643ea8Slogwang */
2698a9643ea8Slogwang TAILQ_FOREACH(object, &vm_object_list, object_list) {
2699*22ce4affSfengbojiang if ((object->flags & OBJ_ANON) != 0) {
2700a9643ea8Slogwang if (object->ref_count == 0) {
2701a9643ea8Slogwang db_printf("vmochk: internal obj has zero ref count: %ld\n",
2702a9643ea8Slogwang (long)object->size);
2703a9643ea8Slogwang }
2704a9643ea8Slogwang if (!vm_object_in_map(object)) {
2705a9643ea8Slogwang db_printf(
2706a9643ea8Slogwang "vmochk: internal obj is not in a map: "
2707a9643ea8Slogwang "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
2708a9643ea8Slogwang object->ref_count, (u_long)object->size,
2709a9643ea8Slogwang (u_long)object->size,
2710a9643ea8Slogwang (void *)object->backing_object);
2711a9643ea8Slogwang }
2712a9643ea8Slogwang }
2713*22ce4affSfengbojiang if (db_pager_quit)
2714*22ce4affSfengbojiang return;
2715a9643ea8Slogwang }
2716a9643ea8Slogwang }
2717a9643ea8Slogwang
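/*
 * Illustrative note, not part of the original source: from the DDB prompt
 * the consistency check above is invoked as
 *
 *	db> show vmochk
 *
 * and prints a line for each anonymous object that has a zero reference
 * count or is not reachable from any map.
 */
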
2718a9643ea8Slogwang /*
2719a9643ea8Slogwang * vm_object_print: [ debug ]
2720a9643ea8Slogwang */
2721a9643ea8Slogwang DB_SHOW_COMMAND(object, vm_object_print_static)
2722a9643ea8Slogwang {
2723a9643ea8Slogwang /* XXX convert args. */
2724a9643ea8Slogwang vm_object_t object = (vm_object_t)addr;
2725a9643ea8Slogwang boolean_t full = have_addr;
2726a9643ea8Slogwang
2727a9643ea8Slogwang vm_page_t p;
2728a9643ea8Slogwang
2729a9643ea8Slogwang /* XXX count is an (unused) arg. Avoid shadowing it. */
2730a9643ea8Slogwang #define count was_count
2731a9643ea8Slogwang
2732a9643ea8Slogwang int count;
2733a9643ea8Slogwang
2734a9643ea8Slogwang if (object == NULL)
2735a9643ea8Slogwang return;
2736a9643ea8Slogwang
2737a9643ea8Slogwang db_iprintf(
2738a9643ea8Slogwang "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n",
2739a9643ea8Slogwang object, (int)object->type, (uintmax_t)object->size,
2740a9643ea8Slogwang object->resident_page_count, object->ref_count, object->flags,
2741a9643ea8Slogwang object->cred ? object->cred->cr_ruid : -1, (uintmax_t)object->charge);
2742a9643ea8Slogwang db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
2743a9643ea8Slogwang object->shadow_count,
2744a9643ea8Slogwang object->backing_object ? object->backing_object->ref_count : 0,
2745a9643ea8Slogwang object->backing_object, (uintmax_t)object->backing_object_offset);
2746a9643ea8Slogwang
2747a9643ea8Slogwang if (!full)
2748a9643ea8Slogwang return;
2749a9643ea8Slogwang
2750a9643ea8Slogwang db_indent += 2;
2751a9643ea8Slogwang count = 0;
2752a9643ea8Slogwang TAILQ_FOREACH(p, &object->memq, listq) {
2753a9643ea8Slogwang if (count == 0)
2754a9643ea8Slogwang db_iprintf("memory:=");
2755a9643ea8Slogwang else if (count == 6) {
2756a9643ea8Slogwang db_printf("\n");
2757a9643ea8Slogwang db_iprintf(" ...");
2758a9643ea8Slogwang count = 0;
2759a9643ea8Slogwang } else
2760a9643ea8Slogwang db_printf(",");
2761a9643ea8Slogwang count++;
2762a9643ea8Slogwang
2763a9643ea8Slogwang db_printf("(off=0x%jx,page=0x%jx)",
2764a9643ea8Slogwang (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
2765*22ce4affSfengbojiang
2766*22ce4affSfengbojiang if (db_pager_quit)
2767*22ce4affSfengbojiang break;
2768a9643ea8Slogwang }
2769a9643ea8Slogwang if (count != 0)
2770a9643ea8Slogwang db_printf("\n");
2771a9643ea8Slogwang db_indent -= 2;
2772a9643ea8Slogwang }
2773a9643ea8Slogwang
2774a9643ea8Slogwang /* XXX. */
2775a9643ea8Slogwang #undef count
2776a9643ea8Slogwang
2777a9643ea8Slogwang /* XXX need this non-static entry for calling from vm_map_print. */
2778a9643ea8Slogwang void
2779a9643ea8Slogwang vm_object_print(
2780a9643ea8Slogwang /* db_expr_t */ long addr,
2781a9643ea8Slogwang boolean_t have_addr,
2782a9643ea8Slogwang /* db_expr_t */ long count,
2783a9643ea8Slogwang char *modif)
2784a9643ea8Slogwang {
2785a9643ea8Slogwang vm_object_print_static(addr, have_addr, count, modif);
2786a9643ea8Slogwang }
2787a9643ea8Slogwang
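/*
 * Illustrative note, not part of the original source: from the DDB prompt a
 * single object is dumped with
 *
 *	db> show object <address>
 *
 * which prints the object header and its resident pages, while
 * "show vmopag" (defined below) prints runs of physically contiguous pages
 * for every object on vm_object_list.
 */
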
2788a9643ea8Slogwang DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
2789a9643ea8Slogwang {
2790a9643ea8Slogwang vm_object_t object;
2791a9643ea8Slogwang vm_pindex_t fidx;
2792a9643ea8Slogwang vm_paddr_t pa;
2793a9643ea8Slogwang vm_page_t m, prev_m;
2794a9643ea8Slogwang int rcount, nl, c;
2795a9643ea8Slogwang
2796a9643ea8Slogwang nl = 0;
2797a9643ea8Slogwang TAILQ_FOREACH(object, &vm_object_list, object_list) {
2798a9643ea8Slogwang db_printf("new object: %p\n", (void *)object);
2799a9643ea8Slogwang if (nl > 18) {
2800a9643ea8Slogwang c = cngetc();
2801a9643ea8Slogwang if (c != ' ')
2802a9643ea8Slogwang return;
2803a9643ea8Slogwang nl = 0;
2804a9643ea8Slogwang }
2805a9643ea8Slogwang nl++;
2806a9643ea8Slogwang rcount = 0;
2807a9643ea8Slogwang fidx = 0;
2808a9643ea8Slogwang pa = -1;
2809a9643ea8Slogwang TAILQ_FOREACH(m, &object->memq, listq) {
2810a9643ea8Slogwang if (m->pindex > 128)
2811a9643ea8Slogwang break;
2812a9643ea8Slogwang if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL &&
2813a9643ea8Slogwang prev_m->pindex + 1 != m->pindex) {
2814a9643ea8Slogwang if (rcount) {
2815a9643ea8Slogwang db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2816a9643ea8Slogwang (long)fidx, rcount, (long)pa);
2817a9643ea8Slogwang if (nl > 18) {
2818a9643ea8Slogwang c = cngetc();
2819a9643ea8Slogwang if (c != ' ')
2820a9643ea8Slogwang return;
2821a9643ea8Slogwang nl = 0;
2822a9643ea8Slogwang }
2823a9643ea8Slogwang nl++;
2824a9643ea8Slogwang rcount = 0;
2825a9643ea8Slogwang }
2826a9643ea8Slogwang }
2827a9643ea8Slogwang if (rcount &&
2828a9643ea8Slogwang (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
2829a9643ea8Slogwang ++rcount;
2830a9643ea8Slogwang continue;
2831a9643ea8Slogwang }
2832a9643ea8Slogwang if (rcount) {
2833a9643ea8Slogwang db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2834a9643ea8Slogwang (long)fidx, rcount, (long)pa);
2835a9643ea8Slogwang if (nl > 18) {
2836a9643ea8Slogwang c = cngetc();
2837a9643ea8Slogwang if (c != ' ')
2838a9643ea8Slogwang return;
2839a9643ea8Slogwang nl = 0;
2840a9643ea8Slogwang }
2841a9643ea8Slogwang nl++;
2842a9643ea8Slogwang }
2843a9643ea8Slogwang fidx = m->pindex;
2844a9643ea8Slogwang pa = VM_PAGE_TO_PHYS(m);
2845a9643ea8Slogwang rcount = 1;
2846a9643ea8Slogwang }
2847a9643ea8Slogwang if (rcount) {
2848a9643ea8Slogwang db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2849a9643ea8Slogwang (long)fidx, rcount, (long)pa);
2850a9643ea8Slogwang if (nl > 18) {
2851a9643ea8Slogwang c = cngetc();
2852a9643ea8Slogwang if (c != ' ')
2853a9643ea8Slogwang return;
2854a9643ea8Slogwang nl = 0;
2855a9643ea8Slogwang }
2856a9643ea8Slogwang nl++;
2857a9643ea8Slogwang }
2858a9643ea8Slogwang }
2859a9643ea8Slogwang }
2860a9643ea8Slogwang #endif /* DDB */
2861