1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/memory_object.c
60 * Author: Michael Wayne Young
61 *
62 * External memory management interface control functions.
63 */
64
65 /*
66 * Interface dependencies:
67 */
68
69 #include <mach/std_types.h> /* For pointer_t */
70 #include <mach/mach_types.h>
71
72 #include <mach/mig.h>
73 #include <mach/kern_return.h>
74 #include <mach/memory_object.h>
75 #include <mach/memory_object_control.h>
76 #include <mach/host_priv_server.h>
77 #include <mach/boolean.h>
78 #include <mach/vm_prot.h>
79 #include <mach/message.h>
80
81 /*
82 * Implementation dependencies:
83 */
84 #include <string.h> /* For memcpy() */
85
86 #include <kern/host.h>
87 #include <kern/thread.h> /* For current_thread() */
88 #include <kern/ipc_mig.h>
89 #include <kern/misc_protos.h>
90
91 #include <vm/vm_object_internal.h>
92 #include <vm/vm_fault_internal.h>
93 #include <vm/memory_object_internal.h>
94 #include <vm/vm_page_internal.h>
95 #include <vm/vm_pageout_internal.h>
96 #include <vm/pmap.h> /* For pmap_clear_modify */
97 #include <vm/vm_kern.h> /* For kernel_map, vm_move */
98 #include <vm/vm_map_xnu.h> /* For vm_map_pageable */
99 #include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */
100 #include <vm/vm_shared_region.h>
101 #include <vm/vm_memory_entry_xnu.h>
102
103 #include <vm/vm_external.h>
104
105 #include <vm/vm_protos_internal.h>
106 #include <vm/vm_iokit.h>
107 #include <vm/vm_ubc.h>
108
109 memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
110 LCK_MTX_DECLARE(memory_manager_default_lock, &vm_object_lck_grp);
111
112
113 /*
114 * Routine: memory_object_should_return_page
115 *
116 * Description:
117 * Determine whether the given page should be returned,
118 * based on the page's state and on the given return policy.
119 *
120 * We should return the page if one of the following is true:
121 *
122 * 1. Page is dirty and should_return is not RETURN_NONE.
123 * 2. Page is precious and should_return is RETURN_ALL.
124 * 3. Should_return is RETURN_ANYTHING.
125 *
126 * As a side effect, m->vmp_dirty will be made consistent
127 * with pmap_is_modified(m), if should_return is not
128 * MEMORY_OBJECT_RETURN_NONE.
129 */
130
131 #define memory_object_should_return_page(m, should_return) \
132 (should_return != MEMORY_OBJECT_RETURN_NONE && \
133 (((m)->vmp_dirty || ((m)->vmp_dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))) || \
134 ((m)->vmp_precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
135 (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
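/*
 * Illustrative sketch (not compiled): how a caller consults the policy
 * above.  "m" stands for any vm_page_t; the MEMORY_OBJECT_RETURN_*
 * constants are the Mach ones used throughout this file.
 */
#if 0
	if (memory_object_should_return_page(m, MEMORY_OBJECT_RETURN_DIRTY)) {
		/* rule 1: the page is dirty (possibly only in the pmap) */
	}
	if (memory_object_should_return_page(m, MEMORY_OBJECT_RETURN_ALL)) {
		/* rule 1 or 2: dirty, or precious even though clean */
	}
#endif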
136
137 typedef int memory_object_lock_result_t;
138
139 #define MEMORY_OBJECT_LOCK_RESULT_DONE 0
140 #define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1
141 #define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 2
142 #define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE 3
143
144 memory_object_lock_result_t memory_object_lock_page(
145 vm_page_t m,
146 memory_object_return_t should_return,
147 boolean_t should_flush,
148 vm_prot_t prot);
149
150 /*
151 * Routine: memory_object_lock_page
152 *
153 * Description:
154 * Perform the appropriate lock operations on the
155 * given page. See the description of
156 * "memory_object_lock_request" for the meanings
157 * of the arguments.
158 *
159 * Returns an indication that the operation
160 * completed, blocked, or that the page must
161 * be cleaned.
162 */
163 memory_object_lock_result_t
164 memory_object_lock_page(
165 vm_page_t m,
166 memory_object_return_t should_return,
167 boolean_t should_flush,
168 vm_prot_t prot)
169 {
170 if (prot == VM_PROT_NO_CHANGE_LEGACY) {
171 prot = VM_PROT_NO_CHANGE;
172 }
173
174 if (m->vmp_busy || m->vmp_cleaning) {
175 return MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK;
176 }
177
178 if (m->vmp_laundry) {
179 vm_pageout_steal_laundry(m, FALSE);
180 }
181
182 /*
183 * Don't worry about pages for which the kernel
184 * does not have any data.
185 */
186 if (m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_restart) {
187 if (VMP_ERROR_GET(m) && should_flush && !VM_PAGE_WIRED(m)) {
188 /*
189 * dump the page, pager wants us to
190 * clean it up and there is no
191 * relevant data to return
192 */
193 return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE;
194 }
195 return MEMORY_OBJECT_LOCK_RESULT_DONE;
196 }
197 assert(!m->vmp_fictitious);
198
199 if (VM_PAGE_WIRED(m)) {
200 /*
201 * The page is wired... just clean or return the page if needed.
202 * Wired pages don't get flushed or disconnected from the pmap.
203 */
204 if (memory_object_should_return_page(m, should_return)) {
205 return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN;
206 }
207
208 return MEMORY_OBJECT_LOCK_RESULT_DONE;
209 }
210
211 if (should_flush) {
212 /*
213 * must do the pmap_disconnect before determining the
214 * need to return the page... otherwise it's possible
215 * for the page to go from the clean to the dirty state
216 * after we've made our decision
217 */
218 if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
219 SET_PAGE_DIRTY(m, FALSE);
220 }
221 } else {
222 /*
223 * If we are decreasing permission, do it now;
224 * let the fault handler take care of increases
225 * (pmap_page_protect may not increase protection).
226 */
227 if (prot != VM_PROT_NO_CHANGE) {
228 pmap_page_protect(VM_PAGE_GET_PHYS_PAGE(m), VM_PROT_ALL & ~prot);
229 }
230 }
231 /*
232 * Handle returning dirty or precious pages
233 */
234 if (memory_object_should_return_page(m, should_return)) {
235 /*
236 * we used to do a pmap_disconnect here in support
237 * of memory_object_lock_request, but that routine
238 * no longer requires this... in any event, in
239 * our world, it would turn into a big noop since
240 * we don't lock the page in any way and as soon
241 * as we drop the object lock, the page can be
242 * faulted back into an address space
243 *
244 * if (!should_flush)
245 * pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
246 */
247 return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN;
248 }
249
250 /*
251 * Handle flushing clean pages
252 */
253 if (should_flush) {
254 return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE;
255 }
256
257 /*
258 * we used to deactivate clean pages at this point,
259 * but we do not believe that an msync should change
260 * the 'age' of a page in the cache... here is the
261 * original comment and code concerning this...
262 *
263 * XXX Make clean but not flush a paging hint,
264 * and deactivate the pages. This is a hack
265 * because it overloads flush/clean with
266 * implementation-dependent meaning. This only
267 * happens to pages that are already clean.
268 *
269 * if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
270 * return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
271 */
272
273 return MEMORY_OBJECT_LOCK_RESULT_DONE;
274 }
275
276
277
278 /*
279 * Routine: memory_object_lock_request [user interface]
280 *
281 * Description:
282 * Control use of the data associated with the given
283 * memory object. For each page in the given range,
284 * perform the following operations, in order:
285 * 1) restrict access to the page (disallow
286 * forms specified by "prot");
287 * 2) return data to the manager (if "should_return"
288 * is RETURN_DIRTY and the page is dirty, or
289 * "should_return" is RETURN_ALL and the page
290 * is either dirty or precious); and,
291 * 3) flush the cached copy (if "should_flush"
292 * is asserted).
293 * The set of pages is defined by a starting offset
294 * ("offset") and size ("size"). Only pages with the
295 * same page alignment as the starting offset are
296 * considered.
297 *
298 * A single acknowledgement is sent (to the "reply_to"
299 * port) when these actions are complete. If successful,
300 * the naked send right for reply_to is consumed.
301 */
302
303 kern_return_t
304 memory_object_lock_request(
305 memory_object_control_t control,
306 memory_object_offset_t offset,
307 memory_object_size_t size,
308 memory_object_offset_t * resid_offset,
309 int * io_errno,
310 memory_object_return_t should_return,
311 int flags,
312 vm_prot_t prot)
313 {
314 vm_object_t object;
315
316 if (prot == VM_PROT_NO_CHANGE_LEGACY) {
317 prot = VM_PROT_NO_CHANGE;
318 }
319
320 /*
321 * Check for bogus arguments.
322 */
323 object = memory_object_control_to_vm_object(control);
324 if (object == VM_OBJECT_NULL) {
325 return KERN_INVALID_ARGUMENT;
326 }
327
328 if ((prot & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) != 0 && prot != VM_PROT_NO_CHANGE) {
329 return KERN_INVALID_ARGUMENT;
330 }
331
332 size = round_page_64(size);
333
334 /*
335 * Lock the object, and acquire a paging reference to
336 * prevent the memory_object reference from being released.
337 */
338 vm_object_lock(object);
339 vm_object_paging_begin(object);
340
341 if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
342 if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->vo_copy) {
343 flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
344 flags |= MEMORY_OBJECT_DATA_FLUSH;
345 }
346 }
347 offset -= object->paging_offset;
348
349 if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
350 vm_object_reap_pages(object, REAP_DATA_FLUSH);
351 } else {
352 (void)vm_object_update(object, offset, size, resid_offset,
353 io_errno, should_return, flags, prot);
354 }
355
356 vm_object_paging_end(object);
357 vm_object_unlock(object);
358
359 return KERN_SUCCESS;
360 }
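/*
 * Illustrative sketch (not compiled): the kind of request a file-backed
 * pager issues through this interface to push dirty pages and drop the
 * cached copies for one page-aligned range.  "control" is a hypothetical
 * memory_object_control_t for the object; offset and size are examples.
 */
#if 0
	kern_return_t kr;

	kr = memory_object_lock_request(control,
	    (memory_object_offset_t)0,          /* start of the range */
	    (memory_object_size_t)(16 * PAGE_SIZE),
	    NULL,                               /* resid_offset: not tracked */
	    NULL,                               /* io_errno: not tracked */
	    MEMORY_OBJECT_RETURN_DIRTY,         /* step 2: return dirty data */
	    MEMORY_OBJECT_DATA_FLUSH,           /* step 3: flush cached copies */
	    VM_PROT_NO_CHANGE);                 /* step 1: leave protections alone */
#endif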
361
362 /*
363 * Routine: memory_object_destroy [user interface]
364 * Purpose:
365 * Shut down a memory object, despite the
366 * presence of address map (or other) references
367 * to the vm_object.
368 */
369 kern_return_t
370 memory_object_destroy(
371 memory_object_control_t control,
372 vm_object_destroy_reason_t reason)
373 {
374 vm_object_t object;
375
376 object = memory_object_control_to_vm_object(control);
377 if (object == VM_OBJECT_NULL) {
378 return KERN_INVALID_ARGUMENT;
379 }
380
381 return vm_object_destroy(object, reason);
382 }
383
384 /*
385 * Routine: vm_object_sync
386 *
387 * Kernel internal function to synch out pages in a given
388 * range within an object to its memory manager. Much the
389 * same as memory_object_lock_request but page protection
390 * is not changed.
391 *
392 * If the should_flush and should_return flags are true, pages
393 * are flushed; that is, dirty & precious pages are written to
394 * the memory manager and then discarded. If should_return
395 * is false, only precious pages are returned to the memory
396 * manager.
397 *
398 * If should_flush is false and should_return is true, the memory
399 * manager's copy of the pages is updated. If should_return
400 * is also false, only the precious pages are updated. This
401 * last option is of limited utility.
402 *
403 * Returns:
404 * FALSE if no pages were returned to the pager
405 * TRUE otherwise.
406 */
407
408 boolean_t
409 vm_object_sync(
410 vm_object_t object,
411 vm_object_offset_t offset,
412 vm_object_size_t size,
413 boolean_t should_flush,
414 boolean_t should_return,
415 boolean_t should_iosync)
416 {
417 boolean_t rv;
418 int flags;
419
420 /*
421 * Lock the object, and acquire a paging reference to
422 * prevent the memory_object and control ports from
423 * being destroyed.
424 */
425 vm_object_lock(object);
426 vm_object_paging_begin(object);
427
428 if (should_flush) {
429 flags = MEMORY_OBJECT_DATA_FLUSH;
430 /*
431 * This flush is from an msync(), not a truncate(), so the
432 * contents of the file are not affected.
433 * MEMORY_OBJECT_DATA_NO_CHANGE lets vm_object_update() know
434 * that the data is not changed and that there's no need to
435 * push the old contents to a copy object.
436 */
437 flags |= MEMORY_OBJECT_DATA_NO_CHANGE;
438 } else {
439 flags = 0;
440 }
441
442 if (should_iosync) {
443 flags |= MEMORY_OBJECT_IO_SYNC;
444 }
445
446 rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
447 (should_return) ?
448 MEMORY_OBJECT_RETURN_ALL :
449 MEMORY_OBJECT_RETURN_NONE,
450 flags,
451 VM_PROT_NO_CHANGE);
452
453
454 vm_object_paging_end(object);
455 vm_object_unlock(object);
456 return rv;
457 }
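/*
 * Illustrative sketch (not compiled): the msync(MS_SYNC)-style variant
 * described above -- update the pager's copy of a range and wait for
 * the I/O, but keep the cached pages.  "object", offset and size are
 * hypothetical.
 */
#if 0
	(void) vm_object_sync(object,
	    (vm_object_offset_t)0,
	    (vm_object_size_t)(16 * PAGE_SIZE),
	    FALSE,      /* should_flush: keep the cached pages */
	    TRUE,       /* should_return: push dirty & precious data */
	    TRUE);      /* should_iosync: wait for the writes */
#endif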
458
459
460
461 #define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync) \
462 MACRO_BEGIN \
463 \
464 int upl_flags; \
465 memory_object_t pager; \
466 \
467 if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) { \
468 vm_object_paging_begin(object); \
469 vm_object_unlock(object); \
470 \
471 if (iosync) \
472 upl_flags = UPL_MSYNC | UPL_IOSYNC; \
473 else \
474 upl_flags = UPL_MSYNC; \
475 \
476 (void) memory_object_data_return(pager, \
477 po, \
478 (memory_object_cluster_size_t)data_cnt, \
479 ro, \
480 ioerr, \
481 FALSE, \
482 FALSE, \
483 upl_flags); \
484 \
485 vm_object_lock(object); \
486 vm_object_paging_end(object); \
487 } \
488 MACRO_END
489
490 extern struct vnode *
491 vnode_pager_lookup_vnode(memory_object_t);
492
493 static int
494 vm_object_update_extent(
495 vm_object_t object,
496 vm_object_offset_t offset,
497 vm_object_offset_t offset_end,
498 vm_object_offset_t *offset_resid,
499 int *io_errno,
500 boolean_t should_flush,
501 memory_object_return_t should_return,
502 boolean_t should_iosync,
503 vm_prot_t prot)
504 {
505 vm_page_t m;
506 int retval = 0;
507 vm_object_offset_t paging_offset = 0;
508 vm_object_offset_t next_offset = offset;
509 memory_object_lock_result_t page_lock_result;
510 memory_object_cluster_size_t data_cnt = 0;
511 struct vm_page_delayed_work dw_array;
512 struct vm_page_delayed_work *dwp, *dwp_start;
513 bool dwp_finish_ctx = TRUE;
514 int dw_count;
515 int dw_limit;
516 int dirty_count;
517
518 dwp_start = dwp = NULL;
519 dw_count = 0;
520 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
521 dwp_start = vm_page_delayed_work_get_ctx();
522 if (dwp_start == NULL) {
523 dwp_start = &dw_array;
524 dw_limit = 1;
525 dwp_finish_ctx = FALSE;
526 }
527 dwp = dwp_start;
528
529 dirty_count = 0;
530
531 for (;
532 offset < offset_end && object->resident_page_count;
533 offset += PAGE_SIZE_64) {
534 /*
535 * Limit the number of pages to be cleaned at once to a contiguous
536 * run, or at most MAX_UPL_TRANSFER_BYTES
537 */
538 if (data_cnt) {
539 if ((data_cnt >= MAX_UPL_TRANSFER_BYTES) || (next_offset != offset)) {
540 if (dw_count) {
541 vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
542 dwp = dwp_start;
543 dw_count = 0;
544 }
545 LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
546 paging_offset, offset_resid, io_errno, should_iosync);
547 data_cnt = 0;
548 }
549 }
550 while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
551 dwp->dw_mask = 0;
552
553 page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
554
555 if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) {
556 /*
557 * End of a run of dirty/precious pages.
558 */
559 if (dw_count) {
560 vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
561 dwp = dwp_start;
562 dw_count = 0;
563 }
564 LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
565 paging_offset, offset_resid, io_errno, should_iosync);
566 /*
567 * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
568 * allow the state of page 'm' to change... we need to re-lookup
569 * the current offset
570 */
571 data_cnt = 0;
572 continue;
573 }
574
575 switch (page_lock_result) {
576 case MEMORY_OBJECT_LOCK_RESULT_DONE:
577 break;
578
579 case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
580 if (m->vmp_dirty == TRUE) {
581 dirty_count++;
582 }
583 dwp->dw_mask |= DW_vm_page_free;
584 break;
585
586 case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
587 vm_page_sleep(object, m, THREAD_UNINT, LCK_SLEEP_EXCLUSIVE);
588 continue;
589
590 case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
591 if (data_cnt == 0) {
592 paging_offset = offset;
593 }
594
595 data_cnt += PAGE_SIZE;
596 next_offset = offset + PAGE_SIZE_64;
597
598 /*
599 * wired pages shouldn't be flushed and
600 * since they aren't on any queue,
601 * no need to remove them
602 */
603 if (!VM_PAGE_WIRED(m)) {
604 if (should_flush) {
605 /*
606 * add additional state for the flush
607 */
608 m->vmp_free_when_done = TRUE;
609 }
610 /*
611 * we used to remove the page from the queues at this
612 * point, but we do not believe that an msync
613 * should cause the 'age' of a page to be changed
614 *
615 * else
616 * dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
617 */
618 }
619 retval = 1;
620 break;
621 }
622 if (dwp->dw_mask) {
623 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
624
625 if (dw_count >= dw_limit) {
626 vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
627 dwp = dwp_start;
628 dw_count = 0;
629 }
630 }
631 break;
632 }
633 }
634
635 if (object->pager) {
636 task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_INVALIDATED, vnode_pager_lookup_vnode(object->pager));
637 }
638 /*
639 * We have completed the scan for applicable pages.
640 * Clean any pages that have been saved.
641 */
642 if (dw_count) {
643 vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
644 }
645
646 if (data_cnt) {
647 LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
648 paging_offset, offset_resid, io_errno, should_iosync);
649 }
650
651 if (dwp_start && dwp_finish_ctx) {
652 vm_page_delayed_work_finish_ctx(dwp_start);
653 dwp_start = dwp = NULL;
654 }
655
656 return retval;
657 }
658
659
660
661 /*
662 * Routine: vm_object_update
663 * Description:
664 * Work function for m_o_lock_request(), vm_o_sync().
665 *
666 * Called with object locked and paging ref taken.
667 */
668 kern_return_t
669 vm_object_update(
670 vm_object_t object,
671 vm_object_offset_t offset,
672 vm_object_size_t size,
673 vm_object_offset_t *resid_offset,
674 int *io_errno,
675 memory_object_return_t should_return,
676 int flags,
677 vm_prot_t protection)
678 {
679 vm_object_t copy_object = VM_OBJECT_NULL;
680 boolean_t data_returned = FALSE;
681 boolean_t update_cow;
682 boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
683 boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
684 vm_fault_return_t result;
685 int num_of_extents;
686 int n;
687 #define MAX_EXTENTS 8
688 #define EXTENT_SIZE (1024 * 1024 * 256)
689 #define RESIDENT_LIMIT (1024 * 32)
690 struct extent {
691 vm_object_offset_t e_base;
692 vm_object_offset_t e_min;
693 vm_object_offset_t e_max;
694 } extents[MAX_EXTENTS];
695
696 /*
697 * To avoid blocking while scanning for pages, save
698 * dirty pages to be cleaned all at once.
699 *
700 * XXXO A similar strategy could be used to limit the
701 * number of times that a scan must be restarted for
702 * other reasons. Those pages that would require blocking
703 * could be temporarily collected in another list, or
704 * their offsets could be recorded in a small array.
705 */
706
707 /*
708 * XXX NOTE: May want to consider converting this to a page list
709 * XXX vm_map_copy interface. Need to understand object
710 * XXX coalescing implications before doing so.
711 */
712
713 update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
714 && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
715 !(flags & MEMORY_OBJECT_DATA_PURGE)))
716 || (flags & MEMORY_OBJECT_COPY_SYNC);
717
718 if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
719 int collisions = 0;
720
721 while ((copy_object = object->vo_copy) != VM_OBJECT_NULL) {
722 /*
723 * need to do a try here since we're swimming upstream
724 * against the normal lock ordering... however, we need
725 * to hold the object stable until we gain control of the
726 * copy object so we have to be careful how we approach this
727 */
728 if (vm_object_lock_try(copy_object)) {
729 /*
730 * we 'won' the lock on the copy object...
731 * no need to hold the object lock any longer...
732 * take a real reference on the copy object because
733 * we're going to call vm_fault_page on it which may
734 * under certain conditions drop the lock and the paging
735 * reference we're about to take... the reference
736 * will keep the copy object from going away if that happens
737 */
738 vm_object_unlock(object);
739 vm_object_reference_locked(copy_object);
740 break;
741 }
742 vm_object_unlock(object);
743
744 collisions++;
745 mutex_pause(collisions);
746
747 vm_object_lock(object);
748 }
749 }
750 if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
751 vm_object_offset_t i;
752 vm_object_size_t copy_size;
753 vm_object_offset_t copy_offset;
754 vm_prot_t prot;
755 vm_page_t page;
756 vm_page_t top_page;
757 kern_return_t error = 0;
758 struct vm_object_fault_info fault_info = {};
759
760 if (copy_object != VM_OBJECT_NULL) {
761 /*
762 * translate offset with respect to shadow's offset
763 */
764 copy_offset = (offset >= copy_object->vo_shadow_offset) ?
765 (offset - copy_object->vo_shadow_offset) : 0;
766
767 if (copy_offset > copy_object->vo_size) {
768 copy_offset = copy_object->vo_size;
769 }
770
771 /*
772 * clip size with respect to shadow offset
773 */
774 if (offset >= copy_object->vo_shadow_offset) {
775 copy_size = size;
776 } else if (size >= copy_object->vo_shadow_offset - offset) {
777 copy_size = (size - (copy_object->vo_shadow_offset - offset));
778 } else {
779 copy_size = 0;
780 }
781
782 if (copy_offset + copy_size > copy_object->vo_size) {
783 if (copy_object->vo_size >= copy_offset) {
784 copy_size = copy_object->vo_size - copy_offset;
785 } else {
786 copy_size = 0;
787 }
788 }
789 copy_size += copy_offset;
790 } else {
791 copy_object = object;
792
793 copy_size = offset + size;
794 copy_offset = offset;
795 }
796 fault_info.interruptible = THREAD_UNINT;
797 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
798 fault_info.lo_offset = copy_offset;
799 fault_info.hi_offset = copy_size;
800 fault_info.stealth = TRUE;
801 assert(fault_info.cs_bypass == FALSE);
802 assert(fault_info.csm_associated == FALSE);
803
804 vm_object_paging_begin(copy_object);
805
806 for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
807 RETRY_COW_OF_LOCK_REQUEST:
808 fault_info.cluster_size = (vm_size_t) (copy_size - i);
809 assert(fault_info.cluster_size == copy_size - i);
810
811 prot = VM_PROT_WRITE | VM_PROT_READ;
812 page = VM_PAGE_NULL;
813 result = vm_fault_page(copy_object, i,
814 VM_PROT_WRITE | VM_PROT_READ,
815 FALSE,
816 FALSE, /* page not looked up */
817 &prot,
818 &page,
819 &top_page,
820 (int *)0,
821 &error,
822 FALSE,
823 &fault_info);
824
825 switch (result) {
826 case VM_FAULT_SUCCESS:
827 if (top_page) {
828 vm_fault_cleanup(
829 VM_PAGE_OBJECT(page), top_page);
830 vm_object_lock(copy_object);
831 vm_object_paging_begin(copy_object);
832 }
833 if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {
834 vm_page_lockspin_queues();
835
836 if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {
837 vm_page_deactivate(page);
838 }
839 vm_page_unlock_queues();
840 }
841 vm_page_wakeup_done(copy_object, page);
842 break;
843 case VM_FAULT_RETRY:
844 prot = VM_PROT_WRITE | VM_PROT_READ;
845 vm_object_lock(copy_object);
846 vm_object_paging_begin(copy_object);
847 goto RETRY_COW_OF_LOCK_REQUEST;
848 case VM_FAULT_INTERRUPTED:
849 prot = VM_PROT_WRITE | VM_PROT_READ;
850 vm_object_lock(copy_object);
851 vm_object_paging_begin(copy_object);
852 goto RETRY_COW_OF_LOCK_REQUEST;
853 case VM_FAULT_MEMORY_SHORTAGE:
854 VM_PAGE_WAIT();
855 prot = VM_PROT_WRITE | VM_PROT_READ;
856 vm_object_lock(copy_object);
857 vm_object_paging_begin(copy_object);
858 goto RETRY_COW_OF_LOCK_REQUEST;
859 case VM_FAULT_SUCCESS_NO_VM_PAGE:
860 /* success but no VM page: fail */
861 vm_object_paging_end(copy_object);
862 vm_object_unlock(copy_object);
863 OS_FALLTHROUGH;
864 case VM_FAULT_MEMORY_ERROR:
865 if (object != copy_object) {
866 vm_object_deallocate(copy_object);
867 }
868 vm_object_lock(object);
869 goto BYPASS_COW_COPYIN;
870 default:
871 panic("vm_object_update: unexpected error 0x%x"
872 " from vm_fault_page()\n", result);
873 }
874 }
875 vm_object_paging_end(copy_object);
876 }
877 if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
878 if (copy_object != VM_OBJECT_NULL && copy_object != object) {
879 vm_object_unlock(copy_object);
880 vm_object_deallocate(copy_object);
881 vm_object_lock(object);
882 }
883 return KERN_SUCCESS;
884 }
885 if (copy_object != VM_OBJECT_NULL && copy_object != object) {
886 if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
887 vm_object_lock_assert_exclusive(copy_object);
888 VM_OBJECT_SET_SHADOW_SEVERED(copy_object, TRUE);
889 VM_OBJECT_SET_SHADOWED(copy_object, FALSE);
890 copy_object->shadow = NULL;
891 /*
892 * delete the ref the COW was holding on the target object
893 */
894 vm_object_deallocate(object);
895 }
896 vm_object_unlock(copy_object);
897 vm_object_deallocate(copy_object);
898 vm_object_lock(object);
899 }
900 BYPASS_COW_COPYIN:
901
902 /*
903 * when we have a really large range to check relative
904 * to the number of actual resident pages, we'd like
905 * to use the resident page list to drive our checks
906 * however, the object lock will get dropped while processing
907 * the page which means the resident queue can change which
908 * means we can't walk the queue as we process the pages
909 * we also want to do the processing in offset order to allow
910 * 'runs' of pages to be collected if we're being told to
911 * flush to disk... the resident page queue is NOT ordered.
912 *
913 * a temporary solution (until we figure out how to deal with
914 * large address spaces more generically) is to pre-flight
915 * the resident page queue (if it's small enough) and develop
916 * a collection of extents (that encompass actual resident pages)
917 * to visit. This will at least allow us to deal with some of the
918 * more pathological cases in a more efficient manner. The current
919 * worst case (a single resident page at the end of an extremely large
920 * range) can take minutes to complete for ranges in the terabyte
921 * category... since this routine is called when truncating a file,
922 * and we currently support files up to 16 Tbytes in size, this
923 * is not a theoretical problem
924 */
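/*
 * Illustrative sketch (not compiled, hypothetical offsets): with
 * EXTENT_SIZE = 256MB, e_mask clears the low-order offset bits, so a
 * resident page at 0x12345000 belongs to the extent whose e_base is
 * 0x10000000 and simply widens that extent's [e_min, e_max] window.
 */
#if 0
	vm_object_size_t mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

	assert((0x12345000ULL & mask) == 0x10000000ULL);
#endif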
925
926 if ((object->resident_page_count < RESIDENT_LIMIT) &&
927 (atop_64(size) > (unsigned)(object->resident_page_count / (8 * MAX_EXTENTS)))) {
928 vm_page_t next;
929 vm_object_offset_t start;
930 vm_object_offset_t end;
931 vm_object_size_t e_mask;
932 vm_page_t m;
933
934 start = offset;
935 end = offset + size;
936 num_of_extents = 0;
937 e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
938
939 m = (vm_page_t) vm_page_queue_first(&object->memq);
940
941 while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t) m)) {
942 next = (vm_page_t) vm_page_queue_next(&m->vmp_listq);
943
944 if ((m->vmp_offset >= start) && (m->vmp_offset < end)) {
945 /*
946 * this is a page we're interested in
947 * try to fit it into a current extent
948 */
949 for (n = 0; n < num_of_extents; n++) {
950 if ((m->vmp_offset & e_mask) == extents[n].e_base) {
951 /*
952 * use (PAGE_SIZE - 1) to determine the
953 * max offset so that we don't wrap if
954 * we're at the last page of the space
955 */
956 if (m->vmp_offset < extents[n].e_min) {
957 extents[n].e_min = m->vmp_offset;
958 } else if ((m->vmp_offset + (PAGE_SIZE - 1)) > extents[n].e_max) {
959 extents[n].e_max = m->vmp_offset + (PAGE_SIZE - 1);
960 }
961 break;
962 }
963 }
964 if (n == num_of_extents) {
965 /*
966 * didn't find a current extent that can encompass
967 * this page
968 */
969 if (n < MAX_EXTENTS) {
970 /*
971 * if we still have room,
972 * create a new extent
973 */
974 extents[n].e_base = m->vmp_offset & e_mask;
975 extents[n].e_min = m->vmp_offset;
976 extents[n].e_max = m->vmp_offset + (PAGE_SIZE - 1);
977
978 num_of_extents++;
979 } else {
980 /*
981 * no room to create a new extent...
982 * fall back to a single extent based
983 * on the min and max page offsets
984 * we find in the range we're interested in...
985 * first, look through the extent list and
986 * develop the overall min and max for the
987 * pages we've looked at up to this point
988 */
989 for (n = 1; n < num_of_extents; n++) {
990 if (extents[n].e_min < extents[0].e_min) {
991 extents[0].e_min = extents[n].e_min;
992 }
993 if (extents[n].e_max > extents[0].e_max) {
994 extents[0].e_max = extents[n].e_max;
995 }
996 }
997 /*
998 * now setup to run through the remaining pages
999 * to determine the overall min and max
1000 * offset for the specified range
1001 */
1002 extents[0].e_base = 0;
1003 e_mask = 0;
1004 num_of_extents = 1;
1005
1006 /*
1007 * by continuing, we'll reprocess the
1008 * page that forced us to abandon trying
1009 * to develop multiple extents
1010 */
1011 continue;
1012 }
1013 }
1014 }
1015 m = next;
1016 }
1017 } else {
1018 extents[0].e_min = offset;
1019 extents[0].e_max = offset + (size - 1);
1020
1021 num_of_extents = 1;
1022 }
1023 for (n = 0; n < num_of_extents; n++) {
1024 if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
1025 should_flush, should_return, should_iosync, protection)) {
1026 data_returned = TRUE;
1027 }
1028 }
1029 return data_returned;
1030 }
1031
1032
1033 static kern_return_t
1034 vm_object_set_attributes_common(
1035 vm_object_t object,
1036 boolean_t may_cache,
1037 memory_object_copy_strategy_t copy_strategy)
1038 {
1039 boolean_t object_became_ready;
1040
1041 if (object == VM_OBJECT_NULL) {
1042 return KERN_INVALID_ARGUMENT;
1043 }
1044
1045 /*
1046 * Verify the attributes of importance
1047 */
1048
1049 switch (copy_strategy) {
1050 case MEMORY_OBJECT_COPY_NONE:
1051 case MEMORY_OBJECT_COPY_DELAY:
1052 case MEMORY_OBJECT_COPY_DELAY_FORK:
1053 break;
1054 default:
1055 return KERN_INVALID_ARGUMENT;
1056 }
1057
1058 if (may_cache) {
1059 may_cache = TRUE;
1060 }
1061
1062 vm_object_lock(object);
1063
1064 /*
1065 * Copy the attributes
1066 */
1067 assert(!object->internal);
1068 object_became_ready = !object->pager_ready;
1069 object->copy_strategy = copy_strategy;
1070 VM_OBJECT_SET_CAN_PERSIST(object, may_cache);
1071
1072 /*
1073 * Wake up anyone waiting for the ready attribute
1074 * to become asserted.
1075 */
1076
1077 if (object_became_ready) {
1078 VM_OBJECT_SET_PAGER_READY(object, TRUE);
1079 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1080 }
1081
1082 vm_object_unlock(object);
1083
1084 return KERN_SUCCESS;
1085 }
1086
1087
1088 /*
1089 * Set the memory object attribute as provided.
1090 *
1091 * XXX This routine cannot be completed until the vm_msync, clean
1092 * in place, and cluster work is completed. See ifdef notyet
1093 * below and note that vm_object_set_attributes_common()
1094 * may have to be expanded.
1095 */
1096 kern_return_t
1097 memory_object_change_attributes(
1098 memory_object_control_t control,
1099 memory_object_flavor_t flavor,
1100 memory_object_info_t attributes,
1101 mach_msg_type_number_t count)
1102 {
1103 vm_object_t object;
1104 kern_return_t result = KERN_SUCCESS;
1105 boolean_t may_cache;
1106 boolean_t invalidate;
1107 memory_object_copy_strategy_t copy_strategy;
1108
1109 object = memory_object_control_to_vm_object(control);
1110 if (object == VM_OBJECT_NULL) {
1111 return KERN_INVALID_ARGUMENT;
1112 }
1113
1114 vm_object_lock(object);
1115
1116 may_cache = object->can_persist;
1117 copy_strategy = object->copy_strategy;
1118 #if notyet
1119 invalidate = object->invalidate;
1120 #endif
1121 vm_object_unlock(object);
1122
1123 switch (flavor) {
1124 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1125 {
1126 old_memory_object_behave_info_t behave;
1127
1128 if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1129 result = KERN_INVALID_ARGUMENT;
1130 break;
1131 }
1132
1133 behave = (old_memory_object_behave_info_t) attributes;
1134
1135 invalidate = behave->invalidate;
1136 copy_strategy = behave->copy_strategy;
1137
1138 break;
1139 }
1140
1141 case MEMORY_OBJECT_BEHAVIOR_INFO:
1142 {
1143 memory_object_behave_info_t behave;
1144
1145 if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1146 result = KERN_INVALID_ARGUMENT;
1147 break;
1148 }
1149
1150 behave = (memory_object_behave_info_t) attributes;
1151
1152 invalidate = behave->invalidate;
1153 copy_strategy = behave->copy_strategy;
1154 break;
1155 }
1156
1157 case MEMORY_OBJECT_PERFORMANCE_INFO:
1158 {
1159 memory_object_perf_info_t perf;
1160
1161 if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
1162 result = KERN_INVALID_ARGUMENT;
1163 break;
1164 }
1165
1166 perf = (memory_object_perf_info_t) attributes;
1167
1168 may_cache = perf->may_cache;
1169
1170 break;
1171 }
1172
1173 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1174 {
1175 old_memory_object_attr_info_t attr;
1176
1177 if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1178 result = KERN_INVALID_ARGUMENT;
1179 break;
1180 }
1181
1182 attr = (old_memory_object_attr_info_t) attributes;
1183
1184 may_cache = attr->may_cache;
1185 copy_strategy = attr->copy_strategy;
1186
1187 break;
1188 }
1189
1190 case MEMORY_OBJECT_ATTRIBUTE_INFO:
1191 {
1192 memory_object_attr_info_t attr;
1193
1194 if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
1195 result = KERN_INVALID_ARGUMENT;
1196 break;
1197 }
1198
1199 attr = (memory_object_attr_info_t) attributes;
1200
1201 copy_strategy = attr->copy_strategy;
1202 may_cache = attr->may_cache_object;
1203
1204 break;
1205 }
1206
1207 default:
1208 result = KERN_INVALID_ARGUMENT;
1209 break;
1210 }
1211
1212 if (result != KERN_SUCCESS) {
1213 return result;
1214 }
1215
1216 if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
1217 copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1218 }
1219
1220 /*
1221 * XXX may_cache may become a tri-valued variable to handle
1222 * XXX uncache if not in use.
1223 */
1224 return vm_object_set_attributes_common(object,
1225 may_cache,
1226 copy_strategy);
1227 }
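/*
 * Illustrative sketch (not compiled): the MEMORY_OBJECT_ATTRIBUTE_INFO
 * flavor handled above, assuming the usual memory_object_attr_info_data_t
 * layout.  "control" is a hypothetical control port; only copy_strategy
 * and may_cache_object are consulted for this flavor.
 */
#if 0
	memory_object_attr_info_data_t attr = { 0 };

	attr.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	attr.may_cache_object = TRUE;

	(void) memory_object_change_attributes(control,
	    MEMORY_OBJECT_ATTRIBUTE_INFO,
	    (memory_object_info_t)&attr,
	    MEMORY_OBJECT_ATTR_INFO_COUNT);
#endif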
1228
1229 kern_return_t
1230 memory_object_iopl_request(
1231 ipc_port_t port,
1232 memory_object_offset_t offset,
1233 upl_size_t *upl_size,
1234 upl_t *upl_ptr,
1235 upl_page_info_array_t user_page_list,
1236 unsigned int *page_list_count,
1237 upl_control_flags_t *flags,
1238 vm_tag_t tag)
1239 {
1240 vm_object_t object;
1241 kern_return_t ret;
1242 upl_control_flags_t caller_flags;
1243 vm_named_entry_t named_entry;
1244
1245 caller_flags = *flags;
1246
1247 if (caller_flags & ~UPL_VALID_FLAGS) {
1248 /*
1249 * For forward compatibility's sake,
1250 * reject any unknown flag.
1251 */
1252 return KERN_INVALID_VALUE;
1253 }
1254
1255 named_entry = mach_memory_entry_from_port(port);
1256 if (named_entry != NULL) {
1257 /* a few checks to make sure user is obeying rules */
1258 if (*upl_size == 0) {
1259 if (offset >= named_entry->size) {
1260 return KERN_INVALID_RIGHT;
1261 }
1262 *upl_size = (upl_size_t)(named_entry->size - offset);
1263 if (*upl_size != named_entry->size - offset) {
1264 return KERN_INVALID_ARGUMENT;
1265 }
1266 }
1267 if (caller_flags & UPL_COPYOUT_FROM) {
1268 if ((named_entry->protection & VM_PROT_READ)
1269 != VM_PROT_READ) {
1270 return KERN_INVALID_RIGHT;
1271 }
1272 } else {
1273 if ((named_entry->protection &
1274 (VM_PROT_READ | VM_PROT_WRITE))
1275 != (VM_PROT_READ | VM_PROT_WRITE)) {
1276 return KERN_INVALID_RIGHT;
1277 }
1278 }
1279 if (named_entry->size < (offset + *upl_size)) {
1280 return KERN_INVALID_ARGUMENT;
1281 }
1282
1283 /* the caller's parameter offset is defined to be the */
1284 /* offset from beginning of named entry offset in object */
1285 offset = offset + named_entry->offset;
1286 offset += named_entry->data_offset;
1287
1288 if (named_entry->is_sub_map ||
1289 named_entry->is_copy) {
1290 return KERN_INVALID_ARGUMENT;
1291 }
1292 if (!named_entry->is_object) {
1293 return KERN_INVALID_ARGUMENT;
1294 }
1295
1296 named_entry_lock(named_entry);
1297
1298 object = vm_named_entry_to_vm_object(named_entry);
1299 assert(object != VM_OBJECT_NULL);
1300 vm_object_reference(object);
1301 named_entry_unlock(named_entry);
1302 } else {
1303 return KERN_INVALID_ARGUMENT;
1304 }
1305 if (object == VM_OBJECT_NULL) {
1306 return KERN_INVALID_ARGUMENT;
1307 }
1308
1309 if (!object->private) {
1310 if (object->phys_contiguous) {
1311 *flags = UPL_PHYS_CONTIG;
1312 } else {
1313 *flags = 0;
1314 }
1315 } else {
1316 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
1317 }
1318
1319 ret = vm_object_iopl_request(object,
1320 offset,
1321 *upl_size,
1322 upl_ptr,
1323 user_page_list,
1324 page_list_count,
1325 caller_flags,
1326 tag);
1327 vm_object_deallocate(object);
1328 return ret;
1329 }
1330
1331 /*
1332 * Routine: memory_object_upl_request [interface]
1333 * Purpose:
1334 * Cause the population of a portion of a vm_object.
1335 * Depending on the nature of the request, the pages
1336 * returned may contain valid data or be uninitialized.
1337 *
1338 */
1339
1340 kern_return_t
1341 memory_object_upl_request(
1342 memory_object_control_t control,
1343 memory_object_offset_t offset,
1344 upl_size_t size,
1345 upl_t *upl_ptr,
1346 upl_page_info_array_t user_page_list,
1347 unsigned int *page_list_count,
1348 int cntrl_flags,
1349 int tag)
1350 {
1351 vm_object_t object;
1352 vm_tag_t vmtag = (vm_tag_t)tag;
1353 assert(vmtag == tag);
1354
1355 object = memory_object_control_to_vm_object(control);
1356 if (object == VM_OBJECT_NULL) {
1357 return KERN_TERMINATED;
1358 }
1359
1360 return vm_object_upl_request(object,
1361 offset,
1362 size,
1363 upl_ptr,
1364 user_page_list,
1365 page_list_count,
1366 (upl_control_flags_t)(unsigned int) cntrl_flags,
1367 vmtag);
1368 }
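/*
 * Illustrative sketch (not compiled): gathering the dirty pages of a
 * small range into a UPL for pageout.  "control" is a hypothetical
 * control port and the flag combination is only an example.
 */
#if 0
	upl_t upl = NULL;
	upl_page_info_t page_list[8];
	unsigned int page_count = 8;

	(void) memory_object_upl_request(control,
	    (memory_object_offset_t)0,
	    (upl_size_t)(8 * PAGE_SIZE),
	    &upl,
	    page_list,
	    &page_count,
	    UPL_COPYOUT_FROM | UPL_RET_ONLY_DIRTY,
	    VM_KERN_MEMORY_NONE);
#endif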
1369
1370
1371 kern_return_t
1372 memory_object_cluster_size(
1373 memory_object_control_t control,
1374 memory_object_offset_t *start,
1375 vm_size_t *length,
1376 uint32_t *io_streaming,
1377 memory_object_fault_info_t mo_fault_info)
1378 {
1379 vm_object_t object;
1380 vm_object_fault_info_t fault_info;
1381
1382 object = memory_object_control_to_vm_object(control);
1383
1384 if (object == VM_OBJECT_NULL || object->paging_offset > *start) {
1385 return KERN_INVALID_ARGUMENT;
1386 }
1387
1388 *start -= object->paging_offset;
1389
1390 fault_info = (vm_object_fault_info_t)(uintptr_t) mo_fault_info;
1391 vm_object_cluster_size(object,
1392 (vm_object_offset_t *)start,
1393 length,
1394 fault_info,
1395 io_streaming);
1396
1397 *start += object->paging_offset;
1398
1399 return KERN_SUCCESS;
1400 }
1401
1402
1403 /*
1404 * Routine: host_default_memory_manager [interface]
1405 * Purpose:
1406 * set/get the default memory manager port and default cluster
1407 * size.
1408 *
1409 * If successful, consumes the supplied naked send right.
1410 */
1411 kern_return_t
1412 host_default_memory_manager(
1413 host_priv_t host_priv,
1414 memory_object_default_t *default_manager,
1415 __unused memory_object_cluster_size_t cluster_size)
1416 {
1417 memory_object_default_t current_manager;
1418 memory_object_default_t new_manager;
1419 memory_object_default_t returned_manager;
1420 kern_return_t result = KERN_SUCCESS;
1421
1422 if (host_priv == HOST_PRIV_NULL) {
1423 return KERN_INVALID_HOST;
1424 }
1425
1426 new_manager = *default_manager;
1427 lck_mtx_lock(&memory_manager_default_lock);
1428 current_manager = memory_manager_default;
1429 returned_manager = MEMORY_OBJECT_DEFAULT_NULL;
1430
1431 if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1432 /*
1433 * Retrieve the current value.
1434 */
1435 returned_manager = ipc_port_make_send_mqueue(current_manager);
1436 } else {
1437 /*
1438 * Only allow the kernel to change the value.
1439 */
1440 extern task_t kernel_task;
1441 if (current_task() != kernel_task) {
1442 result = KERN_NO_ACCESS;
1443 goto out;
1444 }
1445
1446 /*
1447 * If this is the first non-null manager, start
1448 * up the internal pager support.
1449 */
1450 if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1451 result = vm_pageout_internal_start();
1452 if (result != KERN_SUCCESS) {
1453 goto out;
1454 }
1455 }
1456
1457 /*
1458 * Retrieve the current value,
1459 * and replace it with the supplied value.
1460 * We return the old reference to the caller
1461 * but we have to take a reference on the new
1462 * one.
1463 */
1464 returned_manager = current_manager;
1465 memory_manager_default = ipc_port_make_send_mqueue(new_manager);
1466
1467 /*
1468 * In case anyone's been waiting for a memory
1469 * manager to be established, wake them up.
1470 */
1471
1472 thread_wakeup((event_t) &memory_manager_default);
1473
1474 /*
1475 * Now that we have a default pager for anonymous memory,
1476 * reactivate all the throttled pages (i.e. dirty pages with
1477 * no pager).
1478 */
1479 if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1480 vm_page_reactivate_all_throttled();
1481 }
1482 }
1483 out:
1484 lck_mtx_unlock(&memory_manager_default_lock);
1485
1486 *default_manager = returned_manager;
1487 return result;
1488 }
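/*
 * Illustrative sketch (not compiled): passing MEMORY_OBJECT_DEFAULT_NULL
 * simply reads back a send right for the current default manager, per
 * the retrieve path above; only the kernel may install a new manager.
 */
#if 0
	memory_object_default_t dmm = MEMORY_OBJECT_DEFAULT_NULL;

	(void) host_default_memory_manager(host_priv_self(), &dmm, 0);
#endif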
1489
1490 /*
1491 * Routine: memory_manager_default_reference
1492 * Purpose:
1493 * Returns a naked send right for the default
1494 * memory manager. The returned right is always
1495 * valid (not IP_NULL or IP_DEAD).
1496 */
1497
1498 __private_extern__ memory_object_default_t
1499 memory_manager_default_reference(void)
1500 {
1501 memory_object_default_t current_manager;
1502
1503 lck_mtx_lock(&memory_manager_default_lock);
1504 current_manager = memory_manager_default;
1505 while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1506 wait_result_t res;
1507
1508 res = lck_mtx_sleep(&memory_manager_default_lock,
1509 LCK_SLEEP_DEFAULT,
1510 (event_t) &memory_manager_default,
1511 THREAD_UNINT);
1512 assert(res == THREAD_AWAKENED);
1513 current_manager = memory_manager_default;
1514 }
1515 current_manager = ipc_port_make_send_mqueue(current_manager);
1516 lck_mtx_unlock(&memory_manager_default_lock);
1517
1518 return current_manager;
1519 }
1520
1521 /*
1522 * Routine: memory_manager_default_check
1523 *
1524 * Purpose:
1525 * Check whether a default memory manager has been set
1526 * up yet, or not. Returns KERN_SUCCESS if dmm exists,
1527 * and KERN_FAILURE if dmm does not exist.
1528 *
1529 * If there is no default memory manager, log an error,
1530 * but only the first time.
1531 *
1532 */
1533 __private_extern__ kern_return_t
1534 memory_manager_default_check(void)
1535 {
1536 memory_object_default_t current;
1537
1538 lck_mtx_lock(&memory_manager_default_lock);
1539 current = memory_manager_default;
1540 if (current == MEMORY_OBJECT_DEFAULT_NULL) {
1541 static boolean_t logged; /* initialized to 0 */
1542 boolean_t complain = !logged;
1543 logged = TRUE;
1544 lck_mtx_unlock(&memory_manager_default_lock);
1545 if (complain) {
1546 printf("Warning: No default memory manager\n");
1547 }
1548 return KERN_FAILURE;
1549 } else {
1550 lck_mtx_unlock(&memory_manager_default_lock);
1551 return KERN_SUCCESS;
1552 }
1553 }
1554
1555 /* Allow manipulation of individual page state. This is actually part of */
1556 /* the UPL regimen but takes place on the object rather than on a UPL */
1557
1558 kern_return_t
1559 memory_object_page_op(
1560 memory_object_control_t control,
1561 memory_object_offset_t offset,
1562 int ops,
1563 ppnum_t *phys_entry,
1564 int *flags)
1565 {
1566 vm_object_t object;
1567
1568 object = memory_object_control_to_vm_object(control);
1569 if (object == VM_OBJECT_NULL) {
1570 return KERN_INVALID_ARGUMENT;
1571 }
1572
1573 return vm_object_page_op(object, offset, ops, phys_entry, flags);
1574 }
1575
1576 /*
1577 * memory_object_range_op offers performance enhancement over
1578 * memory_object_page_op for page_op functions which do not require page
1579 * level state to be returned from the call. Page_op was created to provide
1580 * a low-cost alternative to page manipulation via UPLs when only a single
1581 * page was involved. The range_op call establishes the ability in the _op
1582 * family of functions to work on multiple pages where the lack of page level
1583 * state handling allows the caller to avoid the overhead of the upl structures.
1584 */
1585
1586 kern_return_t
1587 memory_object_range_op(
1588 memory_object_control_t control,
1589 memory_object_offset_t offset_beg,
1590 memory_object_offset_t offset_end,
1591 int ops,
1592 int *range)
1593 {
1594 vm_object_t object;
1595
1596 object = memory_object_control_to_vm_object(control);
1597 if (object == VM_OBJECT_NULL) {
1598 return KERN_INVALID_ARGUMENT;
1599 }
1600
1601 if (offset_end - offset_beg > (uint32_t) -1) {
1602 /* range is too big and would overflow "*range" */
1603 return KERN_INVALID_ARGUMENT;
1604 }
1605
1606 return vm_object_range_op(object,
1607 offset_beg,
1608 offset_end,
1609 ops,
1610 (uint32_t *) range);
1611 }
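/*
 * Illustrative sketch (not compiled): measuring a leading run of
 * resident pages without building any UPL, per the rationale above.
 * "control" is hypothetical; UPL_ROP_PRESENT is one of the supported
 * ops (the extent covered is reported back through "range").
 */
#if 0
	int range = 0;

	(void) memory_object_range_op(control,
	    (memory_object_offset_t)0,
	    (memory_object_offset_t)(64 * PAGE_SIZE),
	    UPL_ROP_PRESENT,
	    &range);
#endif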
1612
1613
1614 void
1615 memory_object_mark_used(
1616 memory_object_control_t control)
1617 {
1618 vm_object_t object;
1619
1620 if (control == NULL) {
1621 return;
1622 }
1623
1624 object = memory_object_control_to_vm_object(control);
1625
1626 if (object != VM_OBJECT_NULL) {
1627 vm_object_cache_remove(object);
1628 }
1629 }
1630
1631
1632 void
1633 memory_object_mark_unused(
1634 memory_object_control_t control,
1635 __unused boolean_t rage)
1636 {
1637 vm_object_t object;
1638
1639 if (control == NULL) {
1640 return;
1641 }
1642
1643 object = memory_object_control_to_vm_object(control);
1644
1645 if (object != VM_OBJECT_NULL) {
1646 vm_object_cache_add(object);
1647 }
1648 }
1649
1650 void
1651 memory_object_mark_io_tracking(
1652 memory_object_control_t control)
1653 {
1654 vm_object_t object;
1655
1656 if (control == NULL) {
1657 return;
1658 }
1659 object = memory_object_control_to_vm_object(control);
1660
1661 if (object != VM_OBJECT_NULL) {
1662 vm_object_lock(object);
1663 object->io_tracking = TRUE;
1664 vm_object_unlock(object);
1665 }
1666 }
1667
1668 void
1669 memory_object_mark_trusted(
1670 memory_object_control_t control)
1671 {
1672 vm_object_t object;
1673
1674 if (control == NULL) {
1675 return;
1676 }
1677 object = memory_object_control_to_vm_object(control);
1678
1679 if (object != VM_OBJECT_NULL) {
1680 vm_object_lock(object);
1681 VM_OBJECT_SET_PAGER_TRUSTED(object, TRUE);
1682 vm_object_unlock(object);
1683 }
1684 }
1685
1686 #if FBDP_DEBUG_OBJECT_NO_PAGER
1687 kern_return_t
1688 memory_object_mark_as_tracked(
1689 memory_object_control_t control,
1690 bool new_value,
1691 bool *old_value)
1692 {
1693 vm_object_t object;
1694
1695 if (control == NULL) {
1696 return KERN_INVALID_ARGUMENT;
1697 }
1698 object = memory_object_control_to_vm_object(control);
1699
1700 if (object == VM_OBJECT_NULL) {
1701 return KERN_FAILURE;
1702 }
1703
1704 vm_object_lock(object);
1705 *old_value = object->fbdp_tracked;
1706 VM_OBJECT_SET_FBDP_TRACKED(object, new_value);
1707 vm_object_unlock(object);
1708
1709 return KERN_SUCCESS;
1710 }
1711 #endif /* FBDP_DEBUG_OBJECT_NO_PAGER */
1712
1713 #if CONFIG_SECLUDED_MEMORY
1714 void
1715 memory_object_mark_eligible_for_secluded(
1716 memory_object_control_t control,
1717 boolean_t eligible_for_secluded)
1718 {
1719 vm_object_t object;
1720
1721 if (control == NULL) {
1722 return;
1723 }
1724 object = memory_object_control_to_vm_object(control);
1725
1726 if (object == VM_OBJECT_NULL) {
1727 return;
1728 }
1729
1730 vm_object_lock(object);
1731 if (eligible_for_secluded &&
1732 secluded_for_filecache && /* global boot-arg */
1733 !object->eligible_for_secluded) {
1734 object->eligible_for_secluded = TRUE;
1735 vm_page_secluded.eligible_for_secluded += object->resident_page_count;
1736 } else if (!eligible_for_secluded &&
1737 object->eligible_for_secluded) {
1738 object->eligible_for_secluded = FALSE;
1739 vm_page_secluded.eligible_for_secluded -= object->resident_page_count;
1740 if (object->resident_page_count) {
1741 /* XXX FBDP TODO: flush pages from secluded queue? */
1742 // printf("FBDP TODO: flush %d pages from %p from secluded queue\n", object->resident_page_count, object);
1743 }
1744 }
1745 vm_object_unlock(object);
1746 }
1747 #endif /* CONFIG_SECLUDED_MEMORY */
1748
1749 void
1750 memory_object_mark_for_realtime(
1751 memory_object_control_t control,
1752 bool for_realtime)
1753 {
1754 vm_object_t object;
1755
1756 if (control == NULL) {
1757 return;
1758 }
1759 object = memory_object_control_to_vm_object(control);
1760
1761 if (object == VM_OBJECT_NULL) {
1762 return;
1763 }
1764
1765 vm_object_lock(object);
1766 VM_OBJECT_SET_FOR_REALTIME(object, for_realtime);
1767 vm_object_unlock(object);
1768 }
1769
1770 kern_return_t
1771 memory_object_pages_resident(
1772 memory_object_control_t control,
1773 boolean_t * has_pages_resident)
1774 {
1775 vm_object_t object;
1776
1777 *has_pages_resident = FALSE;
1778
1779 object = memory_object_control_to_vm_object(control);
1780 if (object == VM_OBJECT_NULL) {
1781 return KERN_INVALID_ARGUMENT;
1782 }
1783
1784 if (object->resident_page_count) {
1785 *has_pages_resident = TRUE;
1786 }
1787
1788 return KERN_SUCCESS;
1789 }
1790
1791 kern_return_t
1792 memory_object_signed(
1793 memory_object_control_t control,
1794 boolean_t is_signed)
1795 {
1796 vm_object_t object;
1797
1798 object = memory_object_control_to_vm_object(control);
1799 if (object == VM_OBJECT_NULL) {
1800 return KERN_INVALID_ARGUMENT;
1801 }
1802
1803 vm_object_lock(object);
1804 object->code_signed = is_signed;
1805 vm_object_unlock(object);
1806
1807 return KERN_SUCCESS;
1808 }
1809
1810 boolean_t
1811 memory_object_is_signed(
1812 memory_object_control_t control)
1813 {
1814 boolean_t is_signed;
1815 vm_object_t object;
1816
1817 object = memory_object_control_to_vm_object(control);
1818 if (object == VM_OBJECT_NULL) {
1819 return FALSE;
1820 }
1821
1822 vm_object_lock_shared(object);
1823 is_signed = object->code_signed;
1824 vm_object_unlock(object);
1825
1826 return is_signed;
1827 }
1828
1829 boolean_t
1830 memory_object_is_shared_cache(
1831 memory_object_control_t control)
1832 {
1833 vm_object_t object = VM_OBJECT_NULL;
1834
1835 object = memory_object_control_to_vm_object(control);
1836 if (object == VM_OBJECT_NULL) {
1837 return FALSE;
1838 }
1839
1840 return object->object_is_shared_cache;
1841 }
1842
1843 __private_extern__ memory_object_control_t
1844 memory_object_control_allocate(
1845 vm_object_t object)
1846 {
1847 return object;
1848 }
1849
1850 __private_extern__ void
1851 memory_object_control_collapse(
1852 memory_object_control_t *control,
1853 vm_object_t object)
1854 {
1855 *control = object;
1856 }
1857
1858 __private_extern__ vm_object_t
1859 memory_object_control_to_vm_object(
1860 memory_object_control_t control)
1861 {
1862 return control;
1863 }
1864
1865 __private_extern__ vm_object_t
1866 memory_object_to_vm_object(
1867 memory_object_t mem_obj)
1868 {
1869 memory_object_control_t mo_control;
1870
1871 if (mem_obj == MEMORY_OBJECT_NULL) {
1872 return VM_OBJECT_NULL;
1873 }
1874 mo_control = mem_obj->mo_control;
1875 if (mo_control == NULL) {
1876 return VM_OBJECT_NULL;
1877 }
1878 return memory_object_control_to_vm_object(mo_control);
1879 }
1880
1881 void
1882 memory_object_control_reference(
1883 __unused memory_object_control_t control)
1884 {
1885 return;
1886 }
1887
1888 /*
1889 * We only ever issue one of these references, so kill it
1890 * when that gets released (should switch to real reference
1891 * counting in true port-less EMMI).
1892 */
1893 void
1894 memory_object_control_deallocate(
1895 __unused memory_object_control_t control)
1896 {
1897 }
1898
1899 void
1900 memory_object_control_disable(
1901 memory_object_control_t *control)
1902 {
1903 assert(*control != VM_OBJECT_NULL);
1904 *control = VM_OBJECT_NULL;
1905 }
1906
1907 memory_object_t
1908 convert_port_to_memory_object(
1909 __unused mach_port_t port)
1910 {
1911 return MEMORY_OBJECT_NULL;
1912 }
1913
1914
1915 mach_port_t
1916 convert_memory_object_to_port(
1917 __unused memory_object_t object)
1918 {
1919 return MACH_PORT_NULL;
1920 }
1921
1922
1923 /* Routine memory_object_reference */
1924 void
1925 memory_object_reference(
1926 memory_object_t memory_object)
1927 {
1928 (memory_object->mo_pager_ops->memory_object_reference)(
1929 memory_object);
1930 }
1931
1932 /* Routine memory_object_deallocate */
1933 void
1934 memory_object_deallocate(
1935 memory_object_t memory_object)
1936 {
1937 (memory_object->mo_pager_ops->memory_object_deallocate)(
1938 memory_object);
1939 }
1940
1941
1942 /* Routine memory_object_init */
1943 kern_return_t
1944 memory_object_init
1945 (
1946 memory_object_t memory_object,
1947 memory_object_control_t memory_control,
1948 memory_object_cluster_size_t memory_object_page_size
1949 )
1950 {
1951 return (memory_object->mo_pager_ops->memory_object_init)(
1952 memory_object,
1953 memory_control,
1954 memory_object_page_size);
1955 }
1956
1957 /* Routine memory_object_terminate */
1958 kern_return_t
1959 memory_object_terminate
1960 (
1961 memory_object_t memory_object
1962 )
1963 {
1964 return (memory_object->mo_pager_ops->memory_object_terminate)(
1965 memory_object);
1966 }
1967
1968 /* Routine memory_object_data_request */
1969 kern_return_t
1970 memory_object_data_request
1971 (
1972 memory_object_t memory_object,
1973 memory_object_offset_t offset,
1974 memory_object_cluster_size_t length,
1975 vm_prot_t desired_access,
1976 memory_object_fault_info_t fault_info
1977 )
1978 {
1979 return (memory_object->mo_pager_ops->memory_object_data_request)(
1980 memory_object,
1981 offset,
1982 length,
1983 desired_access,
1984 fault_info);
1985 }
1986
1987 /* Routine memory_object_data_return */
1988 kern_return_t
1989 memory_object_data_return
1990 (
1991 memory_object_t memory_object,
1992 memory_object_offset_t offset,
1993 memory_object_cluster_size_t size,
1994 memory_object_offset_t *resid_offset,
1995 int *io_error,
1996 boolean_t dirty,
1997 boolean_t kernel_copy,
1998 int upl_flags
1999 )
2000 {
2001 return (memory_object->mo_pager_ops->memory_object_data_return)(
2002 memory_object,
2003 offset,
2004 size,
2005 resid_offset,
2006 io_error,
2007 dirty,
2008 kernel_copy,
2009 upl_flags);
2010 }
2011
2012 /* Routine memory_object_data_initialize */
2013 kern_return_t
2014 memory_object_data_initialize
2015 (
2016 memory_object_t memory_object,
2017 memory_object_offset_t offset,
2018 memory_object_cluster_size_t size
2019 )
2020 {
2021 return (memory_object->mo_pager_ops->memory_object_data_initialize)(
2022 memory_object,
2023 offset,
2024 size);
2025 }
2026
2027 /*
2028 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
2029 * each time a "named" VM object gets mapped directly or indirectly
2030 * (copy-on-write mapping). A "named" VM object has an extra reference held
2031 * by the pager to keep it alive until the pager decides that the
2032 * memory object (and its VM object) can be reclaimed.
2033 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
2034 * the mappings of that memory object have been removed.
2035 *
2036 * For a given VM object, calls to memory_object_map() and memory_object_last_unmap()
2037 * are serialized (through object->mapping_in_progress), to ensure that the
2038 * pager gets a consistent view of the mapping status of the memory object.
2039 *
2040 * This allows the pager to keep track of how many times a memory object
2041 * has been mapped and with which protections, to decide when it can be
2042 * reclaimed.
2043 */
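
/*
 * Illustrative sketch (not compiled): a hypothetical pager's callbacks
 * for the map/last_unmap protocol described above, tracking nothing
 * more than "is this object mapped anywhere".
 */
#if 0
static boolean_t example_pager_is_mapped = FALSE;	/* hypothetical pager state */

kern_return_t
example_pager_map(memory_object_t mem_obj, vm_prot_t prot)
{
	(void)mem_obj; (void)prot;
	example_pager_is_mapped = TRUE;		/* serialized via mapping_in_progress */
	return KERN_SUCCESS;
}

kern_return_t
example_pager_last_unmap(memory_object_t mem_obj)
{
	(void)mem_obj;
	example_pager_is_mapped = FALSE;	/* object may now be reclaimable */
	return KERN_SUCCESS;
}
#endif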
2044
2045 /* Routine memory_object_map */
2046 kern_return_t
2047 memory_object_map
2048 (
2049 memory_object_t memory_object,
2050 vm_prot_t prot
2051 )
2052 {
2053 return (memory_object->mo_pager_ops->memory_object_map)(
2054 memory_object,
2055 prot);
2056 }
2057
2058 /* Routine memory_object_last_unmap */
2059 kern_return_t
2060 memory_object_last_unmap
2061 (
2062 memory_object_t memory_object
2063 )
2064 {
2065 return (memory_object->mo_pager_ops->memory_object_last_unmap)(
2066 memory_object);
2067 }
2068
2069 boolean_t
2070 memory_object_backing_object
2071 (
2072 memory_object_t memory_object,
2073 memory_object_offset_t offset,
2074 vm_object_t *backing_object,
2075 vm_object_offset_t *backing_offset)
2076 {
2077 if (memory_object->mo_pager_ops->memory_object_backing_object == NULL) {
2078 return FALSE;
2079 }
2080 return (memory_object->mo_pager_ops->memory_object_backing_object)(
2081 memory_object,
2082 offset,
2083 backing_object,
2084 backing_offset);
2085 }
2086
2087 upl_t
2088 convert_port_to_upl(
2089 __unused ipc_port_t port)
2090 {
2091 return NULL;
2092 }
2093
2094 mach_port_t
2095 convert_upl_to_port(
2096 __unused upl_t upl)
2097 {
2098 return MACH_PORT_NULL;
2099 }
2100