1 /*
2 * Copyright (c) 2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/memory_entry.h>
30 #include <mach/memory_entry_server.h>
31 #include <mach/vm_map_server.h>
32 #include <mach/mach_vm_server.h>
33 #include <vm/vm_purgeable_internal.h>
34 #include <mach/mach_host_server.h>
35 #include <IOKit/IOBSD.h>
36 #include <vm/vm_memory_entry_xnu.h>
37 #include <vm/vm_map_internal.h>
38 #include <vm/memory_object_internal.h>
39 #include <vm/vm_protos_internal.h>
40 #include <vm/vm_object_internal.h>
41 #include <vm/vm_iokit.h>
42
43 static void mach_memory_entry_no_senders(ipc_port_t, mach_port_mscount_t);
44
45 IPC_KOBJECT_DEFINE(IKOT_NAMED_ENTRY,
46 .iko_op_stable = true,
47 .iko_op_no_senders = mach_memory_entry_no_senders);
48
49 /*
50 * mach_make_memory_entry_64
51 *
52 * Think of it as a two-stage vm_remap() operation. First
53 * you get a handle. Second, you map that handle somewhere
54 * else, rather than doing it all at once (and
55 * without needing access to the other whole map).
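 *
 * Illustrative sketch of the two-stage usage from user space; the
 * variable names are hypothetical, the calls are the public Mach APIs:
 *
 *	memory_object_size_t	size = len;
 *	mach_port_t		handle = MACH_PORT_NULL;
 *	mach_vm_address_t	dst = 0;
 *
 *	// stage 1: get a handle on [addr, addr + len) in this task
 *	kr = mach_make_memory_entry_64(mach_task_self(), &size, addr,
 *	    VM_PROT_READ | VM_PROT_WRITE, &handle, MACH_PORT_NULL);
 *	// stage 2: map that handle somewhere else (possibly another task)
 *	kr = mach_vm_map(other_task, &dst, size, 0, VM_FLAGS_ANYWHERE,
 *	    handle, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
 *	    VM_INHERIT_NONE);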
56 */
57 kern_return_t
58 mach_make_memory_entry_64(
59 vm_map_t target_map,
60 memory_object_size_ut *size_u,
61 memory_object_offset_ut offset_u,
62 vm_prot_ut permission_u,
63 ipc_port_t *object_handle,
64 ipc_port_t parent_handle)
65 {
66 return mach_make_memory_entry_internal(target_map,
67 size_u,
68 offset_u,
69 permission_u,
70 VM_NAMED_ENTRY_KERNEL_FLAGS_NONE,
71 object_handle,
72 parent_handle);
73 }
74
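/*
 * Split the user-supplied "permission" word into its components.
 *
 * Worked example (values illustrative only): for
 *	permission = VM_PROT_READ | VM_PROT_IS_MASK | MAP_MEM_USE_DATA_ADDR
 * this yields protections == VM_PROT_READ, mask_protections == true,
 * use_data_addr == true, use_4K_compat == false, and access ==
 * MAP_MEM_NOOP (no cache mode was encoded via SET_MAP_MEM()).
 */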
75 static inline void
76 vm_memory_entry_decode_perm(
77 vm_prot_t permission,
78 unsigned int *access,
79 vm_prot_t *protections,
80 bool *mask_protections,
81 bool *use_data_addr,
82 bool *use_4K_compat)
83 {
84 *protections = permission & VM_PROT_ALL;
85 *mask_protections = permission & VM_PROT_IS_MASK;
86 *access = GET_MAP_MEM(permission);
87 *use_data_addr = ((permission & MAP_MEM_USE_DATA_ADDR) != 0);
88 *use_4K_compat = ((permission & MAP_MEM_4K_DATA_ADDR) != 0);
89 }
90
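/*
 * Compute the offset of the data within its (map-aligned) page.
 *
 * Worked example (values illustrative only): with use_data_addr set,
 * offset = 0x12345 and map_start = 0x10000 give offset_in_page = 0x2345;
 * with use_4K_compat also set, the low 12 bits are masked off and the
 * result is 0x2000.  Without either flag the result is always 0.
 */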
91 static inline vm_map_offset_t
92 vm_memory_entry_get_offset_in_page(
93 vm_map_offset_t offset,
94 vm_map_offset_t map_start,
95 bool use_data_addr,
96 bool use_4K_compat)
97 {
98 vm_map_offset_t offset_in_page;
99
100 if (use_data_addr || use_4K_compat) {
101 offset_in_page = offset - map_start;
102 if (use_4K_compat) {
103 offset_in_page &= ~((signed)(0xFFF));
104 }
105 } else {
106 offset_in_page = 0;
107 }
108
109 return offset_in_page;
110 }
111
112 static inline kern_return_t
113 mach_make_memory_entry_cleanup(
114 kern_return_t kr,
115 vm_map_t target_map __unused,
116 memory_object_size_ut *size_u,
117 vm_map_offset_ut offset_u __unused,
118 vm_prot_t permission __unused,
119 vm_named_entry_t user_entry __unused,
120 ipc_port_t *object_handle)
121 {
122 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
123 "%p kr 0x%x\n", target_map, VM_SANITIZE_UNSAFE_UNWRAP(offset_u),
124 VM_SANITIZE_UNSAFE_UNWRAP(*size_u), permission, user_entry,
125 vm_sanitize_get_kr(kr));
126 /*
127 * Set safe size and object_handle value on failed return
128 */
129 *size_u = vm_sanitize_wrap_size(0);
130 *object_handle = IPC_PORT_NULL;
131 return vm_sanitize_get_kr(kr);
132 }
133
134 static __attribute__((always_inline, warn_unused_result))
135 kern_return_t
136 mach_make_memory_entry_mem_only_sanitize(
137 vm_map_t target_map,
138 memory_object_size_ut size_u,
139 vm_map_offset_ut offset_u,
140 vm_map_offset_t *map_start,
141 vm_map_offset_t *map_end,
142 vm_map_size_t *map_size)
143 {
144 /*
145 * This code path doesn't use offset and size. They don't need to be
146 	 * validated. However, in order to maintain backward compatibility, some
147 	 * checks on offset and size have been kept.
148 */
149 return vm_sanitize_addr_size(offset_u, size_u,
150 VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
151 target_map, VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
152 map_start, map_end, map_size);
153 }
154
155 static kern_return_t
156 mach_make_memory_entry_mem_only(
157 vm_map_t target_map,
158 memory_object_size_ut *size_u,
159 memory_object_offset_ut offset_u,
160 vm_prot_t permission,
161 ipc_port_t *object_handle,
162 vm_named_entry_t parent_entry)
163 {
164 boolean_t parent_is_object;
165 vm_object_t object;
166 unsigned int access;
167 vm_prot_t protections;
168 bool mask_protections;
169 unsigned int wimg_mode;
170 bool use_data_addr;
171 bool use_4K_compat;
172 vm_named_entry_t user_entry __unused = NULL;
173 kern_return_t kr;
174 vm_map_size_t map_size;
175 vm_map_offset_t map_start, map_end;
176
177 /*
178 	 * Sanitize addr and size. Permissions have been sanitized prior to
179 	 * dispatch.
180 */
181 kr = mach_make_memory_entry_mem_only_sanitize(target_map,
182 *size_u,
183 offset_u,
184 &map_start,
185 &map_end,
186 &map_size);
187 if (__improbable(kr != KERN_SUCCESS)) {
188 return mach_make_memory_entry_cleanup(kr, target_map,
189 size_u, offset_u, permission, user_entry, object_handle);
190 }
191
192 vm_memory_entry_decode_perm(permission, &access, &protections,
193 &mask_protections, &use_data_addr, &use_4K_compat);
194
195 if (use_data_addr || use_4K_compat || parent_entry == NULL) {
196 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
197 size_u, offset_u, permission, user_entry, object_handle);
198 }
199
200 parent_is_object = parent_entry->is_object;
201 if (!parent_is_object) {
202 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
203 size_u, offset_u, permission, user_entry, object_handle);
204 }
205
206 if ((access != parent_entry->access) &&
207 !(parent_entry->protection & VM_PROT_WRITE)) {
208 return mach_make_memory_entry_cleanup(KERN_INVALID_RIGHT, target_map,
209 size_u, offset_u, permission, user_entry, object_handle);
210 }
211
212 object = vm_named_entry_to_vm_object(parent_entry);
213 if (parent_is_object && object != VM_OBJECT_NULL) {
214 wimg_mode = object->wimg_bits;
215 } else {
216 wimg_mode = VM_WIMG_USE_DEFAULT;
217 }
218 vm_prot_to_wimg(access, &wimg_mode);
219 if (access != MAP_MEM_NOOP) {
220 parent_entry->access = access;
221 }
222 if (parent_is_object && object &&
223 (access != MAP_MEM_NOOP) &&
224 (!(object->nophyscache))) {
225 if (object->wimg_bits != wimg_mode) {
226 vm_object_lock(object);
227 vm_object_change_wimg_mode(object, wimg_mode);
228 vm_object_unlock(object);
229 }
230 }
231 if (object_handle) {
232 *object_handle = IP_NULL;
233 }
234 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
235 "%p kr 0x%x\n", target_map, VM_SANITIZE_UNSAFE_UNWRAP(offset_u),
236 VM_SANITIZE_UNSAFE_UNWRAP(*size_u), permission, user_entry, KERN_SUCCESS);
237 /*
238 * TODO: Size isn't being set in this path
239 */
240 return KERN_SUCCESS;
241 }
242
243 #if CONFIG_PROB_GZALLOC
244 static inline vm_map_offset_ut
245 vm_memory_entry_pgz_decode_offset(
246 vm_map_t target_map,
247 vm_map_offset_ut offset_u,
248 memory_object_size_ut *size_u __unused)
249 {
250 if (target_map == NULL || target_map->pmap == kernel_pmap) {
251 vm_map_offset_t pgz_offset;
252
253 /*
254 * It's ok to unsafe unwrap because PGZ does not ship to
255 * customers.
256 */
257 pgz_offset = pgz_decode(VM_SANITIZE_UNSAFE_UNWRAP(offset_u),
258 VM_SANITIZE_UNSAFE_UNWRAP(*size_u));
259 return vm_sanitize_wrap_addr(pgz_offset);
260 }
261 return offset_u;
262 }
263 #endif /* CONFIG_PROB_GZALLOC */
264
265 static __attribute__((always_inline, warn_unused_result))
266 kern_return_t
267 mach_make_memory_entry_generic_sanitize(
268 vm_map_t target_map,
269 memory_object_size_ut size_u,
270 vm_map_offset_ut offset_u,
271 vm_map_offset_t *map_start,
272 vm_map_offset_t *map_end,
273 vm_map_size_t *map_size,
274 vm_map_offset_t *offset)
275 {
276 kern_return_t kr;
277
278 /*
279 * Validate start and end
280 */
281 kr = vm_sanitize_addr_size(offset_u, size_u,
282 VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
283 target_map, VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH,
284 map_start, map_end, map_size);
285 if (__improbable(kr != KERN_SUCCESS)) {
286 return kr;
287 }
288 /*
289 * Validate offset
290 */
291 kr = vm_sanitize_offset(offset_u, VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
292 *map_start, *map_end, offset);
293 if (__improbable(kr != KERN_SUCCESS)) {
294 return kr;
295 }
296
297 return KERN_SUCCESS;
298 }
299
300 static kern_return_t
301 mach_make_memory_entry_named_create(
302 vm_map_t target_map,
303 memory_object_size_ut *size_u,
304 vm_map_offset_ut offset_u,
305 vm_prot_t permission,
306 vm_named_entry_kernel_flags_t vmne_kflags,
307 ipc_port_t *object_handle)
308 {
309 vm_object_t object;
310 unsigned int access;
311 vm_prot_t protections;
312 bool mask_protections;
313 unsigned int wimg_mode;
314 bool use_data_addr;
315 bool use_4K_compat;
316 int ledger_flags = 0;
317 task_t owner;
318 bool fully_owned = false;
319 vm_named_entry_t user_entry = NULL;
320 kern_return_t kr;
321 vm_map_size_t map_size;
322 vm_map_offset_t map_start, map_end, offset;
323
324 if (VM_SANITIZE_UNSAFE_IS_ZERO(*size_u)) {
325 return mach_make_memory_entry_cleanup(KERN_SUCCESS, target_map,
326 size_u, offset_u, permission, user_entry, object_handle);
327 }
328
329 #if CONFIG_PROB_GZALLOC
330 /*
331 * If offset is PGZ protected we need PGZ to fix it up to the right
332 * value prior to validation and use.
333 */
334 offset_u = vm_memory_entry_pgz_decode_offset(target_map, offset_u, size_u);
335 #endif /* CONFIG_PROB_GZALLOC */
336
337 /*
338 	 * Sanitize addr and size. Permissions have been sanitized prior to
339 	 * dispatch.
340 */
341 kr = mach_make_memory_entry_generic_sanitize(target_map,
342 *size_u,
343 offset_u,
344 &map_start,
345 &map_end,
346 &map_size,
347 &offset);
348 if (__improbable(kr != KERN_SUCCESS)) {
349 return mach_make_memory_entry_cleanup(kr, target_map,
350 size_u, offset_u, permission, user_entry, object_handle);
351 }
352
353 assert(map_size != 0);
354
355 vm_memory_entry_decode_perm(permission, &access, &protections,
356 &mask_protections, &use_data_addr, &use_4K_compat);
357
358 if (use_data_addr || use_4K_compat) {
359 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
360 size_u, offset_u, permission, user_entry, object_handle);
361 }
362
363 /*
364 * Force the creation of the VM object now.
365 */
366 #if __LP64__
367 if (map_size > ANON_MAX_SIZE) {
368 return mach_make_memory_entry_cleanup(KERN_FAILURE, target_map,
369 size_u, offset_u, permission, user_entry, object_handle);
370 }
371 #endif /* __LP64__ */
372
373 object = vm_object_allocate(map_size);
374 assert(object != VM_OBJECT_NULL);
375 vm_object_lock(object);
376
377 /*
378 * XXX
379 * We use this path when we want to make sure that
380 * nobody messes with the object (coalesce, for
381 * example) before we map it.
382 * We might want to use these objects for transposition via
383 * vm_object_transpose() too, so we don't want any copy or
384 * shadow objects either...
385 */
386 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
387 VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
388
389 owner = current_task();
390 if ((permission & MAP_MEM_PURGABLE) ||
391 vmne_kflags.vmnekf_ledger_tag) {
392 assert(object->vo_owner == NULL);
393 assert(object->resident_page_count == 0);
394 assert(object->wired_page_count == 0);
395 assert(owner != TASK_NULL);
396 if (vmne_kflags.vmnekf_ledger_no_footprint) {
397 ledger_flags |= VM_LEDGER_FLAG_NO_FOOTPRINT;
398 object->vo_no_footprint = TRUE;
399 }
400 if (permission & MAP_MEM_PURGABLE) {
401 if (!(permission & VM_PROT_WRITE)) {
402 /* if we can't write, we can't purge */
403 vm_object_unlock(object);
404 vm_object_deallocate(object);
405 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT,
406 target_map, size_u, offset_u, permission, user_entry,
407 object_handle);
408 }
409 VM_OBJECT_SET_PURGABLE(object, VM_PURGABLE_NONVOLATILE);
410 if (permission & MAP_MEM_PURGABLE_KERNEL_ONLY) {
411 VM_OBJECT_SET_PURGEABLE_ONLY_BY_KERNEL(object, TRUE);
412 }
413 #if __arm64__
414 if (owner->task_legacy_footprint) {
415 /*
416 				 * For iOS 11, we failed to account for
417 				 * this memory. Keep doing that for
418 				 * legacy apps (built before iOS 12),
419 * for backwards compatibility's sake...
420 */
421 owner = kernel_task;
422 }
423 #endif /* __arm64__ */
424 vm_purgeable_nonvolatile_enqueue(object, owner);
425 /* all memory in this named entry is "owned" */
426 fully_owned = true;
427 }
428 }
429
430 if (vmne_kflags.vmnekf_ledger_tag) {
431 /*
432 * Bill this object to the current task's
433 * ledgers for the given tag.
434 */
435 if (vmne_kflags.vmnekf_ledger_no_footprint) {
436 ledger_flags |= VM_LEDGER_FLAG_NO_FOOTPRINT;
437 }
438 kr = vm_object_ownership_change(
439 object,
440 vmne_kflags.vmnekf_ledger_tag,
441 owner, /* new owner */
442 ledger_flags,
443 FALSE); /* task_objq locked? */
444 if (kr != KERN_SUCCESS) {
445 vm_object_unlock(object);
446 vm_object_deallocate(object);
447 return mach_make_memory_entry_cleanup(kr, target_map,
448 size_u, offset_u, permission, user_entry, object_handle);
449 }
450 /* all memory in this named entry is "owned" */
451 fully_owned = true;
452 }
453
454 #if CONFIG_SECLUDED_MEMORY
455 if (secluded_for_iokit && /* global boot-arg */
456 ((permission & MAP_MEM_GRAB_SECLUDED))) {
457 object->can_grab_secluded = TRUE;
458 assert(!object->eligible_for_secluded);
459 }
460 #endif /* CONFIG_SECLUDED_MEMORY */
461
462 /*
463 * The VM object is brand new and nobody else knows about it,
464 * so we don't need to lock it.
465 */
466
467 wimg_mode = object->wimg_bits;
468 vm_prot_to_wimg(access, &wimg_mode);
469 if (access != MAP_MEM_NOOP) {
470 object->wimg_bits = wimg_mode;
471 }
472
473 vm_object_unlock(object);
474
475 /* the object has no pages, so no WIMG bits to update here */
476
477 user_entry = mach_memory_entry_allocate(object_handle);
478 vm_named_entry_associate_vm_object(
479 user_entry,
480 object,
481 0,
482 map_size,
483 (protections & VM_PROT_ALL));
484 user_entry->internal = TRUE;
485 user_entry->is_sub_map = FALSE;
486 user_entry->offset = 0;
487 user_entry->data_offset = 0;
488 user_entry->protection = protections;
489 user_entry->access = access;
490 user_entry->size = map_size;
491 user_entry->is_fully_owned = fully_owned;
492
493 /* user_object pager and internal fields are not used */
494 /* when the object field is filled in. */
495
496 *size_u = vm_sanitize_wrap_size(user_entry->size - user_entry->data_offset);
497 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
498 "%p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
499 permission, user_entry, KERN_SUCCESS);
500 return KERN_SUCCESS;
501 }
502
503 static kern_return_t
504 mach_make_memory_entry_copy(
505 vm_map_t target_map,
506 memory_object_size_ut *size_u,
507 vm_map_offset_ut offset_u,
508 vm_prot_t permission,
509 ipc_port_t *object_handle)
510 {
511 unsigned int access;
512 vm_prot_t protections;
513 bool mask_protections;
514 bool use_data_addr;
515 bool use_4K_compat;
516 vm_named_entry_t user_entry = NULL;
517 vm_map_copy_t copy;
518 /*
519 * Stash the offset in the page for use by vm_map_enter_mem_object()
520 * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
521 */
522 vm_object_offset_t offset_in_page;
523 kern_return_t kr;
524 vm_map_size_t map_size;
525 vm_map_offset_t map_start, map_end, offset;
526
527 if (VM_SANITIZE_UNSAFE_IS_ZERO(*size_u)) {
528 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
529 size_u, offset_u, permission, user_entry, object_handle);
530 }
531
532 #if CONFIG_PROB_GZALLOC
533 /*
534 * If offset is PGZ protected we need PGZ to fix it up to the right
535 * value prior to validation and use.
536 */
537 offset_u = vm_memory_entry_pgz_decode_offset(target_map, offset_u, size_u);
538 #endif /* CONFIG_PROB_GZALLOC */
539
540 /*
541 	 * Sanitize addr and size. Permissions have been sanitized prior to
542 	 * dispatch.
543 */
544 kr = mach_make_memory_entry_generic_sanitize(target_map,
545 *size_u,
546 offset_u,
547 &map_start,
548 &map_end,
549 &map_size,
550 &offset);
551 if (__improbable(kr != KERN_SUCCESS)) {
552 return mach_make_memory_entry_cleanup(kr, target_map,
553 size_u, offset_u, permission, user_entry, object_handle);
554 }
555
556 assert(map_size != 0);
557
558 vm_memory_entry_decode_perm(permission, &access, &protections,
559 &mask_protections, &use_data_addr, &use_4K_compat);
560
561 if (target_map == VM_MAP_NULL) {
562 return mach_make_memory_entry_cleanup(KERN_INVALID_TASK, target_map,
563 size_u, offset_u, permission, user_entry, object_handle);
564 }
565
566 offset_in_page = vm_memory_entry_get_offset_in_page(offset, map_start,
567 use_data_addr, use_4K_compat);
568
569 kr = vm_map_copyin_internal(target_map,
570 map_start,
571 map_size,
572 VM_MAP_COPYIN_ENTRY_LIST,
573 	    &copy);
574 if (kr != KERN_SUCCESS) {
575 return mach_make_memory_entry_cleanup(kr, target_map,
576 size_u, offset_u, permission, user_entry, object_handle);
577 }
578 assert(copy != VM_MAP_COPY_NULL);
579
580 user_entry = mach_memory_entry_allocate(object_handle);
581 user_entry->backing.copy = copy;
582 user_entry->internal = FALSE;
583 user_entry->is_sub_map = FALSE;
584 user_entry->is_copy = TRUE;
585 user_entry->offset = 0;
586 user_entry->protection = protections;
587 user_entry->size = map_size;
588 user_entry->data_offset = offset_in_page;
589
590 /* is all memory in this named entry "owned"? */
591 vm_map_entry_t entry;
592 user_entry->is_fully_owned = TRUE;
593 for (entry = vm_map_copy_first_entry(copy);
594 entry != vm_map_copy_to_entry(copy);
595 entry = entry->vme_next) {
596 if (entry->is_sub_map ||
597 VME_OBJECT(entry) == VM_OBJECT_NULL ||
598 VM_OBJECT_OWNER(VME_OBJECT(entry)) == TASK_NULL) {
599 /* this memory is not "owned" */
600 user_entry->is_fully_owned = FALSE;
601 break;
602 }
603 }
604
605 *size_u = vm_sanitize_wrap_size(user_entry->size - user_entry->data_offset);
606 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> "
607 "entry %p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
608 permission, user_entry, KERN_SUCCESS);
609 return KERN_SUCCESS;
610 }
611
612 static kern_return_t
613 mach_make_memory_entry_share(
614 vm_map_t target_map,
615 memory_object_size_ut *size_u,
616 vm_map_offset_ut offset_u,
617 vm_prot_t permission,
618 ipc_port_t *object_handle,
619 ipc_port_t parent_handle,
620 vm_named_entry_t parent_entry)
621 {
622 vm_object_t object;
623 unsigned int access;
624 vm_prot_t protections;
625 bool mask_protections;
626 bool use_data_addr;
627 bool use_4K_compat;
628 vm_named_entry_t user_entry = NULL;
629 vm_map_copy_t copy;
630 vm_prot_t cur_prot, max_prot;
631 vm_map_kernel_flags_t vmk_flags;
632 vm_map_entry_t parent_copy_entry;
633 /*
634 * Stash the offset in the page for use by vm_map_enter_mem_object()
635 * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
636 */
637 vm_object_offset_t offset_in_page;
638 unsigned int wimg_mode;
639 kern_return_t kr;
640 vm_map_size_t map_size;
641 vm_map_offset_t map_start, map_end, offset;
642
643 if (VM_SANITIZE_UNSAFE_IS_ZERO(*size_u)) {
644 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
645 size_u, offset_u, permission, user_entry, object_handle);
646 }
647
648 #if CONFIG_PROB_GZALLOC
649 /*
650 * If offset is PGZ protected we need PGZ to fix it up to the right
651 * value prior to validation and use.
652 */
653 offset_u = vm_memory_entry_pgz_decode_offset(target_map, offset_u, size_u);
654 #endif /* CONFIG_PROB_GZALLOC */
655
656 /*
657 	 * Sanitize addr and size. Permissions have been sanitized prior to
658 	 * dispatch.
659 */
660 kr = mach_make_memory_entry_generic_sanitize(target_map,
661 *size_u,
662 offset_u,
663 &map_start,
664 &map_end,
665 &map_size,
666 &offset);
667 if (__improbable(kr != KERN_SUCCESS)) {
668 return mach_make_memory_entry_cleanup(kr, target_map,
669 size_u, offset_u, permission, user_entry, object_handle);
670 }
671
672 assert(map_size != 0);
673
674 vm_memory_entry_decode_perm(permission, &access, &protections,
675 &mask_protections, &use_data_addr, &use_4K_compat);
676
677 if (target_map == VM_MAP_NULL) {
678 return mach_make_memory_entry_cleanup(KERN_INVALID_TASK, target_map,
679 size_u, offset_u, permission, user_entry, object_handle);
680 }
681
682 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
683 vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
684 parent_copy_entry = VM_MAP_ENTRY_NULL;
685 if (!(permission & MAP_MEM_VM_SHARE)) {
686 vm_map_t tmp_map, real_map;
687 vm_map_version_t version;
688 vm_object_t tmp_object;
689 vm_object_offset_t obj_off;
690 vm_prot_t prot;
691 boolean_t wired;
692 bool contended;
693
694 /* resolve any pending submap copy-on-write... */
695 if (protections & VM_PROT_WRITE) {
696 tmp_map = target_map;
697 vm_map_lock_read(tmp_map);
698 kr = vm_map_lookup_and_lock_object(&tmp_map,
699 map_start,
700 protections | (mask_protections ? VM_PROT_IS_MASK : 0),
701 OBJECT_LOCK_EXCLUSIVE,
702 &version,
703 &tmp_object,
704 &obj_off,
705 &prot,
706 &wired,
707 NULL, /* fault_info */
708 &real_map,
709 &contended);
710 if (kr != KERN_SUCCESS) {
711 vm_map_unlock_read(tmp_map);
712 } else {
713 vm_object_unlock(tmp_object);
714 vm_map_unlock_read(tmp_map);
715 if (real_map != tmp_map) {
716 vm_map_unlock_read(real_map);
717 }
718 }
719 }
720 /* ... and carry on */
721
722 /* stop extracting if VM object changes */
723 vmk_flags.vmkf_copy_single_object = TRUE;
724 if ((permission & MAP_MEM_NAMED_REUSE) &&
725 parent_entry != NULL &&
726 parent_entry->is_object) {
727 vm_map_copy_t parent_copy;
728 parent_copy = parent_entry->backing.copy;
729 /*
730 * Assert that the vm_map_copy is coming from the right
731 * zone and hasn't been forged
732 */
733 vm_map_copy_require(parent_copy);
734 assert(parent_copy->cpy_hdr.nentries == 1);
735 parent_copy_entry = vm_map_copy_first_entry(parent_copy);
736 assert(!parent_copy_entry->is_sub_map);
737 }
738 }
739
740 offset_in_page = vm_memory_entry_get_offset_in_page(offset, map_start,
741 use_data_addr, use_4K_compat);
742
743 if (mask_protections) {
744 /*
745 	 * caller is asking for whichever protections are
746 * available: no required protections.
747 */
748 cur_prot = VM_PROT_NONE;
749 max_prot = VM_PROT_NONE;
750 vmk_flags.vmkf_remap_legacy_mode = true;
751 } else {
752 /*
753 * Caller wants a memory entry with "protections".
754 * Make sure we extract only memory that matches that.
755 */
756 cur_prot = protections;
757 max_prot = protections;
758 }
759 if (target_map->pmap == kernel_pmap) {
760 /*
761 * Get "reserved" map entries to avoid deadlocking
762 * on the kernel map or a kernel submap if we
763 * run out of VM map entries and need to refill that
764 * zone.
765 */
766 vmk_flags.vmkf_copy_pageable = FALSE;
767 } else {
768 vmk_flags.vmkf_copy_pageable = TRUE;
769 }
770 vmk_flags.vmkf_copy_same_map = FALSE;
771 assert(map_size != 0);
772 kr = vm_map_copy_extract(target_map,
773 map_start,
774 map_size,
775 FALSE, /* copy */
776 	    &copy,
777 &cur_prot,
778 &max_prot,
779 VM_INHERIT_SHARE,
780 vmk_flags);
781 if (kr != KERN_SUCCESS) {
782 return mach_make_memory_entry_cleanup(kr, target_map,
783 size_u, offset_u, permission, user_entry, object_handle);
784 }
785 assert(copy != VM_MAP_COPY_NULL);
786
787 if (mask_protections) {
788 /*
789 * We just want as much of "original_protections"
790 * as we can get out of the actual "cur_prot".
791 */
792 protections &= cur_prot;
793 if (protections == VM_PROT_NONE) {
794 /* no access at all: fail */
795 vm_map_copy_discard(copy);
796 return mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE,
797 target_map, size_u, offset_u, permission, user_entry,
798 object_handle);
799 }
800 } else {
801 /*
802 * We want exactly "original_protections"
803 * out of "cur_prot".
804 */
805 assert((cur_prot & protections) == protections);
806 assert((max_prot & protections) == protections);
807 /* XXX FBDP TODO: no longer needed? */
808 if ((cur_prot & protections) != protections) {
809 vm_map_copy_discard(copy);
810 return mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE,
811 target_map, size_u, offset_u, permission, user_entry,
812 object_handle);
813 }
814 }
815
816 if (!(permission & MAP_MEM_VM_SHARE)) {
817 vm_map_entry_t copy_entry;
818
819 /* limit size to what's actually covered by "copy" */
820 assert(copy->cpy_hdr.nentries == 1);
821 copy_entry = vm_map_copy_first_entry(copy);
822 map_size = copy_entry->vme_end - copy_entry->vme_start;
823
824 if ((permission & MAP_MEM_NAMED_REUSE) &&
825 parent_copy_entry != VM_MAP_ENTRY_NULL &&
826 VME_OBJECT(copy_entry) == VME_OBJECT(parent_copy_entry) &&
827 VME_OFFSET(copy_entry) == VME_OFFSET(parent_copy_entry) &&
828 parent_entry->offset == 0 &&
829 parent_entry->size == map_size &&
830 (parent_entry->data_offset == offset_in_page)) {
831 /* we have a match: re-use "parent_entry" */
832
833 /* release our new "copy" */
834 vm_map_copy_discard(copy);
835 /* get extra send right on handle */
836 parent_handle = ipc_port_copy_send_any(parent_handle);
837
838 *size_u = vm_sanitize_wrap_size(parent_entry->size -
839 parent_entry->data_offset);
840 *object_handle = parent_handle;
841 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> "
842 "entry %p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
843 permission, user_entry, KERN_SUCCESS);
844 return KERN_SUCCESS;
845 }
846
847 /* no match: we need to create a new entry */
848 object = VME_OBJECT(copy_entry);
849 vm_object_lock(object);
850 wimg_mode = object->wimg_bits;
851 if (!(object->nophyscache)) {
852 vm_prot_to_wimg(access, &wimg_mode);
853 }
854 if (object->wimg_bits != wimg_mode) {
855 vm_object_change_wimg_mode(object, wimg_mode);
856 }
857 vm_object_unlock(object);
858 }
859
860 user_entry = mach_memory_entry_allocate(object_handle);
861 user_entry->backing.copy = copy;
862 user_entry->is_sub_map = FALSE;
863 user_entry->is_object = FALSE;
864 user_entry->internal = FALSE;
865 user_entry->protection = protections;
866 user_entry->size = map_size;
867 user_entry->data_offset = offset_in_page;
868
869 if (permission & MAP_MEM_VM_SHARE) {
870 vm_map_entry_t copy_entry;
871
872 user_entry->is_copy = TRUE;
873 user_entry->offset = 0;
874
875 /* is all memory in this named entry "owned"? */
876 user_entry->is_fully_owned = TRUE;
877 for (copy_entry = vm_map_copy_first_entry(copy);
878 copy_entry != vm_map_copy_to_entry(copy);
879 copy_entry = copy_entry->vme_next) {
880 if (copy_entry->is_sub_map) {
881 /* submaps can't be owned */
882 user_entry->is_fully_owned = FALSE;
883 break;
884 }
885 if (VM_OBJECT_OWNER(VME_OBJECT(copy_entry)) == TASK_NULL) {
886 object = VME_OBJECT(copy_entry);
887 if (object && !object->internal) {
888 /* external objects can be "owned" */
889 continue;
890 }
891 /* this memory is not "owned" */
892 user_entry->is_fully_owned = FALSE;
893 break;
894 }
895 }
896 } else {
897 user_entry->is_object = TRUE;
898 user_entry->internal = object->internal;
899 user_entry->offset = VME_OFFSET(vm_map_copy_first_entry(copy));
900 user_entry->access = GET_MAP_MEM(permission);
901 /* is all memory in this named entry "owned"? */
902 user_entry->is_fully_owned = FALSE;
903 object = vm_named_entry_to_vm_object(user_entry);
904 if (VM_OBJECT_OWNER(object) != TASK_NULL) {
905 /* object is owned */
906 user_entry->is_fully_owned = TRUE;
907 } else if (object && !object->internal) {
908 /* external objects can become "owned" */
909 user_entry->is_fully_owned = TRUE;
910 }
911 }
912
913 *size_u = vm_sanitize_wrap_size(user_entry->size -
914 user_entry->data_offset);
915 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
916 "%p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
917 permission, user_entry, KERN_SUCCESS);
918 return KERN_SUCCESS;
919 }
920
921 static __attribute__((always_inline, warn_unused_result))
922 kern_return_t
923 mach_make_memory_entry_from_parent_entry_sanitize(
924 vm_map_t target_map,
925 memory_object_size_ut size_u,
926 vm_map_offset_ut offset_u,
927 vm_prot_t permission,
928 vm_named_entry_t parent_entry,
929 vm_map_offset_t *map_start,
930 vm_map_offset_t *map_end,
931 vm_map_size_t *map_size,
932 vm_map_offset_t *offset,
933 vm_map_offset_t *user_entry_offset)
934 {
935 bool mask_protections;
936 unsigned int access;
937 vm_prot_t protections;
938 bool use_data_addr;
939 bool use_4K_compat;
940 vm_map_offset_t start_mask = vm_map_page_mask(target_map);
941 kern_return_t kr;
942
943 vm_memory_entry_decode_perm(permission, &access, &protections,
944 &mask_protections, &use_data_addr, &use_4K_compat);
945
946 if (use_data_addr || use_4K_compat) {
947 /*
948 * Validate offset doesn't overflow when added to parent entry's offset
949 */
950 if (vm_sanitize_add_overflow(offset_u, parent_entry->data_offset,
951 &offset_u)) {
952 return KERN_INVALID_ARGUMENT;
953 }
954 start_mask = PAGE_MASK;
955 }
956
957 /*
958 * Currently the map_start is truncated using page mask from target_map
959 * when use_data_addr || use_4K_compat is false, while map_end uses
960 * PAGE_MASK. In order to maintain that behavior, we
961 	 * request unaligned values and perform the truncation/rounding
962 * explicitly.
963 */
964 kr = vm_sanitize_addr_size(offset_u, size_u,
965 VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY, PAGE_MASK,
966 VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH | VM_SANITIZE_FLAGS_GET_UNALIGNED_VALUES,
967 map_start, map_end, map_size);
968 if (__improbable(kr != KERN_SUCCESS)) {
969 return kr;
970 }
971
972 *map_start = vm_map_trunc_page_mask(*map_start, start_mask);
973 *map_end = vm_map_round_page_mask(*map_end, PAGE_MASK);
974 *map_size = *map_end - *map_start;
975
976 /*
977 * Additional checks to make sure explicitly computed aligned start and end
978 * still make sense.
979 */
980 if (__improbable(*map_end < *map_start) || (*map_end > parent_entry->size)) {
981 return KERN_INVALID_ARGUMENT;
982 }
983
984 /*
985 * Validate offset
986 */
987 kr = vm_sanitize_offset(offset_u, VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
988 *map_start, *map_end, offset);
989 if (__improbable(kr != KERN_SUCCESS)) {
990 return kr;
991 }
992
993 if (__improbable(os_add_overflow(parent_entry->offset, *map_start,
994 user_entry_offset))) {
995 return KERN_INVALID_ARGUMENT;
996 }
997
998 return KERN_SUCCESS;
999 }
1000
1001 static kern_return_t
1002 mach_make_memory_entry_from_parent_entry(
1003 vm_map_t target_map,
1004 memory_object_size_ut *size_u,
1005 vm_map_offset_ut offset_u,
1006 vm_prot_t permission,
1007 ipc_port_t *object_handle,
1008 vm_named_entry_t parent_entry)
1009 {
1010 vm_object_t object;
1011 unsigned int access;
1012 vm_prot_t protections;
1013 bool mask_protections;
1014 bool use_data_addr;
1015 bool use_4K_compat;
1016 vm_named_entry_t user_entry = NULL;
1017 kern_return_t kr;
1018 /*
1019 * Stash the offset in the page for use by vm_map_enter_mem_object()
1020 * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
1021 */
1022 vm_object_offset_t offset_in_page;
1023 vm_map_offset_t map_start, map_end;
1024 vm_map_size_t map_size;
1025 vm_map_offset_t user_entry_offset, offset;
1026
1027 vm_memory_entry_decode_perm(permission, &access, &protections,
1028 &mask_protections, &use_data_addr, &use_4K_compat);
1029
1030 /*
1031 	 * Sanitize addr and size. Permissions have been sanitized prior to
1032 	 * dispatch.
1033 */
1034 kr = mach_make_memory_entry_from_parent_entry_sanitize(target_map,
1035 *size_u,
1036 offset_u,
1037 permission,
1038 parent_entry,
1039 &map_start,
1040 &map_end,
1041 &map_size,
1042 &offset,
1043 &user_entry_offset);
1044 if (__improbable(kr != KERN_SUCCESS)) {
1045 return mach_make_memory_entry_cleanup(kr, target_map,
1046 size_u, offset_u, permission, user_entry, object_handle);
1047 }
1048
1049 if (use_data_addr || use_4K_compat) {
1050 /*
1051 * submaps and pagers should only be accessible from within
1052 		 * the kernel, which shouldn't use the data address flag, so we can fail here.
1053 */
1054 if (parent_entry->is_sub_map) {
1055 panic("Shouldn't be using data address with a parent entry that is a submap.");
1056 }
1057 }
1058
1059 if (mask_protections) {
1060 /*
1061 * The caller asked us to use the "protections" as
1062 * a mask, so restrict "protections" to what this
1063 * mapping actually allows.
1064 */
1065 protections &= parent_entry->protection;
1066 }
1067 if ((protections & parent_entry->protection) != protections) {
1068 return mach_make_memory_entry_cleanup(KERN_PROTECTION_FAILURE, target_map,
1069 size_u, offset_u, permission, user_entry, object_handle);
1070 }
1071
1072 offset_in_page = vm_memory_entry_get_offset_in_page(offset, map_start,
1073 use_data_addr, use_4K_compat);
1074
1075 user_entry = mach_memory_entry_allocate(object_handle);
1076 user_entry->size = map_size;
1077 user_entry->offset = user_entry_offset;
1078 user_entry->data_offset = offset_in_page;
1079 user_entry->is_sub_map = parent_entry->is_sub_map;
1080 user_entry->is_copy = parent_entry->is_copy;
1081 user_entry->protection = protections;
1082
1083 if (access != MAP_MEM_NOOP) {
1084 user_entry->access = access;
1085 }
1086
1087 if (parent_entry->is_sub_map) {
1088 vm_map_t map = parent_entry->backing.map;
1089 vm_map_reference(map);
1090 user_entry->backing.map = map;
1091 } else {
1092 object = vm_named_entry_to_vm_object(parent_entry);
1093 assert(object != VM_OBJECT_NULL);
1094 assert(object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC);
1095 vm_named_entry_associate_vm_object(
1096 user_entry,
1097 object,
1098 user_entry->offset,
1099 user_entry->size,
1100 (user_entry->protection & VM_PROT_ALL));
1101 assert(user_entry->is_object);
1102 /* we now point to this object, hold on */
1103 vm_object_lock(object);
1104 vm_object_reference_locked(object);
1105 #if VM_OBJECT_TRACKING_OP_TRUESHARE
1106 if (!object->true_share &&
1107 vm_object_tracking_btlog) {
1108 btlog_record(vm_object_tracking_btlog, object,
1109 VM_OBJECT_TRACKING_OP_TRUESHARE,
1110 btref_get(__builtin_frame_address(0), 0));
1111 }
1112 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
1113
1114 VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
1115 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
1116 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1117 }
1118 vm_object_unlock(object);
1119 }
1120 *size_u = vm_sanitize_wrap_size(user_entry->size -
1121 user_entry->data_offset);
1122 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry "
1123 "%p kr 0x%x\n", target_map, offset, VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
1124 permission, user_entry, KERN_SUCCESS);
1125 return KERN_SUCCESS;
1126 }
1127
1128 static inline kern_return_t
1129 mach_make_memory_entry_sanitize_perm(
1130 vm_prot_ut permission_u,
1131 vm_prot_t *permission)
1132 {
1133 return vm_sanitize_memory_entry_perm(permission_u,
1134 VM_SANITIZE_CALLER_MACH_MAKE_MEMORY_ENTRY,
1135 VM_SANITIZE_FLAGS_CHECK_USER_MEM_MAP_FLAGS,
1136 VM_PROT_IS_MASK, permission);
1137 }
1138
1139 kern_return_t
1140 mach_make_memory_entry_internal(
1141 vm_map_t target_map,
1142 memory_object_size_ut *size_u,
1143 memory_object_offset_ut offset_u,
1144 vm_prot_ut permission_u,
1145 vm_named_entry_kernel_flags_t vmne_kflags,
1146 ipc_port_t *object_handle,
1147 ipc_port_t parent_handle)
1148 {
1149 vm_named_entry_t user_entry __unused = NULL;
1150 vm_named_entry_t parent_entry;
1151 kern_return_t kr;
1152 vm_prot_t permission;
1153
1154 DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x\n",
1155 target_map, VM_SANITIZE_UNSAFE_UNWRAP(offset_u), VM_SANITIZE_UNSAFE_UNWRAP(*size_u),
1156 VM_SANITIZE_UNSAFE_UNWRAP(permission_u));
1157
1158 /*
1159 	 * Validate permissions, since we need them to dispatch to the corresponding flavor
1160 */
1161 kr = mach_make_memory_entry_sanitize_perm(permission_u, &permission);
1162 if (__improbable(kr != KERN_SUCCESS)) {
1163 return mach_make_memory_entry_cleanup(kr, target_map,
1164 size_u, offset_u, permission, user_entry, object_handle);
1165 }
1166
1167 if (permission & MAP_MEM_LEDGER_TAGGED) {
1168 vmne_kflags.vmnekf_ledger_tag = VM_LEDGER_TAG_DEFAULT;
1169 }
1170
1171 parent_entry = mach_memory_entry_from_port(parent_handle);
1172 if (parent_entry && parent_entry->is_copy) {
1173 return mach_make_memory_entry_cleanup(KERN_INVALID_ARGUMENT, target_map,
1174 size_u, offset_u, permission, user_entry, object_handle);
1175 }
1176
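	/*
	 * Dispatch on the requested flavor (a summary of the checks below):
	 *	MAP_MEM_ONLY		-> mach_make_memory_entry_mem_only()
	 *	MAP_MEM_NAMED_CREATE	-> mach_make_memory_entry_named_create()
	 *	MAP_MEM_VM_COPY		-> mach_make_memory_entry_copy()
	 *	MAP_MEM_VM_SHARE, MAP_MEM_NAMED_REUSE, or no parent entry
	 *				-> mach_make_memory_entry_share()
	 *	otherwise		-> mach_make_memory_entry_from_parent_entry()
	 */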
1177 if (permission & MAP_MEM_ONLY) {
1178 return mach_make_memory_entry_mem_only(target_map, size_u, offset_u,
1179 permission, object_handle, parent_entry);
1180 }
1181
1182 if (permission & MAP_MEM_NAMED_CREATE) {
1183 return mach_make_memory_entry_named_create(target_map, size_u, offset_u,
1184 permission, vmne_kflags, object_handle);
1185 }
1186
1187 if (permission & MAP_MEM_VM_COPY) {
1188 return mach_make_memory_entry_copy(target_map, size_u, offset_u,
1189 permission, object_handle);
1190 }
1191
1192 if ((permission & MAP_MEM_VM_SHARE)
1193 || parent_entry == NULL
1194 || (permission & MAP_MEM_NAMED_REUSE)) {
1195 return mach_make_memory_entry_share(target_map, size_u, offset_u,
1196 permission, object_handle, parent_handle, parent_entry);
1197 }
1198
1199 /*
1200 	 * This function computes map start, end and size taking the parent
1201 	 * entry's offset into account, so it redoes the validation itself.
1202 */
1203 return mach_make_memory_entry_from_parent_entry(target_map, size_u,
1204 offset_u, permission, object_handle, parent_entry);
1205 }
1206
1207 kern_return_t
1208 _mach_make_memory_entry(
1209 vm_map_t target_map,
1210 memory_object_size_ut *size_u,
1211 memory_object_offset_ut offset_u,
1212 vm_prot_ut permission_u,
1213 ipc_port_t *object_handle,
1214 ipc_port_t parent_entry)
1215 {
1216 return mach_make_memory_entry_64(target_map, size_u,
1217 offset_u, permission_u, object_handle, parent_entry);
1218 }
1219
1220 kern_return_t
1221 mach_make_memory_entry(
1222 vm_map_t target_map,
1223 vm_size_ut *size_u,
1224 vm_offset_ut offset_u,
1225 vm_prot_ut permission_u,
1226 ipc_port_t *object_handle,
1227 ipc_port_t parent_entry)
1228 {
1229 kern_return_t kr;
1230
1231 kr = mach_make_memory_entry_64(target_map, size_u,
1232 offset_u, permission_u, object_handle, parent_entry);
1233 return kr;
1234 }
1235
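/*
 * Allocate a zero-filled vm_named_entry and a kobject port that holds a
 * send right on it; a no-senders notification is armed so the entry can
 * be cleaned up (via mach_memory_entry_no_senders) once all send rights
 * are gone.  The caller fills in the backing object/map and the
 * size/offset/protection fields.
 */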
1236 __private_extern__ vm_named_entry_t
1237 mach_memory_entry_allocate(ipc_port_t *user_handle_p)
1238 {
1239 vm_named_entry_t user_entry;
1240
1241 user_entry = kalloc_type(struct vm_named_entry,
1242 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1243 named_entry_lock_init(user_entry);
1244
1245 *user_handle_p = ipc_kobject_alloc_port((ipc_kobject_t)user_entry,
1246 IKOT_NAMED_ENTRY,
1247 IPC_KOBJECT_ALLOC_MAKE_SEND | IPC_KOBJECT_ALLOC_NSREQUEST);
1248
1249 #if VM_NAMED_ENTRY_DEBUG
1250 /* backtrace at allocation time, for debugging only */
1251 user_entry->named_entry_bt = btref_get(__builtin_frame_address(0), 0);
1252 #endif /* VM_NAMED_ENTRY_DEBUG */
1253 return user_entry;
1254 }
1255
1256 static __attribute__((always_inline, warn_unused_result))
1257 kern_return_t
1258 mach_memory_object_memory_entry_64_sanitize(
1259 vm_object_size_ut size_u,
1260 vm_prot_ut permission_u,
1261 vm_object_size_t *size,
1262 vm_prot_t *permission)
1263 {
1264 kern_return_t kr;
1265
1266 kr = vm_sanitize_object_size(size_u,
1267 VM_SANITIZE_CALLER_MACH_MEMORY_OBJECT_MEMORY_ENTRY,
1268 VM_SANITIZE_FLAGS_SIZE_ZERO_FAILS, size);
1269 if (__improbable(kr != KERN_SUCCESS)) {
1270 return kr;
1271 }
1272 kr = vm_sanitize_memory_entry_perm(permission_u,
1273 VM_SANITIZE_CALLER_MACH_MEMORY_OBJECT_MEMORY_ENTRY,
1274 VM_SANITIZE_FLAGS_NONE, VM_PROT_NONE,
1275 permission);
1276 if (__improbable(kr != KERN_SUCCESS)) {
1277 return kr;
1278 }
1279
1280 return KERN_SUCCESS;
1281 }
1282
1283 /*
1284 * mach_memory_object_memory_entry_64
1285 *
1286 * Create a named entry backed by the provided pager.
1287 *
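 * If "pager" is MEMORY_OBJECT_NULL and "internal" is TRUE, a fresh
 * anonymous VM object of the requested size is used to back the entry
 * instead (see below).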
1288 */
1289 kern_return_t
1290 mach_memory_object_memory_entry_64(
1291 host_t host,
1292 boolean_t internal,
1293 vm_object_size_ut size_u,
1294 vm_prot_ut permission_u,
1295 memory_object_t pager,
1296 ipc_port_t *entry_handle)
1297 {
1298 vm_named_entry_t user_entry;
1299 ipc_port_t user_handle;
1300 vm_object_t object;
1301 vm_object_size_t size;
1302 vm_prot_t permission;
1303 kern_return_t kr;
1304
1305 if (host == HOST_NULL) {
1306 return KERN_INVALID_HOST;
1307 }
1308
1309 /*
1310 * Validate size and permission
1311 */
1312 kr = mach_memory_object_memory_entry_64_sanitize(size_u,
1313 permission_u,
1314 &size,
1315 &permission);
1316 if (__improbable(kr != KERN_SUCCESS)) {
1317 return vm_sanitize_get_kr(kr);
1318 }
1319
1320 if (pager == MEMORY_OBJECT_NULL && internal) {
1321 object = vm_object_allocate(size);
1322 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
1323 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1324 }
1325 } else {
1326 object = memory_object_to_vm_object(pager);
1327 if (object != VM_OBJECT_NULL) {
1328 vm_object_reference(object);
1329 }
1330 }
1331 if (object == VM_OBJECT_NULL) {
1332 return KERN_INVALID_ARGUMENT;
1333 }
1334
1335 user_entry = mach_memory_entry_allocate(&user_handle);
1336 user_entry->size = size;
1337 user_entry->offset = 0;
1338 user_entry->protection = permission & VM_PROT_ALL;
1339 user_entry->access = GET_MAP_MEM(permission);
1340 user_entry->is_sub_map = FALSE;
1341
1342 vm_named_entry_associate_vm_object(user_entry, object, 0, size,
1343 (user_entry->protection & VM_PROT_ALL));
1344 user_entry->internal = object->internal;
1345 assert(object->internal == internal);
1346 if (VM_OBJECT_OWNER(object) != TASK_NULL) {
1347 /* all memory in this entry is "owned" */
1348 user_entry->is_fully_owned = TRUE;
1349 } else if (object && !object->internal) {
1350 /* external objects can become "owned" */
1351 user_entry->is_fully_owned = TRUE;
1352 }
1353
1354 *entry_handle = user_handle;
1355 return KERN_SUCCESS;
1356 }
1357
1358 kern_return_t
1359 mach_memory_object_memory_entry(
1360 host_t host,
1361 boolean_t internal,
1362 vm_size_ut size_u,
1363 vm_prot_ut permission_u,
1364 memory_object_t pager,
1365 ipc_port_t *entry_handle)
1366 {
1367 return mach_memory_object_memory_entry_64( host, internal,
1368 size_u, permission_u, pager, entry_handle);
1369 }
1370
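/*
 * Illustrative sketch of user-space use, assuming "entry_port" names a
 * memory entry that was created with MAP_MEM_PURGABLE:
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *	kr = mach_memory_entry_purgable_control(entry_port,
 *	    VM_PURGABLE_SET_STATE, &state);
 */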
1371 kern_return_t
1372 mach_memory_entry_purgable_control(
1373 ipc_port_t entry_port,
1374 vm_purgable_t control,
1375 int *state)
1376 {
1377 if (control == VM_PURGABLE_SET_STATE_FROM_KERNEL) {
1378 /* not allowed from user-space */
1379 return KERN_INVALID_ARGUMENT;
1380 }
1381
1382 return memory_entry_purgeable_control_internal(entry_port, control, state);
1383 }
1384
1385 kern_return_t
1386 memory_entry_purgeable_control_internal(
1387 ipc_port_t entry_port,
1388 vm_purgable_t control,
1389 int *state)
1390 {
1391 kern_return_t kr;
1392 vm_named_entry_t mem_entry;
1393 vm_object_t object;
1394
1395 mem_entry = mach_memory_entry_from_port(entry_port);
1396 if (mem_entry == NULL) {
1397 return KERN_INVALID_ARGUMENT;
1398 }
1399
1400 if (control != VM_PURGABLE_SET_STATE &&
1401 control != VM_PURGABLE_GET_STATE &&
1402 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
1403 return KERN_INVALID_ARGUMENT;
1404 }
1405
1406 if ((control == VM_PURGABLE_SET_STATE ||
1407 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
1408 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
1409 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
1410 return KERN_INVALID_ARGUMENT;
1411 }
1412
1413 named_entry_lock(mem_entry);
1414
1415 if (mem_entry->is_sub_map ||
1416 mem_entry->is_copy) {
1417 named_entry_unlock(mem_entry);
1418 return KERN_INVALID_ARGUMENT;
1419 }
1420
1421 assert(mem_entry->is_object);
1422 object = vm_named_entry_to_vm_object(mem_entry);
1423 if (object == VM_OBJECT_NULL) {
1424 named_entry_unlock(mem_entry);
1425 return KERN_INVALID_ARGUMENT;
1426 }
1427
1428 vm_object_lock(object);
1429
1430 /* check that named entry covers entire object ? */
1431 if (mem_entry->offset != 0 || object->vo_size != mem_entry->size) {
1432 vm_object_unlock(object);
1433 named_entry_unlock(mem_entry);
1434 return KERN_INVALID_ARGUMENT;
1435 }
1436
1437 named_entry_unlock(mem_entry);
1438
1439 kr = vm_object_purgable_control(object, control, state);
1440
1441 vm_object_unlock(object);
1442
1443 return kr;
1444 }
1445
1446 static kern_return_t
1447 memory_entry_access_tracking_internal(
1448 ipc_port_t entry_port,
1449 int *access_tracking,
1450 uint32_t *access_tracking_reads,
1451 uint32_t *access_tracking_writes)
1452 {
1453 vm_named_entry_t mem_entry;
1454 vm_object_t object;
1455 kern_return_t kr;
1456
1457 mem_entry = mach_memory_entry_from_port(entry_port);
1458 if (mem_entry == NULL) {
1459 return KERN_INVALID_ARGUMENT;
1460 }
1461
1462 named_entry_lock(mem_entry);
1463
1464 if (mem_entry->is_sub_map ||
1465 mem_entry->is_copy) {
1466 named_entry_unlock(mem_entry);
1467 return KERN_INVALID_ARGUMENT;
1468 }
1469
1470 assert(mem_entry->is_object);
1471 object = vm_named_entry_to_vm_object(mem_entry);
1472 if (object == VM_OBJECT_NULL) {
1473 named_entry_unlock(mem_entry);
1474 return KERN_INVALID_ARGUMENT;
1475 }
1476
1477 #if VM_OBJECT_ACCESS_TRACKING
1478 vm_object_access_tracking(object,
1479 access_tracking,
1480 access_tracking_reads,
1481 access_tracking_writes);
1482 kr = KERN_SUCCESS;
1483 #else /* VM_OBJECT_ACCESS_TRACKING */
1484 (void) access_tracking;
1485 (void) access_tracking_reads;
1486 (void) access_tracking_writes;
1487 kr = KERN_NOT_SUPPORTED;
1488 #endif /* VM_OBJECT_ACCESS_TRACKING */
1489
1490 named_entry_unlock(mem_entry);
1491
1492 return kr;
1493 }
1494
1495 kern_return_t
1496 mach_memory_entry_access_tracking(
1497 ipc_port_t entry_port,
1498 int *access_tracking,
1499 uint32_t *access_tracking_reads,
1500 uint32_t *access_tracking_writes)
1501 {
1502 return memory_entry_access_tracking_internal(entry_port,
1503 access_tracking,
1504 access_tracking_reads,
1505 access_tracking_writes);
1506 }
1507
1508 #if DEVELOPMENT || DEBUG
1509 /* For dtrace probe in mach_memory_entry_ownership */
1510 extern int proc_selfpid(void);
1511 extern char *proc_name_address(void *p);
1512 #endif /* DEVELOPMENT || DEBUG */
1513
1514 /* Kernel call only, MIG uses *_from_user() below */
1515 kern_return_t
1516 mach_memory_entry_ownership(
1517 ipc_port_t entry_port,
1518 task_t owner,
1519 int ledger_tag,
1520 int ledger_flags)
1521 {
1522 task_t cur_task;
1523 kern_return_t kr;
1524 vm_named_entry_t mem_entry;
1525 vm_object_t object;
1526
1527 if (ledger_flags & ~VM_LEDGER_FLAGS_ALL) {
1528 /* reject unexpected flags */
1529 return KERN_INVALID_ARGUMENT;
1530 }
1531
1532 cur_task = current_task();
1533 if (cur_task == kernel_task) {
1534 /* kernel thread: no entitlement needed */
1535 } else if (ledger_flags & VM_LEDGER_FLAG_FROM_KERNEL) {
1536 /* call is from trusted kernel code: no entitlement needed */
1537 } else if ((owner != cur_task && owner != TASK_NULL) ||
1538 (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT) ||
1539 (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG) ||
1540 ledger_tag == VM_LEDGER_TAG_NETWORK) {
1541 bool transfer_ok = false;
1542
1543 /*
1544 * An entitlement is required to:
1545 		 * + transfer memory ownership to someone else,
1546 * + request that the memory not count against the footprint,
1547 * + tag as "network" (since that implies "no footprint")
1548 *
1549 * Exception: task with task_no_footprint_for_debug == 1 on internal build
1550 */
1551 if (!cur_task->task_can_transfer_memory_ownership &&
1552 IOCurrentTaskHasEntitlement("com.apple.private.memory.ownership_transfer")) {
1553 cur_task->task_can_transfer_memory_ownership = TRUE;
1554 }
1555 if (cur_task->task_can_transfer_memory_ownership) {
1556 /* we're allowed to transfer ownership to any task */
1557 transfer_ok = true;
1558 }
1559 #if DEVELOPMENT || DEBUG
1560 if (!transfer_ok &&
1561 ledger_tag == VM_LEDGER_TAG_DEFAULT &&
1562 (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG) &&
1563 cur_task->task_no_footprint_for_debug) {
1564 int to_panic = 0;
1565 static bool init_bootarg = false;
1566
1567 /*
1568 * Allow performance tools running on internal builds to hide memory usage from phys_footprint even
1569 * WITHOUT an entitlement. This can be enabled by per task sysctl vm.task_no_footprint_for_debug=1
1570 * with the ledger tag VM_LEDGER_TAG_DEFAULT and flag VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG.
1571 *
1572 * If the boot-arg "panic_on_no_footprint_for_debug" is set, the kernel will
1573 * panic here in order to detect any abuse of this feature, which is intended solely for
1574 * memory debugging purpose.
1575 */
1576 if (!init_bootarg) {
1577 PE_parse_boot_argn("panic_on_no_footprint_for_debug", &to_panic, sizeof(to_panic));
1578 init_bootarg = true;
1579 }
1580 if (to_panic) {
1581 panic("%s: panic_on_no_footprint_for_debug is triggered by pid %d procname %s", __func__, proc_selfpid(), get_bsdtask_info(cur_task)? proc_name_address(get_bsdtask_info(cur_task)) : "?");
1582 }
1583
1584 /*
1585 * Flushing out user space processes using this interface:
1586 * $ dtrace -n 'task_no_footprint_for_debug {printf("%d[%s]\n", pid, execname); stack(); ustack();}'
1587 */
1588 DTRACE_VM(task_no_footprint_for_debug);
1589 transfer_ok = true;
1590 }
1591 #endif /* DEVELOPMENT || DEBUG */
1592 if (!transfer_ok) {
1593 #define TRANSFER_ENTITLEMENT_MAX_LENGTH 1024 /* XXX ? */
1594 const char *our_id, *their_id;
1595 our_id = IOTaskGetEntitlement(current_task(), "com.apple.developer.memory.transfer-send");
1596 their_id = IOTaskGetEntitlement(owner, "com.apple.developer.memory.transfer-accept");
1597 if (our_id && their_id &&
1598 !strncmp(our_id, their_id, TRANSFER_ENTITLEMENT_MAX_LENGTH)) {
1599 /* allow transfer between tasks that have matching entitlements */
1600 if (strnlen(our_id, TRANSFER_ENTITLEMENT_MAX_LENGTH) < TRANSFER_ENTITLEMENT_MAX_LENGTH &&
1601 strnlen(their_id, TRANSFER_ENTITLEMENT_MAX_LENGTH) < TRANSFER_ENTITLEMENT_MAX_LENGTH) {
1602 transfer_ok = true;
1603 } else {
1604 /* complain about entitlement(s) being too long... */
1605 assertf((strlen(our_id) <= TRANSFER_ENTITLEMENT_MAX_LENGTH &&
1606 strlen(their_id) <= TRANSFER_ENTITLEMENT_MAX_LENGTH),
1607 "our_id:%lu their_id:%lu",
1608 strlen(our_id), strlen(their_id));
1609 }
1610 }
1611 }
1612 if (!transfer_ok) {
1613 /* transfer denied */
1614 return KERN_NO_ACCESS;
1615 }
1616
1617 if (ledger_flags & VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG) {
1618 /*
1619 * We've made it past the checks above, so we either
1620 * have the entitlement or the sysctl.
1621 * Convert to VM_LEDGER_FLAG_NO_FOOTPRINT.
1622 */
1623 ledger_flags &= ~VM_LEDGER_FLAG_NO_FOOTPRINT_FOR_DEBUG;
1624 ledger_flags |= VM_LEDGER_FLAG_NO_FOOTPRINT;
1625 }
1626 }
1627
1628 if (ledger_tag == VM_LEDGER_TAG_UNCHANGED) {
1629 /* leave "ledger_tag" unchanged */
1630 } else if (ledger_tag < 0 ||
1631 ledger_tag > VM_LEDGER_TAG_MAX) {
1632 return KERN_INVALID_ARGUMENT;
1633 }
1634 if (owner == TASK_NULL) {
1635 /* leave "owner" unchanged */
1636 owner = VM_OBJECT_OWNER_UNCHANGED;
1637 }
1638
1639 mem_entry = mach_memory_entry_from_port(entry_port);
1640 if (mem_entry == NULL) {
1641 return KERN_INVALID_ARGUMENT;
1642 }
1643
1644 named_entry_lock(mem_entry);
1645
1646 if (mem_entry->is_sub_map ||
1647 !mem_entry->is_fully_owned) {
1648 named_entry_unlock(mem_entry);
1649 return KERN_INVALID_ARGUMENT;
1650 }
1651
1652 if (mem_entry->is_object) {
1653 object = vm_named_entry_to_vm_object(mem_entry);
1654 if (object == VM_OBJECT_NULL) {
1655 named_entry_unlock(mem_entry);
1656 return KERN_INVALID_ARGUMENT;
1657 }
1658 vm_object_lock(object);
1659 if (object->internal) {
1660 /* check that named entry covers entire object ? */
1661 if (mem_entry->offset != 0 ||
1662 object->vo_size != mem_entry->size) {
1663 vm_object_unlock(object);
1664 named_entry_unlock(mem_entry);
1665 return KERN_INVALID_ARGUMENT;
1666 }
1667 }
1668 named_entry_unlock(mem_entry);
1669 kr = vm_object_ownership_change(object,
1670 ledger_tag,
1671 owner,
1672 ledger_flags,
1673 FALSE); /* task_objq_locked */
1674 vm_object_unlock(object);
1675 } else if (mem_entry->is_copy) {
1676 vm_map_copy_t copy;
1677 vm_map_entry_t entry;
1678
1679 copy = mem_entry->backing.copy;
1680 named_entry_unlock(mem_entry);
1681 for (entry = vm_map_copy_first_entry(copy);
1682 entry != vm_map_copy_to_entry(copy);
1683 entry = entry->vme_next) {
1684 object = VME_OBJECT(entry);
1685 if (entry->is_sub_map ||
1686 object == VM_OBJECT_NULL) {
1687 kr = KERN_INVALID_ARGUMENT;
1688 break;
1689 }
1690 vm_object_lock(object);
1691 if (object->internal) {
1692 if (VME_OFFSET(entry) != 0 ||
1693 entry->vme_end - entry->vme_start != object->vo_size) {
1694 vm_object_unlock(object);
1695 kr = KERN_INVALID_ARGUMENT;
1696 break;
1697 }
1698 }
1699 kr = vm_object_ownership_change(object,
1700 ledger_tag,
1701 owner,
1702 ledger_flags,
1703 FALSE); /* task_objq_locked */
1704 vm_object_unlock(object);
1705 if (kr != KERN_SUCCESS) {
1706 kr = KERN_INVALID_ARGUMENT;
1707 break;
1708 }
1709 }
1710 } else {
1711 named_entry_unlock(mem_entry);
1712 return KERN_INVALID_ARGUMENT;
1713 }
1714
1715 return kr;
1716 }
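
/*
 * Example (hedged sketch, not part of this file): how a user-space client
 * holding the "com.apple.developer.memory.transfer-send" entitlement might
 * hand ownership of a purgeable memory entry to a task holding the matching
 * "transfer-accept" entitlement.  Assumes the MIG-generated user-space
 * prototype for mach_memory_entry_ownership(); "entry_port" and
 * "receiver_task" are hypothetical names supplied by the caller.
 *
 *	#include <mach/mach.h>
 *	#include <mach/memory_entry.h>
 *	#include <mach/vm_statistics.h>
 *
 *	static kern_return_t
 *	transfer_entry_ownership(mach_port_t entry_port, task_t receiver_task)
 *	{
 *		// Charge the entry to "receiver_task" under the media ledger
 *		// tag, with no additional ledger flags.
 *		return mach_memory_entry_ownership(entry_port, receiver_task,
 *		    VM_LEDGER_TAG_MEDIA, 0);
 *	}
 */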
1717
1718 /* MIG call from userspace */
1719 kern_return_t
1720 mach_memory_entry_ownership_from_user(
1721 ipc_port_t entry_port,
1722 mach_port_t owner_port,
1723 int ledger_tag,
1724 int ledger_flags)
1725 {
1726 task_t owner = TASK_NULL;
1727 kern_return_t kr;
1728
1729 if (ledger_flags & ~VM_LEDGER_FLAGS_USER) {
1730 return KERN_INVALID_ARGUMENT;
1731 }
1732
1733 if (IP_VALID(owner_port)) {
1734 if (ip_kotype(owner_port) == IKOT_TASK_ID_TOKEN) {
1735 task_id_token_t token = convert_port_to_task_id_token(owner_port);
1736 (void)task_identity_token_get_task_grp(token, &owner, TASK_GRP_MIG);
1737 task_id_token_release(token);
1738 /* token ref released */
1739 } else {
1740 owner = convert_port_to_task_mig(owner_port);
1741 }
1742 }
1743 /* hold task ref on owner (Nullable) */
1744
1745 if (owner && task_is_a_corpse(owner)) {
1746 /* identity token can represent a corpse, disallow it */
1747 task_deallocate_mig(owner);
1748 owner = TASK_NULL;
1749 }
1750
1751 /* mach_memory_entry_ownership() will handle TASK_NULL owner */
1752 kr = mach_memory_entry_ownership(entry_port, owner, /* Nullable */
1753 ledger_tag, ledger_flags);
1754
1755 if (owner) {
1756 task_deallocate_mig(owner);
1757 }
1758
1759 if (kr == KERN_SUCCESS) {
1760 /* MIG rule, consume port right on success */
1761 ipc_port_release_send(owner_port);
1762 }
1763 return kr;
1764 }
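
/*
 * Example (hedged sketch, not part of this file): the owner port accepted by
 * the routine above may be a task control port or a task identity token, so a
 * cooperating process can hand out a token instead of its control port.
 * Assumes the task_create_identity_token() MIG routine and the user-space
 * task_id_token_t type are available to the caller; error handling omitted.
 *
 *	#include <mach/mach.h>
 *
 *	static mach_port_t
 *	make_owner_token(void)
 *	{
 *		task_id_token_t token = MACH_PORT_NULL;
 *
 *		// The resulting send right can be passed as the owner port to
 *		// mach_memory_entry_ownership() in place of a task port.
 *		(void)task_create_identity_token(mach_task_self(), &token);
 *		return token;
 *	}
 */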
1765
1766 kern_return_t
1767 mach_memory_entry_get_page_counts(
1768 ipc_port_t entry_port,
1769 unsigned int *resident_page_count,
1770 unsigned int *dirty_page_count)
1771 {
1772 kern_return_t kr;
1773 vm_named_entry_t mem_entry;
1774 vm_object_t object;
1775 vm_object_offset_t offset;
1776 vm_object_size_t size;
1777
1778 mem_entry = mach_memory_entry_from_port(entry_port);
1779 if (mem_entry == NULL) {
1780 return KERN_INVALID_ARGUMENT;
1781 }
1782
1783 named_entry_lock(mem_entry);
1784
1785 if (mem_entry->is_sub_map ||
1786 mem_entry->is_copy) {
1787 named_entry_unlock(mem_entry);
1788 return KERN_INVALID_ARGUMENT;
1789 }
1790
1791 assert(mem_entry->is_object);
1792 object = vm_named_entry_to_vm_object(mem_entry);
1793 if (object == VM_OBJECT_NULL) {
1794 named_entry_unlock(mem_entry);
1795 return KERN_INVALID_ARGUMENT;
1796 }
1797
1798 vm_object_lock(object);
1799
1800 offset = mem_entry->offset;
1801 size = mem_entry->size;
1802 size = vm_object_round_page(offset + size) - vm_object_trunc_page(offset);
1803 offset = vm_object_trunc_page(offset);
1804
1805 named_entry_unlock(mem_entry);
1806
1807 kr = vm_object_get_page_counts(object, offset, size, resident_page_count, dirty_page_count);
1808
1809 vm_object_unlock(object);
1810
1811 return kr;
1812 }
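
/*
 * Example (hedged sketch, not part of this file): querying how many pages of
 * a named entry are currently resident and dirty, e.g. to monitor a purgeable
 * buffer.  Assumes the MIG-generated prototype for
 * mach_memory_entry_get_page_counts() is visible to the caller; "entry_port"
 * is a hypothetical handle returned by mach_make_memory_entry_64().
 *
 *	unsigned int resident = 0, dirty = 0;
 *	kern_return_t kr;
 *
 *	kr = mach_memory_entry_get_page_counts(entry_port, &resident, &dirty);
 *	if (kr == KERN_SUCCESS) {
 *		printf("resident: %u dirty: %u\n", resident, dirty);
 *	}
 */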
1813
1814 kern_return_t
1815 mach_memory_entry_phys_page_offset(
1816 ipc_port_t entry_port,
1817 vm_object_offset_t *offset_p)
1818 {
1819 vm_named_entry_t mem_entry;
1820 vm_object_t object;
1821 vm_object_offset_t offset;
1822 vm_object_offset_t data_offset;
1823
1824 mem_entry = mach_memory_entry_from_port(entry_port);
1825 if (mem_entry == NULL) {
1826 return KERN_INVALID_ARGUMENT;
1827 }
1828
1829 named_entry_lock(mem_entry);
1830
1831 if (mem_entry->is_sub_map ||
1832 mem_entry->is_copy) {
1833 named_entry_unlock(mem_entry);
1834 return KERN_INVALID_ARGUMENT;
1835 }
1836
1837 assert(mem_entry->is_object);
1838 object = vm_named_entry_to_vm_object(mem_entry);
1839 if (object == VM_OBJECT_NULL) {
1840 named_entry_unlock(mem_entry);
1841 return KERN_INVALID_ARGUMENT;
1842 }
1843
1844 offset = mem_entry->offset;
1845 data_offset = mem_entry->data_offset;
1846
1847 named_entry_unlock(mem_entry);
1848
1849 *offset_p = offset - vm_object_trunc_page(offset) + data_offset;
1850 return KERN_SUCCESS;
1851 }
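
/*
 * Worked example (illustrative numbers only): with 4 KB object pages, a named
 * entry whose offset is 0x5080 and whose data_offset is 0x40 yields
 *
 *	*offset_p = 0x5080 - 0x5000 + 0x40 = 0xc0
 *
 * i.e. the entry's misalignment within its first object page, plus the data
 * offset recorded when the entry was created.
 */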
1852
1853 static inline kern_return_t
1854 mach_memory_entry_map_size_sanitize_locked(
1855 vm_map_t map,
1856 memory_object_offset_ut *offset_u,
1857 memory_object_size_ut size_u,
1858 vm_named_entry_t mem_entry,
1859 memory_object_offset_t *offset,
1860 memory_object_offset_t *end,
1861 mach_vm_size_t *map_size)
1862 {
1863 kern_return_t kr;
1864
1865 if (mem_entry->is_object ||
1866 (mem_entry->is_copy &&
1867 (VM_MAP_COPY_PAGE_MASK(mem_entry->backing.copy) ==
1868 VM_MAP_PAGE_MASK(map)))) {
1869 if (__improbable(vm_sanitize_add_overflow(*offset_u, mem_entry->offset,
1870 offset_u))) {
1871 return KERN_INVALID_ARGUMENT;
1872 }
1873 }
1874
1875 if (__improbable(vm_sanitize_add_overflow(*offset_u, mem_entry->data_offset,
1876 offset_u))) {
1877 return KERN_INVALID_ARGUMENT;
1878 }
1879
1880 kr = vm_sanitize_addr_size(*offset_u, size_u,
1881 VM_SANITIZE_CALLER_MACH_MEMORY_ENTRY_MAP_SIZE, map,
1882 VM_SANITIZE_FLAGS_SIZE_ZERO_FALLTHROUGH, offset, end, map_size);
1883 if (__improbable(kr != KERN_SUCCESS)) {
1884 return vm_sanitize_get_kr(kr);
1885 }
1886
1887 return KERN_SUCCESS;
1888 }
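
/*
 * Conceptual sketch (assumption, simplified): stripped of the vm_sanitize_*
 * wrappers, the helper above is guarding arithmetic of the following shape,
 * where any wraparound must fail rather than produce a small bogus offset.
 *
 *	#include <os/overflow.h>
 *
 *	static kern_return_t
 *	checked_entry_offset(uint64_t user_offset, uint64_t entry_offset,
 *	    uint64_t data_offset, uint64_t *out)
 *	{
 *		uint64_t tmp;
 *
 *		if (os_add_overflow(user_offset, entry_offset, &tmp) ||
 *		    os_add_overflow(tmp, data_offset, out)) {
 *			return KERN_INVALID_ARGUMENT;
 *		}
 *		return KERN_SUCCESS;
 *	}
 */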
1889
1890 kern_return_t
1891 mach_memory_entry_map_size(
1892 ipc_port_t entry_port,
1893 vm_map_t map,
1894 memory_object_offset_ut offset_u,
1895 memory_object_size_ut size_u,
1896 mach_vm_size_t *map_size_out)
1897 {
1898 vm_named_entry_t mem_entry;
1899 vm_object_t object;
1900 vm_map_copy_t copy_map, target_copy_map;
1901 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
1902 kern_return_t kr;
1903 memory_object_offset_t offset;
1904 memory_object_offset_t end;
1905 mach_vm_size_t map_size;
1906
1907 *map_size_out = 0;
1908
1909 mem_entry = mach_memory_entry_from_port(entry_port);
1910 if (mem_entry == NULL) {
1911 return KERN_INVALID_ARGUMENT;
1912 }
1913
1914 named_entry_lock(mem_entry);
1915
1916 if (mem_entry->is_sub_map) {
1917 named_entry_unlock(mem_entry);
1918 return KERN_INVALID_ARGUMENT;
1919 }
1920
1921 /*
1922 * Sanitize offset and size before use
1923 */
1924 kr = mach_memory_entry_map_size_sanitize_locked(map,
1925 &offset_u,
1926 size_u,
1927 mem_entry,
1928 &offset,
1929 &end,
1930 &map_size);
1931 if (__improbable(kr != KERN_SUCCESS)) {
1932 named_entry_unlock(mem_entry);
1933 return kr;
1934 }
1935
1936 if (mem_entry->is_object) {
1937 object = vm_named_entry_to_vm_object(mem_entry);
1938 if (object == VM_OBJECT_NULL) {
1939 named_entry_unlock(mem_entry);
1940 return KERN_INVALID_ARGUMENT;
1941 }
1942
1943 named_entry_unlock(mem_entry);
1944 *map_size_out = map_size;
1945 return KERN_SUCCESS;
1946 }
1947
1948 if (!mem_entry->is_copy) {
1949 panic("unsupported type of mem_entry %p", mem_entry);
1950 }
1951
1952 assert(mem_entry->is_copy);
1953 if (VM_MAP_COPY_PAGE_MASK(mem_entry->backing.copy) == VM_MAP_PAGE_MASK(map)) {
1954 DEBUG4K_SHARE("map %p (%d) mem_entry %p offset 0x%llx + 0x%llx + 0x%llx size 0x%llx -> map_size 0x%llx\n", map, VM_MAP_PAGE_MASK(map), mem_entry, mem_entry->offset, mem_entry->data_offset, offset, VM_SANITIZE_UNSAFE_UNWRAP(size_u), map_size);
1955 named_entry_unlock(mem_entry);
1956 *map_size_out = map_size;
1957 return KERN_SUCCESS;
1958 }
1959
1960 DEBUG4K_SHARE("mem_entry %p copy %p (%d) map %p (%d) offset 0x%llx size 0x%llx\n", mem_entry, mem_entry->backing.copy, VM_MAP_COPY_PAGE_SHIFT(mem_entry->backing.copy), map, VM_MAP_PAGE_SHIFT(map), offset, VM_SANITIZE_UNSAFE_UNWRAP(size_u));
1961 copy_map = mem_entry->backing.copy;
1962 target_copy_map = VM_MAP_COPY_NULL;
1963 DEBUG4K_ADJUST("adjusting...\n");
1964 kr = vm_map_copy_adjust_to_target(copy_map,
1965 offset_u,
1966 size_u,
1967 map,
1968 FALSE,
1969 &target_copy_map,
1970 &overmap_start,
1971 &overmap_end,
1972 &trimmed_start);
1973 if (kr == KERN_SUCCESS) {
1974 if (target_copy_map->size != copy_map->size) {
1975 DEBUG4K_ADJUST("copy %p (%d) map %p (%d) offset 0x%llx size 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx map_size 0x%llx -> 0x%llx\n", copy_map, VM_MAP_COPY_PAGE_SHIFT(copy_map), map, VM_MAP_PAGE_SHIFT(map), (uint64_t)offset, (uint64_t)VM_SANITIZE_UNSAFE_UNWRAP(size_u), (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)copy_map->size, (uint64_t)target_copy_map->size);
1976 }
1977 *map_size_out = target_copy_map->size;
1978 if (target_copy_map != copy_map) {
1979 vm_map_copy_discard(target_copy_map);
1980 }
1981 target_copy_map = VM_MAP_COPY_NULL;
1982 }
1983 named_entry_unlock(mem_entry);
1984 return kr;
1985 }
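
/*
 * Worked example (illustrative numbers only): a copy built with 4 KB pages
 * covering [0x7000, 0x9000) spans two 16 KB pages of a 16 KB-page target map,
 * so vm_map_copy_adjust_to_target() would report a map size of 0x8000 rather
 * than the original 0x2000; overmap_start/overmap_end account for the extra
 * coverage at either end.
 */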
1986
1987 /*
1988 * mach_memory_entry_port_release:
1989 *
1990 * Release a send right on a named entry port. This is the correct
1991 * way to destroy a named entry. When the last right on the port is
1992 * released, mach_memory_entry_no_senders() will be called.
1993 */
1994 void
1995 mach_memory_entry_port_release(
1996 ipc_port_t port)
1997 {
1998 assert(ip_kotype(port) == IKOT_NAMED_ENTRY);
1999 ipc_port_release_send(port);
2000 }
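
/*
 * Example (hedged sketch, not part of this file): typical lifecycle of a
 * handle created by mach_make_memory_entry_64() from kernel code.  Error
 * handling omitted; "entry_port" is the handle returned by the make call.
 *
 *	// ... mach_make_memory_entry_64(...) produced "entry_port" ...
 *	// ... the handle has been mapped or is no longer needed ...
 *
 *	// Dropping the last send right triggers the no-senders notification,
 *	// which frees the underlying named entry (see below).
 *	mach_memory_entry_port_release(entry_port);
 */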
2001
2002 vm_named_entry_t
2003 mach_memory_entry_from_port(ipc_port_t port)
2004 {
2005 if (IP_VALID(port)) {
2006 return ipc_kobject_get_stable(port, IKOT_NAMED_ENTRY);
2007 }
2008 return NULL;
2009 }
2010
2011 /*
2012 * mach_memory_entry_no_senders:
2013 *
2014 * Destroys the memory entry associated with a mach port.
2015 * Memory entries have the exact same lifetime as their owning port.
2016 *
2017 * Releasing a memory entry is done by calling
2018 * mach_memory_entry_port_release() on its owning port.
2019 */
2020 static void
2021 mach_memory_entry_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
2022 {
2023 vm_named_entry_t named_entry;
2024
2025 named_entry = ipc_kobject_dealloc_port(port, mscount, IKOT_NAMED_ENTRY);
2026
2027 if (named_entry->is_sub_map) {
2028 vm_map_deallocate(named_entry->backing.map);
2029 } else if (named_entry->is_copy) {
2030 vm_map_copy_discard(named_entry->backing.copy);
2031 } else if (named_entry->is_object) {
2032 assert(named_entry->backing.copy->cpy_hdr.nentries == 1);
2033 vm_map_copy_discard(named_entry->backing.copy);
2034 } else {
2035 assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
2036 }
2037
2038 #if VM_NAMED_ENTRY_DEBUG
2039 btref_put(named_entry->named_entry_bt);
2040 #endif /* VM_NAMED_ENTRY_DEBUG */
2041
2042 named_entry_lock_destroy(named_entry);
2043 kfree_type(struct vm_named_entry, named_entry);
2044 }
2045
2046 #if XNU_PLATFORM_MacOSX
2047 /* Allow manipulation of individual page state. This is actually part of */
2048 /* the UPL regimen but takes place on the memory entry rather than on a UPL */
2049
2050 kern_return_t
2051 mach_memory_entry_page_op(
2052 ipc_port_t entry_port,
2053 vm_object_offset_ut offset_u,
2054 int ops,
2055 ppnum_t *phys_entry,
2056 int *flags)
2057 {
2058 vm_named_entry_t mem_entry;
2059 vm_object_t object;
2060 kern_return_t kr;
2061 /*
2062 * Unwrap offset as no mathematical operations are
2063 * performed on it.
2064 */
2065 vm_object_offset_t offset = VM_SANITIZE_UNSAFE_UNWRAP(offset_u);
2066
2067 mem_entry = mach_memory_entry_from_port(entry_port);
2068 if (mem_entry == NULL) {
2069 return KERN_INVALID_ARGUMENT;
2070 }
2071
2072 named_entry_lock(mem_entry);
2073
2074 if (mem_entry->is_sub_map ||
2075 mem_entry->is_copy) {
2076 named_entry_unlock(mem_entry);
2077 return KERN_INVALID_ARGUMENT;
2078 }
2079
2080 assert(mem_entry->is_object);
2081 object = vm_named_entry_to_vm_object(mem_entry);
2082 if (object == VM_OBJECT_NULL) {
2083 named_entry_unlock(mem_entry);
2084 return KERN_INVALID_ARGUMENT;
2085 }
2086
2087 vm_object_reference(object);
2088 named_entry_unlock(mem_entry);
2089
2090 kr = vm_object_page_op(object, offset, ops, phys_entry, flags);
2091
2092 vm_object_deallocate(object);
2093
2094 return kr;
2095 }
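
/*
 * Example (hedged sketch, not part of this file): asking the page_op
 * interface to discard a single resident page of a named entry.  UPL_POP_DUMP
 * is assumed to come from <mach/memory_object_types.h>; "entry_port" is a
 * hypothetical handle, and the returned physical page and flags are unused
 * here.  Error handling omitted.
 *
 *	ppnum_t phys = 0;
 *	int pflags = 0;
 *	kern_return_t kr;
 *
 *	kr = mach_memory_entry_page_op(entry_port, 0, UPL_POP_DUMP,
 *	    &phys, &pflags);
 */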
2096
2097 /*
2098 * mach_memory_entry_range_op offers a performance enhancement over
2099 * mach_memory_entry_page_op for page_op functions that do not require
2100 * page-level state to be returned from the call. page_op was created to
2101 * provide a low-cost alternative to page manipulation via UPLs when only a
2102 * single page is involved. The range_op call extends the _op family of
2103 * functions to work on multiple pages, where the lack of page-level state
2104 * handling allows the caller to avoid the overhead of UPL structures.
2105 */
2106
2107 kern_return_t
2108 mach_memory_entry_range_op(
2109 ipc_port_t entry_port,
2110 vm_object_offset_ut offset_beg_u,
2111 vm_object_offset_ut offset_end_u,
2112 int ops,
2113 int *range)
2114 {
2115 vm_named_entry_t mem_entry;
2116 vm_object_t object;
2117 kern_return_t kr;
2118 vm_object_offset_t offset_range;
2119 /*
2120 * Unwrap offset beginning and end as no mathematical operations are
2121 * performed on these quantities.
2122 */
2123 vm_object_offset_t offset_beg = VM_SANITIZE_UNSAFE_UNWRAP(offset_beg_u);
2124 vm_object_offset_t offset_end = VM_SANITIZE_UNSAFE_UNWRAP(offset_end_u);
2125
2126 mem_entry = mach_memory_entry_from_port(entry_port);
2127 if (mem_entry == NULL) {
2128 return KERN_INVALID_ARGUMENT;
2129 }
2130
2131 named_entry_lock(mem_entry);
2132
2133 if (__improbable(os_sub_overflow(offset_end, offset_beg, &offset_range) ||
2134 (offset_range > (uint32_t) -1))) {
2135 /* range is too big and would overflow "*range" */
2136 named_entry_unlock(mem_entry);
2137 return KERN_INVALID_ARGUMENT;
2138 }
2139
2140 if (mem_entry->is_sub_map ||
2141 mem_entry->is_copy) {
2142 named_entry_unlock(mem_entry);
2143 return KERN_INVALID_ARGUMENT;
2144 }
2145
2146 assert(mem_entry->is_object);
2147 object = vm_named_entry_to_vm_object(mem_entry);
2148 if (object == VM_OBJECT_NULL) {
2149 named_entry_unlock(mem_entry);
2150 return KERN_INVALID_ARGUMENT;
2151 }
2152
2153 vm_object_reference(object);
2154 named_entry_unlock(mem_entry);
2155
2156 kr = vm_object_range_op(object,
2157 offset_beg,
2158 offset_end,
2159 ops,
2160 (uint32_t *) range);
2161
2162 vm_object_deallocate(object);
2163
2164 return kr;
2165 }
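
/*
 * Example (hedged sketch, not part of this file): measuring how much of the
 * first 1 MB of a named entry is already resident via the range_op interface
 * described above.  UPL_ROP_PRESENT is assumed to come from
 * <mach/memory_object_types.h>; "entry_port" is a hypothetical handle.
 *
 *	int present_bytes = 0;
 *	kern_return_t kr;
 *
 *	kr = mach_memory_entry_range_op(entry_port, 0, 1024 * 1024,
 *	    UPL_ROP_PRESENT, &present_bytes);
 *	// On success, "present_bytes" reports the extent of the leading part
 *	// of the range that is currently resident.
 */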
2166 #endif /* XNU_PLATFORM_MacOSX */
2167
2168 kern_return_t
2169 memory_entry_check_for_adjustment(
2170 vm_map_t src_map,
2171 ipc_port_t port,
2172 vm_map_offset_t *overmap_start,
2173 vm_map_offset_t *overmap_end)
2174 {
2175 kern_return_t kr = KERN_SUCCESS;
2176 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
2177
2178 assert(port);
2179 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
2180
2181 vm_named_entry_t named_entry;
2182
2183 named_entry = mach_memory_entry_from_port(port);
2184 named_entry_lock(named_entry);
2185 copy_map = named_entry->backing.copy;
2186 target_copy_map = copy_map;
2187
2188 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
2189 vm_map_offset_t trimmed_start;
2190
2191 trimmed_start = 0;
2192 DEBUG4K_ADJUST("adjusting...\n");
2193 kr = vm_map_copy_adjust_to_target(
2194 copy_map,
2195 vm_sanitize_wrap_addr(0), /* offset */
2196 vm_sanitize_wrap_size(copy_map->size), /* size */
2197 src_map,
2198 FALSE, /* copy */
2199 &target_copy_map,
2200 overmap_start,
2201 overmap_end,
2202 &trimmed_start);
2203 assert(trimmed_start == 0);
2204 }
2205 named_entry_unlock(named_entry);
2206
2207 return kr;
2208 }
2209