xref: /xnu-11215/bsd/kern/kern_proc.c (revision 8d741a5d)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  * 3. All advertising materials mentioning features or use of this software
42  *    must display the following acknowledgement:
43  *	This product includes software developed by the University of
44  *	California, Berkeley and its contributors.
45  * 4. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)kern_proc.c	8.4 (Berkeley) 1/4/94
62  */
63 /*
64  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65  * support for mandatory and extensible security protections.  This notice
66  * is included in support of clause 2.2 (b) of the Apple Public License,
67  * Version 2.0.
68  */
69 /* HISTORY
70  *  04-Aug-97  Umesh Vaishampayan ([email protected])
71  *	Added current_proc_EXTERNAL() function for the use of kernel
72  *      loadable modules.
73  *
74  *  05-Jun-95 Mac Gillon (mgillon) at NeXT
75  *	New version based on 3.3NS and 4.4
76  */
77 
78 
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/kernel.h>
82 #include <sys/proc_internal.h>
83 #include <sys/acct.h>
84 #include <sys/wait.h>
85 #include <sys/file_internal.h>
86 #include <sys/uio.h>
87 #include <sys/malloc.h>
88 #include <sys/lock.h>
89 #include <sys/mbuf.h>
90 #include <sys/ioctl.h>
91 #include <sys/tty.h>
92 #include <sys/signalvar.h>
93 #include <sys/syslog.h>
94 #include <sys/sysctl.h>
95 #include <sys/sysproto.h>
96 #include <sys/kauth.h>
97 #include <sys/codesign.h>
98 #include <sys/kernel_types.h>
99 #include <sys/ubc.h>
100 #include <kern/clock.h>
101 #include <kern/debug.h>
102 #include <kern/kalloc.h>
103 #include <kern/smr_hash.h>
104 #include <kern/task.h>
105 #include <kern/coalition.h>
106 #include <sys/coalition.h>
107 #include <kern/assert.h>
108 #include <kern/sched_prim.h>
109 #include <vm/vm_protos.h>
110 #include <vm/vm_map_xnu.h>          /* vm_map_switch_protect() */
111 #include <vm/vm_pageout.h>
112 #include <vm/vm_compressor_xnu.h>
113 #include <mach/task.h>
114 #include <mach/message.h>
115 #include <sys/priv.h>
116 #include <sys/proc_info.h>
117 #include <sys/bsdtask_info.h>
118 #include <sys/persona.h>
119 #include <sys/sysent.h>
120 #include <sys/reason.h>
121 #include <sys/proc_require.h>
122 #include <sys/kern_debug.h>
123 #include <IOKit/IOBSD.h>        /* IOTaskHasEntitlement() */
124 #include <kern/ipc_kobject.h>   /* ipc_kobject_set_kobjidx() */
125 #include <kern/ast.h>           /* proc_filedesc_ast */
126 #include <libkern/amfi/amfi.h>
127 #include <mach-o/loader.h>
128 #include <os/base.h>            /* OS_STRINGIFY */
129 
130 #if CONFIG_CSR
131 #include <sys/csr.h>
132 #endif
133 
134 #include <sys/kern_memorystatus.h>
135 
136 #if CONFIG_MACF
137 #include <security/mac_framework.h>
138 #include <security/mac_mach_internal.h>
139 #endif
140 #include <security/audit/audit.h>
141 
142 #include <libkern/crypto/sha1.h>
143 #include <IOKit/IOKitKeys.h>
144 #include <mach/mach_traps.h>
145 #include <mach/task_access.h>
146 #include <kern/extmod_statistics.h>
147 #include <security/mac.h>
148 #include <sys/socketvar.h>
149 #include <sys/kern_memorystatus_freeze.h>
150 #include <net/necp.h>
151 #include <bsm/audit_kevents.h>
152 
153 
154 #if SKYWALK
155 #include <skywalk/core/skywalk_var.h>
156 #endif /* SKYWALK */
157 /*
158  * Structure associated with user cacheing.
159  */
160 struct uidinfo {
161 	LIST_ENTRY(uidinfo) ui_hash;
162 	uid_t   ui_uid;
163 	size_t    ui_proccnt;
164 };
165 #define UIHASH(uid)     (&uihashtbl[(uid) & uihash])
166 static LIST_HEAD(uihashhead, uidinfo) * uihashtbl;
167 static u_long uihash;          /* size of hash table - 1 */
168 
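/*
 * Editor's note (illustrative, not part of the original source): hashinit()
 * returns a table with a power-of-two number of buckets and stores
 * (size - 1) in uihash, so UIHASH() simply masks the uid into a bucket.
 * For a 256-bucket table:
 *
 *	UIHASH(510) == &uihashtbl[510 & 255] == &uihashtbl[254]
 */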
169 /*
170  * Other process lists
171  */
172 static struct smr_hash pid_hash;
173 static struct smr_hash pgrp_hash;
174 
175 SECURITY_READ_ONLY_LATE(struct sesshashhead *) sesshashtbl;
176 SECURITY_READ_ONLY_LATE(u_long) sesshash;
177 
178 struct proclist allproc = LIST_HEAD_INITIALIZER(allproc);
179 struct proclist zombproc = LIST_HEAD_INITIALIZER(zombproc);
180 extern struct tty cons;
181 extern size_t proc_struct_size;
182 extern size_t proc_and_task_size;
183 
184 extern int cs_debug;
185 
186 #if DEVELOPMENT || DEBUG
187 static TUNABLE(bool, syscallfilter_disable, "-disable_syscallfilter", false);
188 #endif // DEVELOPMENT || DEBUG
189 
190 #if DEBUG
191 #define __PROC_INTERNAL_DEBUG 1
192 #endif
193 #if CONFIG_COREDUMP
194 /* Name to give to core files */
195 #if defined(XNU_TARGET_OS_BRIDGE)
196 __XNU_PRIVATE_EXTERN const char * defaultcorefiledir = "/private/var/internal";
197 __XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN + 1] = {"/private/var/internal/%N.core"};
198 __XNU_PRIVATE_EXTERN const char * defaultdrivercorefiledir = "/private/var/internal";
199 __XNU_PRIVATE_EXTERN char drivercorefilename[MAXPATHLEN + 1] = {"/private/var/internal/%N.core"};
200 #elif defined(XNU_TARGET_OS_OSX)
201 __XNU_PRIVATE_EXTERN const char * defaultcorefiledir = "/cores";
202 __XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN + 1] = {"/cores/core.%P"};
203 __XNU_PRIVATE_EXTERN const char * defaultdrivercorefiledir = "/private/var/dextcores";
204 __XNU_PRIVATE_EXTERN char drivercorefilename[MAXPATHLEN + 1] = {"/private/var/dextcores/%N.core"};
205 #else
206 __XNU_PRIVATE_EXTERN const char * defaultcorefiledir = "/private/var/cores";
207 __XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN + 1] = {"/private/var/cores/%N.core"};
208 __XNU_PRIVATE_EXTERN const char * defaultdrivercorefiledir = "/private/var/dextcores";
209 __XNU_PRIVATE_EXTERN char drivercorefilename[MAXPATHLEN + 1] = {"/private/var/dextcores/%N.core"};
210 #endif
211 #endif
212 
213 #if PROC_REF_DEBUG
214 #include <kern/backtrace.h>
215 #endif
216 
217 static LCK_MTX_DECLARE_ATTR(proc_klist_mlock, &proc_mlock_grp, &proc_lck_attr);
218 
219 ZONE_DEFINE(pgrp_zone, "pgrp",
220     sizeof(struct pgrp), ZC_ZFREE_CLEARMEM);
221 ZONE_DEFINE(session_zone, "session",
222     sizeof(struct session), ZC_ZFREE_CLEARMEM);
223 ZONE_DEFINE_ID(ZONE_ID_PROC_RO, "proc_ro", struct proc_ro,
224     ZC_READONLY | ZC_ZFREE_CLEARMEM);
225 
226 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
227 
228 static void orphanpg(struct pgrp * pg);
229 void proc_name_kdp(proc_t t, char * buf, int size);
230 boolean_t proc_binary_uuid_kdp(task_t task, uuid_t uuid);
231 boolean_t current_thread_aborted(void);
232 int proc_threadname_kdp(void * uth, char * buf, size_t size);
233 void proc_starttime_kdp(void * p, unaligned_u64 *tv_sec, unaligned_u64 *tv_usec, unaligned_u64 *abstime);
234 void proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
235 uint64_t proc_getcsflags_kdp(void * p);
236 const char * proc_name_address(void * p);
237 char * proc_longname_address(void *);
238 
239 static void pgrp_destroy(struct pgrp *pgrp);
240 static void pgrp_replace(proc_t p, struct pgrp *pgrp);
241 static int csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaddittoken);
242 static boolean_t proc_parent_is_currentproc(proc_t p);
243 
244 #if CONFIG_PROC_RESOURCE_LIMITS
245 extern void task_filedesc_ast(task_t task, int current_size, int soft_limit, int hard_limit);
246 extern void task_kqworkloop_ast(task_t task, int current_size, int soft_limit, int hard_limit);
247 #endif
248 
249 struct fixjob_iterargs {
250 	struct pgrp * pg;
251 	struct session * mysession;
252 	int entering;
253 };
254 
255 int fixjob_callback(proc_t, void *);
256 
257 uint64_t
258 get_current_unique_pid(void)
259 {
260 	proc_t  p = current_proc();
261 
262 	if (p) {
263 		return proc_uniqueid(p);
264 	} else {
265 		return 0;
266 	}
267 }
268 
269 /*
270  * Initialize global process hashing structures.
271  */
272 static void
273 procinit(void)
274 {
275 	smr_hash_init(&pid_hash, maxproc / 4);
276 	smr_hash_init(&pgrp_hash, maxproc / 4);
277 	sesshashtbl = hashinit(maxproc / 4, M_PROC, &sesshash);
278 	uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash);
279 }
280 STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, procinit);
281 
282 /*
283  * Change the count associated with the number of processes
284  * a given user is using. This routine protects the uihash
285  * with the list lock.
286  */
287 size_t
288 chgproccnt(uid_t uid, int diff)
289 {
290 	struct uidinfo *uip;
291 	struct uidinfo *newuip = NULL;
292 	struct uihashhead *uipp;
293 	size_t retval;
294 
295 again:
296 	proc_list_lock();
297 	uipp = UIHASH(uid);
298 	for (uip = uipp->lh_first; uip != 0; uip = uip->ui_hash.le_next) {
299 		if (uip->ui_uid == uid) {
300 			break;
301 		}
302 	}
303 	if (uip) {
304 		uip->ui_proccnt += diff;
305 		if (uip->ui_proccnt > 0) {
306 			retval = uip->ui_proccnt;
307 			proc_list_unlock();
308 			goto out;
309 		}
310 		LIST_REMOVE(uip, ui_hash);
311 		retval = 0;
312 		proc_list_unlock();
313 		kfree_type(struct uidinfo, uip);
314 		goto out;
315 	}
316 	if (diff <= 0) {
317 		if (diff == 0) {
318 			retval = 0;
319 			proc_list_unlock();
320 			goto out;
321 		}
322 		panic("chgproccnt: lost user");
323 	}
324 	if (newuip != NULL) {
325 		uip = newuip;
326 		newuip = NULL;
327 		LIST_INSERT_HEAD(uipp, uip, ui_hash);
328 		uip->ui_uid = uid;
329 		uip->ui_proccnt = diff;
330 		retval = diff;
331 		proc_list_unlock();
332 		goto out;
333 	}
334 	proc_list_unlock();
335 	newuip = kalloc_type(struct uidinfo, Z_WAITOK | Z_NOFAIL);
336 	goto again;
337 out:
338 	kfree_type(struct uidinfo, newuip);
339 	return retval;
340 }
341 
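/*
 * Editor's note (illustrative sketch, not part of the original source): a
 * fork-style caller would typically charge the child to the parent's uid
 * and back the charge out if a per-user limit is exceeded.  The names
 * below (uid, proc_limit) are hypothetical:
 *
 *	count = chgproccnt(uid, 1);
 *	if (uid != 0 && count > proc_limit) {
 *		(void)chgproccnt(uid, -1);	// undo the charge
 *		return EAGAIN;
 *	}
 */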
342 /*
343  * Is p an inferior of the current process?
344  */
345 int
346 inferior(proc_t p)
347 {
348 	int retval = 0;
349 
350 	proc_list_lock();
351 	for (; p != current_proc(); p = p->p_pptr) {
352 		if (proc_getpid(p) == 0) {
353 			goto out;
354 		}
355 	}
356 	retval = 1;
357 out:
358 	proc_list_unlock();
359 	return retval;
360 }
361 
362 /*
363  * Is p an inferior of t ?
364  */
365 int
366 isinferior(proc_t p, proc_t t)
367 {
368 	int retval = 0;
369 	int nchecked = 0;
370 	proc_t start = p;
371 
372 	/* if p==t they are not inferior */
373 	if (p == t) {
374 		return 0;
375 	}
376 
377 	proc_list_lock();
378 	for (; p != t; p = p->p_pptr) {
379 		nchecked++;
380 
381 		/* Detect here if we're in a cycle */
382 		if ((proc_getpid(p) == 0) || (p->p_pptr == start) || (nchecked >= nprocs)) {
383 			goto out;
384 		}
385 	}
386 	retval = 1;
387 out:
388 	proc_list_unlock();
389 	return retval;
390 }
391 
392 int
393 proc_isinferior(int pid1, int pid2)
394 {
395 	proc_t p = PROC_NULL;
396 	proc_t t = PROC_NULL;
397 	int retval = 0;
398 
399 	if (((p = proc_find(pid1)) != (proc_t)0) && ((t = proc_find(pid2)) != (proc_t)0)) {
400 		retval = isinferior(p, t);
401 	}
402 
403 	if (p != PROC_NULL) {
404 		proc_rele(p);
405 	}
406 	if (t != PROC_NULL) {
407 		proc_rele(t);
408 	}
409 
410 	return retval;
411 }
412 
413 /*
414  * Returns process identity of a given process. Calling this function is not
415  * racy for the current process or if a reference to the process is held.
416  */
417 struct proc_ident
418 proc_ident(proc_t p)
419 {
420 	struct proc_ident ident = {
421 		.p_pid = proc_pid(p),
422 		.p_uniqueid = proc_uniqueid(p),
423 		.p_idversion = proc_pidversion(p),
424 	};
425 
426 	return ident;
427 }
428 
429 proc_t
430 proc_find_ident(struct proc_ident const *ident)
431 {
432 	proc_t proc = PROC_NULL;
433 
434 	proc = proc_find(ident->p_pid);
435 	if (proc == PROC_NULL) {
436 		return PROC_NULL;
437 	}
438 
439 	if (proc_uniqueid(proc) != ident->p_uniqueid ||
440 	    proc_pidversion(proc) != ident->p_idversion) {
441 		proc_rele(proc);
442 		return PROC_NULL;
443 	}
444 
445 	return proc;
446 }
447 
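/*
 * Editor's note (illustrative usage, not part of the original source):
 * capture a stable identity while a ref is held, then re-validate it later.
 * The re-lookup fails if the pid was reused by a different process:
 *
 *	struct proc_ident ident = proc_ident(p);	// p referenced or current
 *	...
 *	proc_t q = proc_find_ident(&ident);
 *	if (q != PROC_NULL) {
 *		// same pid, uniqueid and idversion
 *		proc_rele(q);
 *	}
 */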
448 void
449 uthread_reset_proc_refcount(uthread_t uth)
450 {
451 	uth->uu_proc_refcount = 0;
452 
453 #if PROC_REF_DEBUG
454 	if (kern_feature_override(KF_DISABLE_PROCREF_TRACKING_OVRD)) {
455 		return;
456 	}
457 
458 	struct uthread_proc_ref_info *upri = uth->uu_proc_ref_info;
459 	uint32_t n = uth->uu_proc_ref_info->upri_pindex;
460 
461 	uth->uu_proc_ref_info->upri_pindex = 0;
462 
463 	if (n) {
464 		for (unsigned i = 0; i < n; i++) {
465 			btref_put(upri->upri_proc_stacks[i]);
466 		}
467 		bzero(upri->upri_proc_stacks, sizeof(btref_t) * n);
468 		bzero(upri->upri_proc_ps, sizeof(proc_t) * n);
469 	}
470 #endif /* PROC_REF_DEBUG */
471 }
472 
473 #if PROC_REF_DEBUG
474 void
475 uthread_init_proc_refcount(uthread_t uth)
476 {
477 	if (kern_feature_override(KF_DISABLE_PROCREF_TRACKING_OVRD)) {
478 		return;
479 	}
480 
481 	uth->uu_proc_ref_info = kalloc_type(struct uthread_proc_ref_info,
482 	    Z_ZERO | Z_WAITOK | Z_NOFAIL);
483 }
484 
485 void
486 uthread_destroy_proc_refcount(uthread_t uth)
487 {
488 	if (kern_feature_override(KF_DISABLE_PROCREF_TRACKING_OVRD)) {
489 		return;
490 	}
491 
492 	struct uthread_proc_ref_info *upri = uth->uu_proc_ref_info;
493 	uint32_t n = uth->uu_proc_ref_info->upri_pindex;
494 
495 	for (unsigned i = 0; i < n; i++) {
496 		btref_put(upri->upri_proc_stacks[i]);
497 	}
498 
499 	kfree_type(struct uthread_proc_ref_info, uth->uu_proc_ref_info);
500 }
501 
502 void
503 uthread_assert_zero_proc_refcount(uthread_t uth)
504 {
505 	if (kern_feature_override(KF_DISABLE_PROCREF_TRACKING_OVRD)) {
506 		return;
507 	}
508 
509 	if (__improbable(uth->uu_proc_refcount != 0)) {
510 		panic("Unexpected non zero uu_proc_refcount = %d (%p)",
511 		    uth->uu_proc_refcount, uth);
512 	}
513 }
514 #endif /* PROC_REF_DEBUG */
515 
516 bool
517 proc_list_exited(proc_t p)
518 {
519 	return os_ref_get_raw_mask(&p->p_refcount) & P_REF_DEAD;
520 }
521 
522 #if CONFIG_DEBUG_SYSCALL_REJECTION
523 uint64_t
524 uthread_get_syscall_rejection_flags(void *uthread)
525 {
526 	uthread_t uth = (uthread_t) uthread;
527 	return uth->syscall_rejection_flags;
528 }
529 
530 uint64_t*
531 uthread_get_syscall_rejection_mask(void *uthread)
532 {
533 	uthread_t uth = (uthread_t) uthread;
534 	return uth->syscall_rejection_mask;
535 }
536 
537 uint64_t*
538 uthread_get_syscall_rejection_once_mask(void *uthread)
539 {
540 	uthread_t uth = (uthread_t) uthread;
541 	return uth->syscall_rejection_once_mask;
542 }
543 
544 bool
545 uthread_syscall_rejection_is_enabled(void *uthread)
546 {
547 	uthread_t uth = (uthread_t) uthread;
548 	return (debug_syscall_rejection_mode != 0) || (uth->syscall_rejection_flags & SYSCALL_REJECTION_FLAGS_FORCE_FATAL);
549 }
550 #endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
551 
552 #if PROC_REF_DEBUG
553 __attribute__((noinline))
554 #endif /* PROC_REF_DEBUG */
555 static void
556 record_procref(proc_t p __unused, int count)
557 {
558 	uthread_t uth;
559 
560 	uth = current_uthread();
561 	uth->uu_proc_refcount += count;
562 
563 #if PROC_REF_DEBUG
564 	if (kern_feature_override(KF_DISABLE_PROCREF_TRACKING_OVRD)) {
565 		return;
566 	}
567 	struct uthread_proc_ref_info *upri = uth->uu_proc_ref_info;
568 
569 	if (upri->upri_pindex < NUM_PROC_REFS_TO_TRACK) {
570 		upri->upri_proc_stacks[upri->upri_pindex] =
571 		    btref_get(__builtin_frame_address(0), BTREF_GET_NOWAIT);
572 		upri->upri_proc_ps[upri->upri_pindex] = p;
573 		upri->upri_pindex++;
574 	}
575 #endif /* PROC_REF_DEBUG */
576 }
577 
578 /*!
579  * @function proc_ref_try_fast()
580  *
581  * @brief
582  * Tries to take a proc ref, unless it is in flux (being made, or dead).
583  *
584  * @returns
585  * - the new refcount value (including bits) on success,
586  * - 0 on failure.
587  */
588 static inline uint32_t
589 proc_ref_try_fast(proc_t p)
590 {
591 	uint32_t bits;
592 
593 	proc_require(p, PROC_REQUIRE_ALLOW_ALL);
594 
595 	bits = os_ref_retain_try_mask(&p->p_refcount, P_REF_BITS,
596 	    P_REF_NEW | P_REF_DEAD, NULL);
597 	if (bits) {
598 		record_procref(p, 1);
599 	}
600 	return bits;
601 }
602 
603 /*!
604  * @function proc_ref_wait()
605  *
606  * @brief
607  * Waits for the specified bits to clear, on the specified event.
608  */
609 __attribute__((noinline))
610 static void
611 proc_ref_wait(proc_t p, event_t event, proc_ref_bits_t mask, bool locked)
612 {
613 	assert_wait(event, THREAD_UNINT | THREAD_WAIT_NOREPORT);
614 
615 	if (os_ref_get_raw_mask(&p->p_refcount) & mask) {
616 		uthread_t uth = current_uthread();
617 
618 		if (locked) {
619 			proc_list_unlock();
620 		}
621 		uth->uu_wchan = event;
622 		uth->uu_wmesg = "proc_refwait";
623 		thread_block(THREAD_CONTINUE_NULL);
624 		uth->uu_wchan = NULL;
625 		uth->uu_wmesg = NULL;
626 		if (locked) {
627 			proc_list_lock();
628 		}
629 	} else {
630 		clear_wait(current_thread(), THREAD_AWAKENED);
631 	}
632 }
633 
634 /*!
635  * @function proc_ref_wait_for_exec()
636  *
637  * @brief
638  * Routine called by processes trying to acquire a ref while
639  * an exec is in flight.
640  *
641  * @discussion
642  * This function is called with a proc ref held on the proc,
643  * which will be given up until the @c P_REF_*_EXEC flags clear.
644  *
645  * @param p       the proc, the caller owns a proc ref
646  * @param bits    the result of @c proc_ref_try_fast() prior to calling this.
647  * @param locked  whether the caller holds the @c proc_list_lock().
648  */
649 __attribute__((noinline))
650 static proc_t
651 proc_ref_wait_for_exec(proc_t p, uint32_t bits, int locked)
652 {
653 	const proc_ref_bits_t mask = P_REF_WILL_EXEC | P_REF_IN_EXEC;
654 
655 	/*
656 	 * the proc is in the middle of exec,
657 	 * trade our ref for a "wait ref",
658 	 * and wait for the proc_refwake_did_exec() call.
659 	 *
660 	 * Note: it's very unlikely that we'd loop back into the wait,
661 	 *       it would only happen if the target proc would be
662 	 *       in exec again by the time we woke up.
663 	 */
664 	os_ref_retain_raw(&p->p_waitref, &p_refgrp);
665 
666 	do {
667 		proc_rele(p);
668 		proc_ref_wait(p, &p->p_waitref, mask, locked);
669 		bits = proc_ref_try_fast(p);
670 	} while (__improbable(bits & mask));
671 
672 	proc_wait_release(p);
673 
674 	return bits ? p : PROC_NULL;
675 }
676 
677 static inline bool
678 proc_ref_needs_wait_for_exec(uint32_t bits)
679 {
680 	if (__probable((bits & (P_REF_WILL_EXEC | P_REF_IN_EXEC)) == 0)) {
681 		return false;
682 	}
683 
684 	if (bits & P_REF_IN_EXEC) {
685 		return true;
686 	}
687 
688 	/*
689 	 * procs can't have outstanding refs while execing.
690 	 *
691  * In order to achieve that, proc_refdrain_will_exec()
692 	 * will drain outstanding references. It signals its intent
693 	 * with the P_REF_WILL_EXEC flag, and moves to P_REF_IN_EXEC
694 	 * when this is achieved.
695 	 *
696 	 * Most threads will block in proc_ref() when any of those
697 	 * flags is set. However, threads that already have
698  * an outstanding ref on this proc might want another
699  * before dropping them. To avoid deadlocks, we need
700  * to let threads with any outstanding reference take one
701 	 * when only P_REF_WILL_EXEC is set (which causes exec
702 	 * to be delayed).
703 	 *
704 	 * Note: the current thread will _always_ appear like it holds
705 	 *       one ref due to having taken one speculatively.
706 	 */
707 	assert(current_uthread()->uu_proc_refcount >= 1);
708 	return current_uthread()->uu_proc_refcount == 1;
709 }
710 
711 int
712 proc_rele(proc_t p)
713 {
714 	uint32_t o_bits, n_bits;
715 
716 	proc_require(p, PROC_REQUIRE_ALLOW_ALL);
717 
718 	os_atomic_rmw_loop(&p->p_refcount, o_bits, n_bits, release, {
719 		n_bits = o_bits - (1u << P_REF_BITS);
720 		if ((n_bits >> P_REF_BITS) == 1) {
721 		        n_bits &= ~P_REF_DRAINING;
722 		}
723 	});
724 	record_procref(p, -1);
725 
726 	/*
727 	 * p might be freed after this point.
728 	 */
729 
730 	if (__improbable((o_bits & P_REF_DRAINING) && !(n_bits & P_REF_DRAINING))) {
731 		/*
732 		 * This wakeup can cause spurious ones,
733 		 * but proc_refdrain() can deal with those.
734 		 *
735 		 * Because the proc_zone memory is sequestered,
736 		 * it is safe to wake up a possibly "freed" address.
737 		 */
738 		wakeup(&p->p_refcount);
739 	}
740 	return 0;
741 }
742 
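/*
 * Editor's note (illustrative sketch, not part of the original source):
 * every ref handed out by proc_self(), proc_ref() or proc_find() must be
 * balanced with proc_rele() once the caller is done with the proc:
 *
 *	proc_t p = proc_find(pid);
 *	if (p != PROC_NULL) {
 *		// p cannot be freed while this ref is held
 *		proc_rele(p);
 *	}
 */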
743 bool
744 proc_is_shadow(proc_t p)
745 {
746 	return os_ref_get_raw_mask(&p->p_refcount) & P_REF_SHADOW;
747 }
748 
749 proc_t
750 proc_self(void)
751 {
752 	proc_t p = current_proc();
753 
754 	/*
755 	 * Do not go through the logic of "wait for exec", it is meaningless.
756 	 * Only fail taking a ref for oneself if the proc is about to die.
757 	 */
758 	return proc_ref_try_fast(p) ? p : PROC_NULL;
759 }
760 
761 proc_t
762 proc_ref(proc_t p, int locked)
763 {
764 	uint32_t bits;
765 
766 	bits = proc_ref_try_fast(p);
767 	if (__improbable(!bits)) {
768 		return PROC_NULL;
769 	}
770 
771 	if (__improbable(proc_ref_needs_wait_for_exec(bits))) {
772 		return proc_ref_wait_for_exec(p, bits, locked);
773 	}
774 
775 	return p;
776 }
777 
778 static void
779 proc_wait_free(smr_node_t node)
780 {
781 	struct proc *p = __container_of(node, struct proc, p_smr_node);
782 
783 	proc_release_proc_task_struct(p);
784 }
785 
786 void
787 proc_wait_release(proc_t p)
788 {
789 	if (__probable(os_ref_release_raw(&p->p_waitref, &p_refgrp) == 0)) {
790 		smr_proc_task_call(&p->p_smr_node, proc_and_task_size,
791 		    proc_wait_free);
792 	}
793 }
794 
795 proc_t
796 proc_find_zombref(int pid)
797 {
798 	proc_t p;
799 
800 	proc_list_lock();
801 
802 again:
803 	p = phash_find_locked(pid);
804 
805 	/* should we bail? */
806 	if ((p == PROC_NULL) || !proc_list_exited(p)) {
807 		proc_list_unlock();
808 		return PROC_NULL;
809 	}
810 
811 	/* If someone else is controlling the (unreaped) zombie - wait */
812 	if ((p->p_listflag & P_LIST_WAITING) != 0) {
813 		(void)msleep(&p->p_stat, &proc_list_mlock, PWAIT, "waitcoll", 0);
814 		goto again;
815 	}
816 	p->p_listflag |=  P_LIST_WAITING;
817 
818 	proc_list_unlock();
819 
820 	return p;
821 }
822 
823 void
824 proc_drop_zombref(proc_t p)
825 {
826 	proc_list_lock();
827 	if ((p->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
828 		p->p_listflag &= ~P_LIST_WAITING;
829 		wakeup(&p->p_stat);
830 	}
831 	proc_list_unlock();
832 }
833 
834 
835 void
836 proc_refdrain(proc_t p)
837 {
838 	uint32_t bits = os_ref_get_raw_mask(&p->p_refcount);
839 
840 	assert(proc_list_exited(p));
841 
842 	while ((bits >> P_REF_BITS) > 1) {
843 		if (os_atomic_cmpxchgv(&p->p_refcount, bits,
844 		    bits | P_REF_DRAINING, &bits, relaxed)) {
845 			proc_ref_wait(p, &p->p_refcount, P_REF_DRAINING, false);
846 		}
847 	}
848 }
849 
850 proc_t
851 proc_refdrain_will_exec(proc_t p)
852 {
853 	const proc_ref_bits_t will_exec_mask = P_REF_WILL_EXEC | P_REF_DRAINING;
854 
855 	/*
856 	 * All the calls to proc_ref will wait
857 	 * for the flag to get cleared before returning a ref.
858 	 *
859 	 * (except for the case documented in proc_ref_needs_wait_for_exec()).
860 	 */
861 
862 	if (p == initproc) {
863 		/* Do not wait in ref drain for launchd exec */
864 		os_atomic_or(&p->p_refcount, P_REF_IN_EXEC, relaxed);
865 	} else {
866 		for (;;) {
867 			uint32_t o_ref, n_ref;
868 
869 			os_atomic_rmw_loop(&p->p_refcount, o_ref, n_ref, relaxed, {
870 				if ((o_ref >> P_REF_BITS) == 1) {
871 				        /*
872 				         * We drained successfully,
873 				         * move on to P_REF_IN_EXEC
874 				         */
875 				        n_ref = o_ref & ~will_exec_mask;
876 				        n_ref |= P_REF_IN_EXEC;
877 				} else {
878 				        /*
879 				         * Outstanding refs exist,
880 				         * mark our desire to stall
881 				         * proc_ref() callers with
882 				         * P_REF_WILL_EXEC.
883 				         */
884 				        n_ref = o_ref | will_exec_mask;
885 				}
886 			});
887 
888 			if (n_ref & P_REF_IN_EXEC) {
889 				break;
890 			}
891 
892 			proc_ref_wait(p, &p->p_refcount, P_REF_DRAINING, false);
893 		}
894 	}
895 
896 	/* Return a ref to the caller */
897 	os_ref_retain_mask(&p->p_refcount, P_REF_BITS, NULL);
898 	record_procref(p, 1);
899 
900 	return p;
901 }
902 
903 void
904 proc_refwake_did_exec(proc_t p)
905 {
906 	os_atomic_andnot(&p->p_refcount, P_REF_IN_EXEC, release);
907 	wakeup(&p->p_waitref);
908 }
909 
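/*
 * Editor's note (summary, not part of the original source): the exec
 * handshake implemented by proc_refdrain_will_exec() and
 * proc_refwake_did_exec() above moves through these states:
 *
 *	P_REF_WILL_EXEC  --(outstanding refs drained)-->  P_REF_IN_EXEC
 *	P_REF_IN_EXEC    --(proc_refwake_did_exec)----->  cleared, waiters woken
 *
 * proc_ref() stalls in proc_ref_wait_for_exec() while either bit is set,
 * except for the single-outstanding-ref case in proc_ref_needs_wait_for_exec().
 */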
910 void
911 proc_ref_hold_proc_task_struct(proc_t proc)
912 {
913 	os_atomic_or(&proc->p_refcount, P_REF_PROC_HOLD, relaxed);
914 }
915 
916 static void
917 proc_free(proc_t proc, proc_ro_t proc_ro)
918 {
919 	kauth_cred_t cred;
920 
921 	assert(proc_ro != NULL);
922 
923 	cred = smr_serialized_load(&proc_ro->p_ucred);
924 	kauth_cred_set(&cred, NOCRED);
925 
926 	zfree_ro(ZONE_ID_PROC_RO, proc_ro);
927 
928 	zfree(proc_task_zone, proc);
929 }
930 
931 void
932 proc_release_proc_task_struct(proc_t proc)
933 {
934 	uint32_t old_ref = os_atomic_andnot_orig(&proc->p_refcount, P_REF_PROC_HOLD, relaxed);
935 	if ((old_ref & P_REF_TASK_HOLD) == 0) {
936 		proc_free(proc, proc->p_proc_ro);
937 	}
938 }
939 
940 void
941 task_ref_hold_proc_task_struct(task_t task)
942 {
943 	proc_t proc_from_task = task_get_proc_raw(task);
944 	os_atomic_or(&proc_from_task->p_refcount, P_REF_TASK_HOLD, relaxed);
945 }
946 
947 void
948 task_release_proc_task_struct(task_t task, proc_ro_t proc_ro)
949 {
950 	proc_t proc_from_task = task_get_proc_raw(task);
951 	uint32_t old_ref = os_atomic_andnot_orig(&proc_from_task->p_refcount, P_REF_TASK_HOLD, relaxed);
952 
953 	if ((old_ref & P_REF_PROC_HOLD) == 0) {
954 		proc_free(proc_from_task, proc_ro);
955 	}
956 }
957 
958 proc_t
959 proc_parentholdref(proc_t p)
960 {
961 	proc_t parent = PROC_NULL;
962 	proc_t pp;
963 
964 	proc_list_lock();
965 loop:
966 	pp = p->p_pptr;
967 	if ((pp == PROC_NULL) || (pp->p_stat == SZOMB) || ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED))) {
968 		parent = PROC_NULL;
969 		goto out;
970 	}
971 
972 	if ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == P_LIST_CHILDDRSTART) {
973 		pp->p_listflag |= P_LIST_CHILDDRWAIT;
974 		msleep(&pp->p_childrencnt, &proc_list_mlock, 0, "proc_parent", 0);
975 		goto loop;
976 	}
977 
978 	if ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == 0) {
979 		pp->p_parentref++;
980 		parent = pp;
981 		goto out;
982 	}
983 
984 out:
985 	proc_list_unlock();
986 	return parent;
987 }
988 int
989 proc_parentdropref(proc_t p, int listlocked)
990 {
991 	if (listlocked == 0) {
992 		proc_list_lock();
993 	}
994 
995 	if (p->p_parentref > 0) {
996 		p->p_parentref--;
997 		if ((p->p_parentref == 0) && ((p->p_listflag & P_LIST_PARENTREFWAIT) == P_LIST_PARENTREFWAIT)) {
998 			p->p_listflag &= ~P_LIST_PARENTREFWAIT;
999 			wakeup(&p->p_parentref);
1000 		}
1001 	} else {
1002 		panic("proc_parentdropref  -ve ref");
1003 	}
1004 	if (listlocked == 0) {
1005 		proc_list_unlock();
1006 	}
1007 
1008 	return 0;
1009 }
1010 
1011 void
1012 proc_childdrainstart(proc_t p)
1013 {
1014 #if __PROC_INTERNAL_DEBUG
1015 	if ((p->p_listflag & P_LIST_CHILDDRSTART) == P_LIST_CHILDDRSTART) {
1016 		panic("proc_childdrainstart: childdrain already started");
1017 	}
1018 #endif
1019 	p->p_listflag |= P_LIST_CHILDDRSTART;
1020 	/* wait for all that hold parentrefs to drop */
1021 	while (p->p_parentref > 0) {
1022 		p->p_listflag |= P_LIST_PARENTREFWAIT;
1023 		msleep(&p->p_parentref, &proc_list_mlock, 0, "proc_childdrainstart", 0);
1024 	}
1025 }
1026 
1027 
1028 void
1029 proc_childdrainend(proc_t p)
1030 {
1031 #if __PROC_INTERNAL_DEBUG
1032 	if (p->p_childrencnt > 0) {
1033 		panic("exiting: children still hanging around");
1034 	}
1035 #endif
1036 	p->p_listflag |= P_LIST_CHILDDRAINED;
1037 	if ((p->p_listflag & (P_LIST_CHILDLKWAIT | P_LIST_CHILDDRWAIT)) != 0) {
1038 		p->p_listflag &= ~(P_LIST_CHILDLKWAIT | P_LIST_CHILDDRWAIT);
1039 		wakeup(&p->p_childrencnt);
1040 	}
1041 }
1042 
1043 void
1044 proc_checkdeadrefs(__unused proc_t p)
1045 {
1046 	uint32_t bits;
1047 
1048 	bits = os_ref_release_raw_mask(&p->p_refcount, P_REF_BITS, NULL);
1049 	bits &= ~(P_REF_SHADOW | P_REF_PROC_HOLD | P_REF_TASK_HOLD);
1050 	if (bits != P_REF_DEAD) {
1051 		panic("proc being freed and unexpected refcount %p:%d:0x%x", p,
1052 		    bits >> P_REF_BITS, bits & P_REF_MASK);
1053 	}
1054 #if __PROC_INTERNAL_DEBUG
1055 	if (p->p_childrencnt != 0) {
1056 		panic("proc being freed and pending children cnt %p:%d", p, p->p_childrencnt);
1057 	}
1058 	if (p->p_parentref != 0) {
1059 		panic("proc being freed and pending parentrefs %p:%d", p, p->p_parentref);
1060 	}
1061 #endif
1062 }
1063 
1064 
1065 __attribute__((always_inline, visibility("hidden")))
1066 void
1067 proc_require(proc_t proc, proc_require_flags_t flags)
1068 {
1069 	if ((flags & PROC_REQUIRE_ALLOW_NULL) && proc == PROC_NULL) {
1070 		return;
1071 	}
1072 	zone_id_require(ZONE_ID_PROC_TASK, proc_and_task_size, proc);
1073 }
1074 
1075 pid_t
1076 proc_getpid(proc_t p)
1077 {
1078 	if (p == kernproc) {
1079 		return 0;
1080 	}
1081 
1082 	return p->p_pid;
1083 }
1084 
1085 int
1086 proc_pid(proc_t p)
1087 {
1088 	if (p != NULL) {
1089 		proc_require(p, PROC_REQUIRE_ALLOW_ALL);
1090 		return proc_getpid(p);
1091 	}
1092 	return -1;
1093 }
1094 
1095 int
1096 proc_ppid(proc_t p)
1097 {
1098 	if (p != NULL) {
1099 		proc_require(p, PROC_REQUIRE_ALLOW_ALL);
1100 		return p->p_ppid;
1101 	}
1102 	return -1;
1103 }
1104 
1105 int
1106 proc_original_ppid(proc_t p)
1107 {
1108 	if (p != NULL) {
1109 		proc_require(p, PROC_REQUIRE_ALLOW_ALL);
1110 		return p->p_original_ppid;
1111 	}
1112 	return -1;
1113 }
1114 
1115 int
1116 proc_starttime(proc_t p, struct timeval *tv)
1117 {
1118 	if (p != NULL && tv != NULL) {
1119 		tv->tv_sec = p->p_start.tv_sec;
1120 		tv->tv_usec = p->p_start.tv_usec;
1121 		return 0;
1122 	}
1123 	return EINVAL;
1124 }
1125 
1126 int
1127 proc_selfpid(void)
1128 {
1129 	return proc_getpid(current_proc());
1130 }
1131 
1132 int
1133 proc_selfppid(void)
1134 {
1135 	return current_proc()->p_ppid;
1136 }
1137 
1138 uint64_t
1139 proc_selfcsflags(void)
1140 {
1141 	return proc_getcsflags(current_proc());
1142 }
1143 
1144 int
1145 proc_csflags(proc_t p, uint64_t *flags)
1146 {
1147 	if (p && flags) {
1148 		proc_require(p, PROC_REQUIRE_ALLOW_ALL);
1149 		*flags = proc_getcsflags(p);
1150 		return 0;
1151 	}
1152 	return EINVAL;
1153 }
1154 
1155 boolean_t
1156 proc_is_simulated(const proc_t p)
1157 {
1158 #ifdef XNU_TARGET_OS_OSX
1159 	if (p != NULL) {
1160 		switch (proc_platform(p)) {
1161 		case PLATFORM_IOSSIMULATOR:
1162 		case PLATFORM_TVOSSIMULATOR:
1163 		case PLATFORM_WATCHOSSIMULATOR:
1164 			return TRUE;
1165 		default:
1166 			return FALSE;
1167 		}
1168 	}
1169 #else /* !XNU_TARGET_OS_OSX */
1170 	(void)p;
1171 #endif
1172 	return FALSE;
1173 }
1174 
1175 uint32_t
1176 proc_platform(const proc_t p)
1177 {
1178 	if (p != NULL) {
1179 		return proc_get_ro(p)->p_platform_data.p_platform;
1180 	}
1181 	return (uint32_t)-1;
1182 }
1183 
1184 uint32_t
1185 proc_min_sdk(proc_t p)
1186 {
1187 	if (p != NULL) {
1188 		return proc_get_ro(p)->p_platform_data.p_min_sdk;
1189 	}
1190 	return (uint32_t)-1;
1191 }
1192 
1193 uint32_t
1194 proc_sdk(proc_t p)
1195 {
1196 	if (p != NULL) {
1197 		return proc_get_ro(p)->p_platform_data.p_sdk;
1198 	}
1199 	return (uint32_t)-1;
1200 }
1201 
1202 void
1203 proc_setplatformdata(proc_t p, uint32_t platform, uint32_t min_sdk, uint32_t sdk)
1204 {
1205 	proc_ro_t ro;
1206 	struct proc_platform_ro_data platform_data;
1207 
1208 	ro = proc_get_ro(p);
1209 	platform_data = ro->p_platform_data;
1210 	platform_data.p_platform = platform;
1211 	platform_data.p_min_sdk = min_sdk;
1212 	platform_data.p_sdk = sdk;
1213 
1214 	zalloc_ro_update_field(ZONE_ID_PROC_RO, ro, p_platform_data, &platform_data);
1215 }
1216 
1217 #if CONFIG_DTRACE
1218 int
1219 dtrace_proc_selfpid(void)
1220 {
1221 	return proc_selfpid();
1222 }
1223 
1224 int
1225 dtrace_proc_selfppid(void)
1226 {
1227 	return proc_selfppid();
1228 }
1229 
1230 uid_t
1231 dtrace_proc_selfruid(void)
1232 {
1233 	return current_proc()->p_ruid;
1234 }
1235 #endif /* CONFIG_DTRACE */
1236 
1237 /*!
1238  * @function proc_parent()
1239  *
1240  * @brief
1241  * Returns a ref on the parent of @c p.
1242  *
1243  * @discussion
1244  * Returns a reference on the parent, or @c PROC_NULL
1245  * if both @c p and its parent are zombies.
1246  *
1247  * If the parent is currently dying, then this function waits
1248  * for the situation to be resolved.
1249  *
1250  * This function never returns @c PROC_NULL if @c p isn't
1251  * a zombie (@c p_stat is @c SZOMB) yet.
1252  */
1253 proc_t
1254 proc_parent(proc_t p)
1255 {
1256 	proc_t parent;
1257 	proc_t pp;
1258 
1259 	proc_list_lock();
1260 
1261 	while (1) {
1262 		pp = p->p_pptr;
1263 		parent = proc_ref(pp, true);
1264 		/* Check if we got a proc ref and it is still the parent */
1265 		if (parent != PROC_NULL) {
1266 			if (parent == p->p_pptr) {
1267 				/*
1268 				 * We have a ref on the parent and it is still
1269 				 * our parent, return the ref
1270 				 */
1271 				proc_list_unlock();
1272 				return parent;
1273 			}
1274 
1275 			/*
1276 			 * Our parent changed while we slept on proc_ref,
1277 			 * drop the ref on old parent and retry.
1278 			 */
1279 			proc_rele(parent);
1280 			continue;
1281 		}
1282 
1283 		if (pp != p->p_pptr) {
1284 			/*
1285 			 * We didn't get a ref, but parent changed from what
1286 			 * we last saw before we slept in proc_ref, try again
1287 			 * with new parent.
1288 			 */
1289 			continue;
1290 		}
1291 
1292 		if ((pp->p_listflag & P_LIST_CHILDDRAINED) == 0) {
1293 			/* Parent did not change, but we also did not get a
1294 			 * ref on parent, sleep if the parent has not drained
1295 			 * its children and then retry.
1296 			 */
1297 			pp->p_listflag |= P_LIST_CHILDLKWAIT;
1298 			msleep(&pp->p_childrencnt, &proc_list_mlock, 0, "proc_parent", 0);
1299 			continue;
1300 		}
1301 
1302 		/* Parent has died and drained its children and we still
1303 		 * point to it, return NULL.
1304 		 */
1305 		proc_list_unlock();
1306 		return PROC_NULL;
1307 	}
1308 }
1309 
1310 static boolean_t
1311 proc_parent_is_currentproc(proc_t p)
1312 {
1313 	boolean_t ret = FALSE;
1314 
1315 	proc_list_lock();
1316 	if (p->p_pptr == current_proc()) {
1317 		ret = TRUE;
1318 	}
1319 
1320 	proc_list_unlock();
1321 	return ret;
1322 }
1323 
1324 void
1325 proc_name(int pid, char * buf, int size)
1326 {
1327 	proc_t p;
1328 
1329 	if (size <= 0) {
1330 		return;
1331 	}
1332 
1333 	bzero(buf, size);
1334 
1335 	if ((p = proc_find(pid)) != PROC_NULL) {
1336 		strlcpy(buf, &p->p_comm[0], MIN((int)sizeof(p->p_comm), size));
1337 		proc_rele(p);
1338 	}
1339 }
1340 
1341 void
1342 proc_name_kdp(proc_t p, char * buf, int size)
1343 {
1344 	if (p == PROC_NULL) {
1345 		return;
1346 	}
1347 
1348 	if ((size_t)size > sizeof(p->p_comm)) {
1349 		strlcpy(buf, &p->p_name[0], MIN((int)sizeof(p->p_name), size));
1350 	} else {
1351 		strlcpy(buf, &p->p_comm[0], MIN((int)sizeof(p->p_comm), size));
1352 	}
1353 }
1354 
1355 boolean_t
1356 proc_binary_uuid_kdp(task_t task, uuid_t uuid)
1357 {
1358 	proc_t p = get_bsdtask_info(task);
1359 	if (p == PROC_NULL) {
1360 		return FALSE;
1361 	}
1362 
1363 	proc_getexecutableuuid(p, uuid, sizeof(uuid_t));
1364 
1365 	return TRUE;
1366 }
1367 
1368 int
1369 proc_threadname_kdp(void * uth, char * buf, size_t size)
1370 {
1371 	if (size < MAXTHREADNAMESIZE) {
1372 		/* this is really just a protective measure for the future in
1373 		 * case the thread name size in stackshot gets out of sync with
1374 		 * the BSD max thread name size. Note that bsd_getthreadname
1375 		 * doesn't take input buffer size into account. */
1376 		return -1;
1377 	}
1378 
1379 	if (uth != NULL) {
1380 		bsd_getthreadname(uth, buf);
1381 	}
1382 	return 0;
1383 }
1384 
1385 
1386 /* note that this function is generally going to be called from stackshot,
1387  * and the arguments will be coming from a struct which is declared packed,
1388  * thus the input arguments will in general be unaligned. We have to handle
1389  * that here. */
1390 void
1391 proc_starttime_kdp(void *p, unaligned_u64 *tv_sec, unaligned_u64 *tv_usec, unaligned_u64 *abstime)
1392 {
1393 	proc_t pp = (proc_t)p;
1394 	if (pp != PROC_NULL) {
1395 		if (tv_sec != NULL) {
1396 			*tv_sec = pp->p_start.tv_sec;
1397 		}
1398 		if (tv_usec != NULL) {
1399 			*tv_usec = pp->p_start.tv_usec;
1400 		}
1401 		if (abstime != NULL) {
1402 			if (pp->p_stats != NULL) {
1403 				*abstime = pp->p_stats->ps_start;
1404 			} else {
1405 				*abstime = 0;
1406 			}
1407 		}
1408 	}
1409 }
1410 
1411 void
1412 proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype)
1413 {
1414 	proc_t pp = (proc_t)p;
1415 	if (pp != PROC_NULL) {
1416 		*cputype = pp->p_cputype;
1417 		*cpusubtype = pp->p_cpusubtype;
1418 	}
1419 }
1420 
1421 const char *
1422 proc_name_address(void *p)
1423 {
1424 	return &((proc_t)p)->p_comm[0];
1425 }
1426 
1427 char *
1428 proc_longname_address(void *p)
1429 {
1430 	return &((proc_t)p)->p_name[0];
1431 }
1432 
1433 const char *
1434 proc_best_name(proc_t p)
1435 {
1436 	if (p->p_name[0] != '\0') {
1437 		return &p->p_name[0];
1438 	}
1439 	return &p->p_comm[0];
1440 }
1441 
1442 void
1443 proc_best_name_for_pid(int pid, char * buf, int size)
1444 {
1445 	proc_t p;
1446 
1447 	if (size <= 0) {
1448 		return;
1449 	}
1450 
1451 	bzero(buf, size);
1452 
1453 	if ((p = proc_find(pid)) != PROC_NULL) {
1454 		if (p->p_name[0] != '\0') {
1455 			strlcpy(buf, &p->p_name[0], MIN((int)sizeof(p->p_name), size));
1456 		} else {
1457 			strlcpy(buf, &p->p_comm[0], MIN((int)sizeof(p->p_comm), size));
1458 		}
1459 		proc_rele(p);
1460 	}
1461 }
1462 
1463 void
1464 proc_selfname(char * buf, int  size)
1465 {
1466 	proc_t p;
1467 
1468 	if (size <= 0) {
1469 		return;
1470 	}
1471 
1472 	bzero(buf, size);
1473 
1474 	if ((p = current_proc()) != (proc_t)0) {
1475 		strlcpy(buf, &p->p_name[0], MIN((int)sizeof(p->p_name), size));
1476 	}
1477 }
1478 
1479 void
1480 proc_signal(int pid, int signum)
1481 {
1482 	proc_t p;
1483 
1484 	if ((p = proc_find(pid)) != PROC_NULL) {
1485 		psignal(p, signum);
1486 		proc_rele(p);
1487 	}
1488 }
1489 
1490 int
1491 proc_issignal(int pid, sigset_t mask)
1492 {
1493 	proc_t p;
1494 	int error = 0;
1495 
1496 	if ((p = proc_find(pid)) != PROC_NULL) {
1497 		error = proc_pendingsignals(p, mask);
1498 		proc_rele(p);
1499 	}
1500 
1501 	return error;
1502 }
1503 
1504 int
1505 proc_noremotehang(proc_t p)
1506 {
1507 	int retval = 0;
1508 
1509 	if (p) {
1510 		retval = p->p_flag & P_NOREMOTEHANG;
1511 	}
1512 	return retval? 1: 0;
1513 }
1514 
1515 int
1516 proc_exiting(proc_t p)
1517 {
1518 	int retval = 0;
1519 
1520 	if (p) {
1521 		retval = p->p_lflag & P_LEXIT;
1522 	}
1523 	return retval? 1: 0;
1524 }
1525 
1526 int
1527 proc_in_teardown(proc_t p)
1528 {
1529 	int retval = 0;
1530 
1531 	if (p) {
1532 		retval = p->p_lflag & P_LPEXIT;
1533 	}
1534 	return retval? 1: 0;
1535 }
1536 
1537 int
1538 proc_lvfork(proc_t p __unused)
1539 {
1540 	return 0;
1541 }
1542 
1543 int
1544 proc_increment_ru_oublock(proc_t p, long *origvalp)
1545 {
1546 	long origval;
1547 
1548 	if (p && p->p_stats) {
1549 		origval = OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
1550 		if (origvalp) {
1551 			*origvalp = origval;
1552 		}
1553 		return 0;
1554 	}
1555 
1556 	return EINVAL;
1557 }
1558 
1559 int
1560 proc_isabortedsignal(proc_t p)
1561 {
1562 	if ((p != kernproc) && current_thread_aborted() &&
1563 	    (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
1564 	    (p->p_sigacts.ps_sig < 1) || (p->p_sigacts.ps_sig >= NSIG) ||
1565 	    !hassigprop(p->p_sigacts.ps_sig, SA_CORE))) {
1566 		return 1;
1567 	}
1568 
1569 	return 0;
1570 }
1571 
1572 int
1573 proc_forcequota(proc_t p)
1574 {
1575 	int retval = 0;
1576 
1577 	if (p) {
1578 		retval = p->p_flag & P_FORCEQUOTA;
1579 	}
1580 	return retval? 1: 0;
1581 }
1582 
1583 int
1584 proc_suser(proc_t p)
1585 {
1586 	int error;
1587 
1588 	smr_proc_task_enter();
1589 	error = suser(proc_ucred_smr(p), &p->p_acflag);
1590 	smr_proc_task_leave();
1591 	return error;
1592 }
1593 
1594 task_t
1595 proc_task(proc_t proc)
1596 {
1597 	task_t task_from_proc = proc_get_task_raw(proc);
1598 	return (proc->p_lflag & P_LHASTASK) ? task_from_proc : NULL;
1599 }
1600 
1601 void
1602 proc_set_task(proc_t proc, task_t task)
1603 {
1604 	task_t task_from_proc = proc_get_task_raw(proc);
1605 	if (task == NULL) {
1606 		proc->p_lflag &= ~P_LHASTASK;
1607 	} else {
1608 		if (task != task_from_proc) {
1609 			panic("proc_set_task trying to set random task %p", task);
1610 		}
1611 		proc->p_lflag |= P_LHASTASK;
1612 	}
1613 }
1614 
1615 task_t
1616 proc_get_task_raw(proc_t proc)
1617 {
1618 	return (task_t)((uintptr_t)proc + proc_struct_size);
1619 }
1620 
1621 proc_t
1622 task_get_proc_raw(task_t task)
1623 {
1624 	return (proc_t)((uintptr_t)task - proc_struct_size);
1625 }
1626 
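/*
 * Editor's note (illustrative, not part of the original source): a proc and
 * its task share one ZONE_ID_PROC_TASK element, so the two raw accessors
 * above are plain pointer arithmetic over that element:
 *
 *	| struct proc ... | struct task ... |
 *	^ proc              ^ proc + proc_struct_size == task
 *
 * proc_and_task_size covers the whole element (see proc_require() above).
 */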
1627 /*
1628  * Obtain the first thread in a process
1629  *
1630  * XXX This is a bad thing to do; it exists predominantly to support the
1631  * XXX use of proc_t's in places that should really be using
1632  * XXX thread_t's instead.  This maintains historical behaviour, but really
1633  * XXX needs an audit of the context (proxy vs. not) to clean up.
1634  */
1635 thread_t
1636 proc_thread(proc_t proc)
1637 {
1638 	LCK_MTX_ASSERT(&proc->p_mlock, LCK_MTX_ASSERT_OWNED);
1639 
1640 	uthread_t uth = TAILQ_FIRST(&proc->p_uthlist);
1641 
1642 	if (uth != NULL) {
1643 		return get_machthread(uth);
1644 	}
1645 
1646 	return NULL;
1647 }
1648 
1649 kauth_cred_t
1650 proc_ucred_unsafe(proc_t p)
1651 {
1652 	kauth_cred_t cred = smr_serialized_load(&proc_get_ro(p)->p_ucred);
1653 
1654 	return kauth_cred_require(cred);
1655 }
1656 
1657 kauth_cred_t
1658 proc_ucred_smr(proc_t p)
1659 {
1660 	assert(smr_entered(&smr_proc_task));
1661 	return proc_ucred_unsafe(p);
1662 }
1663 
1664 kauth_cred_t
1665 proc_ucred_locked(proc_t p)
1666 {
1667 	LCK_MTX_ASSERT(&p->p_ucred_mlock, LCK_ASSERT_OWNED);
1668 	return proc_ucred_unsafe(p);
1669 }
1670 
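/*
 * Editor's note (illustrative usage, not part of the original source):
 * short, non-blocking credential reads can use the SMR variant, as
 * proc_suser() does above:
 *
 *	smr_proc_task_enter();
 *	kauth_cred_t cred = proc_ucred_smr(p);
 *	// read-only use of cred; must not block while in the SMR section
 *	smr_proc_task_leave();
 */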
1671 struct uthread *
1672 current_uthread(void)
1673 {
1674 	return get_bsdthread_info(current_thread());
1675 }
1676 
1677 
1678 int
1679 proc_is64bit(proc_t p)
1680 {
1681 	return IS_64BIT_PROCESS(p);
1682 }
1683 
1684 int
1685 proc_is64bit_data(proc_t p)
1686 {
1687 	assert(proc_task(p));
1688 	return (int)task_get_64bit_data(proc_task(p));
1689 }
1690 
1691 int
1692 proc_isinitproc(proc_t p)
1693 {
1694 	if (initproc == NULL) {
1695 		return 0;
1696 	}
1697 	return p == initproc;
1698 }
1699 
1700 int
1701 proc_pidversion(proc_t p)
1702 {
1703 	return proc_get_ro(p)->p_idversion;
1704 }
1705 
1706 void
1707 proc_setpidversion(proc_t p, int idversion)
1708 {
1709 	zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p), p_idversion,
1710 	    &idversion);
1711 }
1712 
1713 uint32_t
1714 proc_persona_id(proc_t p)
1715 {
1716 	return (uint32_t)persona_id_from_proc(p);
1717 }
1718 
1719 uint32_t
1720 proc_getuid(proc_t p)
1721 {
1722 	return p->p_uid;
1723 }
1724 
1725 uint32_t
1726 proc_getgid(proc_t p)
1727 {
1728 	return p->p_gid;
1729 }
1730 
1731 uint64_t
1732 proc_uniqueid(proc_t p)
1733 {
1734 	if (p == kernproc) {
1735 		return 0;
1736 	}
1737 
1738 	return proc_get_ro(p)->p_uniqueid;
1739 }
1740 
1741 uint64_t proc_uniqueid_task(void *p_arg, void *t);
1742 /*
1743  * During exec, two tasks point at the proc.  This function is used
1744  * to give tasks a unique ID; we make the matching task have the
1745  * proc's uniqueid, and any other task gets the high-bit flipped.
1746  * (We need to try to avoid returning UINT64_MAX, which is the
1747  * uniqueid of a task without a proc, e.g. while exiting.)
1748  *
1749  * Only used by get_task_uniqueid(); do not add additional callers.
1750  */
1751 uint64_t
1752 proc_uniqueid_task(void *p_arg, void *t __unused)
1753 {
1754 	proc_t p = p_arg;
1755 	uint64_t uniqueid = proc_uniqueid(p);
1756 	return uniqueid ^ (__probable(!proc_is_shadow(p)) ? 0 : (1ull << 63));
1757 }
1758 
1759 uint64_t
1760 proc_puniqueid(proc_t p)
1761 {
1762 	return p->p_puniqueid;
1763 }
1764 
1765 void
1766 proc_coalitionids(__unused proc_t p, __unused uint64_t ids[COALITION_NUM_TYPES])
1767 {
1768 #if CONFIG_COALITIONS
1769 	task_coalition_ids(proc_task(p), ids);
1770 #else
1771 	memset(ids, 0, sizeof(uint64_t[COALITION_NUM_TYPES]));
1772 #endif
1773 	return;
1774 }
1775 
1776 uint64_t
1777 proc_was_throttled(proc_t p)
1778 {
1779 	return p->was_throttled;
1780 }
1781 
1782 uint64_t
1783 proc_did_throttle(proc_t p)
1784 {
1785 	return p->did_throttle;
1786 }
1787 
1788 int
1789 proc_getcdhash(proc_t p, unsigned char *cdhash)
1790 {
1791 	if (p == kernproc) {
1792 		return EINVAL;
1793 	}
1794 	return vn_getcdhash(p->p_textvp, p->p_textoff, cdhash);
1795 }
1796 
1797 uint64_t
1798 proc_getcsflags(proc_t p)
1799 {
1800 	return proc_get_ro(p)->p_csflags;
1801 }
1802 
1803 /* This variant runs in stackshot context and must not take locks. */
1804 uint64_t
1805 proc_getcsflags_kdp(void * p)
1806 {
1807 	proc_t proc = (proc_t)p;
1808 	if (p == PROC_NULL) {
1809 		return 0;
1810 	}
1811 	return proc_getcsflags(proc);
1812 }
1813 
1814 void
1815 proc_csflags_update(proc_t p, uint64_t flags)
1816 {
1817 	uint32_t csflags = (uint32_t)flags;
1818 
1819 	if (p != kernproc) {
1820 		zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p),
1821 		    p_csflags, &csflags);
1822 	}
1823 }
1824 
1825 void
1826 proc_csflags_set(proc_t p, uint64_t flags)
1827 {
1828 	proc_csflags_update(p, proc_getcsflags(p) | (uint32_t)flags);
1829 }
1830 
1831 void
1832 proc_csflags_clear(proc_t p, uint64_t flags)
1833 {
1834 	proc_csflags_update(p, proc_getcsflags(p) & ~(uint32_t)flags);
1835 }
1836 
1837 uint8_t *
1838 proc_syscall_filter_mask(proc_t p)
1839 {
1840 	return proc_get_ro(p)->syscall_filter_mask;
1841 }
1842 
1843 void
1844 proc_syscall_filter_mask_set(proc_t p, uint8_t *mask)
1845 {
1846 	zalloc_ro_update_field(ZONE_ID_PROC_RO, proc_get_ro(p),
1847 	    syscall_filter_mask, &mask);
1848 }
1849 
1850 int
1851 proc_exitstatus(proc_t p)
1852 {
1853 	return p->p_xstat & 0xffff;
1854 }
1855 
1856 bool
1857 proc_is_zombie(proc_t p)
1858 {
1859 	return proc_list_exited(p);
1860 }
1861 
1862 void
1863 proc_setexecutableuuid(proc_t p, const unsigned char *uuid)
1864 {
1865 	memcpy(p->p_uuid, uuid, sizeof(p->p_uuid));
1866 }
1867 
1868 const unsigned char *
1869 proc_executableuuid_addr(proc_t p)
1870 {
1871 	return &p->p_uuid[0];
1872 }
1873 
1874 void
1875 proc_getexecutableuuid(proc_t p, unsigned char *uuidbuf, unsigned long size)
1876 {
1877 	if (size >= sizeof(uuid_t)) {
1878 		memcpy(uuidbuf, proc_executableuuid_addr(p), sizeof(uuid_t));
1879 	}
1880 }
1881 
1882 void
1883 proc_getresponsibleuuid(proc_t p, unsigned char *__counted_by(size)uuidbuf, unsigned long size)
1884 {
1885 	if (size >= sizeof(uuid_t)) {
1886 		memcpy(uuidbuf, p->p_responsible_uuid, sizeof(uuid_t));
1887 	}
1888 }
1889 
1890 void
1891 proc_setresponsibleuuid(proc_t p, unsigned char *__counted_by(size)uuidbuf, unsigned long size)
1892 {
1893 	if (p != NULL && uuidbuf != NULL && size >= sizeof(uuid_t)) {
1894 		memcpy(p->p_responsible_uuid, uuidbuf, sizeof(uuid_t));
1895 	}
1896 	return;
1897 }
1898 
1899 /* Return vnode for executable with an iocount. Must be released with vnode_put() */
1900 vnode_t
1901 proc_getexecutablevnode(proc_t p)
1902 {
1903 	vnode_t tvp  = p->p_textvp;
1904 
1905 	if (tvp != NULLVP) {
1906 		if (vnode_getwithref(tvp) == 0) {
1907 			return tvp;
1908 		}
1909 	}
1910 
1911 	return NULLVP;
1912 }
1913 
1914 /*
1915  * Similar to proc_getexecutablevnode() but returns NULLVP if the vnode is
1916  * being reclaimed rather than blocks until reclaim is done.
1917  */
1918 vnode_t
1919 proc_getexecutablevnode_noblock(proc_t p)
1920 {
1921 	vnode_t tvp  = p->p_textvp;
1922 
1923 	if (tvp != NULLVP) {
1924 		if (vnode_getwithref_noblock(tvp) == 0) {
1925 			return tvp;
1926 		}
1927 	}
1928 
1929 	return NULLVP;
1930 }
1931 
1932 int
1933 proc_gettty(proc_t p, vnode_t *vp)
1934 {
1935 	struct session *procsp;
1936 	struct pgrp *pg;
1937 	int err = EINVAL;
1938 
1939 	if (!p || !vp) {
1940 		return EINVAL;
1941 	}
1942 
1943 	if ((pg = proc_pgrp(p, &procsp)) != PGRP_NULL) {
1944 		session_lock(procsp);
1945 		vnode_t ttyvp = procsp->s_ttyvp;
1946 		int ttyvid = procsp->s_ttyvid;
1947 		if (ttyvp) {
1948 			vnode_hold(ttyvp);
1949 		}
1950 		session_unlock(procsp);
1951 
1952 		if (ttyvp) {
1953 			if (vnode_getwithvid(ttyvp, ttyvid) == 0) {
1954 				*vp = ttyvp;
1955 				err = 0;
1956 			}
1957 			vnode_drop(ttyvp);
1958 		} else {
1959 			err = ENOENT;
1960 		}
1961 
1962 		pgrp_rele(pg);
1963 	}
1964 
1965 	return err;
1966 }
1967 
1968 int
1969 proc_gettty_dev(proc_t p, dev_t *devp)
1970 {
1971 	struct pgrp *pg;
1972 	dev_t dev = NODEV;
1973 
1974 	if ((pg = proc_pgrp(p, NULL)) != PGRP_NULL) {
1975 		dev = os_atomic_load(&pg->pg_session->s_ttydev, relaxed);
1976 		pgrp_rele(pg);
1977 	}
1978 
1979 	if (dev == NODEV) {
1980 		return EINVAL;
1981 	}
1982 
1983 	*devp = dev;
1984 	return 0;
1985 }
1986 
1987 int
1988 proc_selfexecutableargs(uint8_t *buf, size_t *buflen)
1989 {
1990 	proc_t p = current_proc();
1991 
1992 	// buflen must always be provided
1993 	if (buflen == NULL) {
1994 		return EINVAL;
1995 	}
1996 
1997 	// If a buf is provided, there must be at least enough room to fit argc
1998 	if (buf && *buflen < sizeof(p->p_argc)) {
1999 		return EINVAL;
2000 	}
2001 
2002 	if (!p->user_stack) {
2003 		return EINVAL;
2004 	}
2005 
2006 	if (buf == NULL) {
2007 		*buflen = p->p_argslen + sizeof(p->p_argc);
2008 		return 0;
2009 	}
2010 
2011 	// Copy in argc to the first 4 bytes
2012 	memcpy(buf, &p->p_argc, sizeof(p->p_argc));
2013 
2014 	if (*buflen > sizeof(p->p_argc) && p->p_argslen > 0) {
2015 		// See memory layout comment in kern_exec.c:exec_copyout_strings()
2016 		// We want to copy starting from `p_argslen` bytes away from top of stack
2017 		return copyin(p->user_stack - p->p_argslen,
2018 		           buf + sizeof(p->p_argc),
2019 		           MIN(p->p_argslen, *buflen - sizeof(p->p_argc)));
2020 	} else {
2021 		return 0;
2022 	}
2023 }
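
/*
 * Illustrative sketch (not part of this file): the usual calling pattern is
 * to query the required size first (buf == NULL), allocate, then call again
 * to fill the buffer.  The first 4 bytes of the result hold argc and the
 * remainder is the argument area copied from the top of the user stack.
 * The helper name below is hypothetical.
 */
#if 0
static int
example_read_own_args(void)
{
	size_t len = 0;
	uint8_t *buf;
	int err;

	err = proc_selfexecutableargs(NULL, &len);      /* size query */
	if (err != 0) {
		return err;
	}
	buf = kalloc_data(len, Z_WAITOK | Z_ZERO);
	err = proc_selfexecutableargs(buf, &len);       /* fill buffer */
	/* ... parse argc followed by the argument strings ... */
	kfree_data(buf, len);
	return err;
}
#endif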
2024 
2025 off_t
2026 proc_getexecutableoffset(proc_t p)
2027 {
2028 	return p->p_textoff;
2029 }
2030 
2031 void
2032 bsd_set_dependency_capable(task_t task)
2033 {
2034 	proc_t p = get_bsdtask_info(task);
2035 
2036 	if (p) {
2037 		OSBitOrAtomic(P_DEPENDENCY_CAPABLE, &p->p_flag);
2038 	}
2039 }
2040 
2041 
2042 #ifndef __arm__
2043 int
2044 IS_64BIT_PROCESS(proc_t p)
2045 {
2046 	if (p && (p->p_flag & P_LP64)) {
2047 		return 1;
2048 	} else {
2049 		return 0;
2050 	}
2051 }
2052 #endif
2053 
2054 SMRH_TRAITS_DEFINE_SCALAR(pid_hash_traits, struct proc, p_pid, p_hash,
2055     .domain = &smr_proc_task);
2056 
2057 /*
2058  * Locate a process by number
2059  */
2060 proc_t
2061 phash_find_locked(pid_t pid)
2062 {
2063 	smrh_key_t key = SMRH_SCALAR_KEY(pid);
2064 
2065 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2066 
2067 	if (!pid) {
2068 		return kernproc;
2069 	}
2070 
2071 	return smr_hash_serialized_find(&pid_hash, key, &pid_hash_traits);
2072 }
2073 
2074 void
2075 phash_replace_locked(struct proc *old_proc, struct proc *new_proc)
2076 {
2077 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2078 
2079 	smr_hash_serialized_replace(&pid_hash,
2080 	    &old_proc->p_hash, &new_proc->p_hash, &pid_hash_traits);
2081 }
2082 
2083 void
2084 phash_insert_locked(struct proc *p)
2085 {
2086 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2087 
2088 	smr_hash_serialized_insert(&pid_hash, &p->p_hash, &pid_hash_traits);
2089 }
2090 
2091 void
2092 phash_remove_locked(struct proc *p)
2093 {
2094 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2095 
2096 	smr_hash_serialized_remove(&pid_hash, &p->p_hash, &pid_hash_traits);
2097 }
2098 
2099 proc_t
2100 proc_find_noref_smr(int pid)
2101 {
2102 	smrh_key_t key = SMRH_SCALAR_KEY(pid);
2103 
2104 	if (__improbable(pid == 0)) {
2105 		return kernproc;
2106 	}
2107 
2108 	return smr_hash_entered_find(&pid_hash, key, &pid_hash_traits);
2109 }
2110 
2111 proc_t
2112 proc_find(int pid)
2113 {
2114 	smrh_key_t key = SMRH_SCALAR_KEY(pid);
2115 	proc_t p;
2116 	uint32_t bits;
2117 	bool shadow_proc = false;
2118 
2119 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2120 
2121 	if (!pid) {
2122 		return proc_ref(kernproc, false);
2123 	}
2124 
2125 retry:
2126 	p = PROC_NULL;
2127 	bits = 0;
2128 	shadow_proc = false;
2129 
2130 	smr_proc_task_enter();
2131 	p = smr_hash_entered_find(&pid_hash, key, &pid_hash_traits);
2132 	if (p) {
2133 		bits = proc_ref_try_fast(p);
2134 		shadow_proc = !!proc_is_shadow(p);
2135 	}
2136 	smr_proc_task_leave();
2137 
2138 	/* Retry if the proc is a shadow proc */
2139 	if (shadow_proc) {
2140 		if (bits) {
2141 			proc_rele(p);
2142 		}
2143 		goto retry;
2144 	}
2145 
2146 	if (__improbable(!bits)) {
2147 		return PROC_NULL;
2148 	}
2149 
2150 	if (__improbable(proc_ref_needs_wait_for_exec(bits))) {
2151 		p = proc_ref_wait_for_exec(p, bits, false);
2152 		/*
2153 		 * Retry if exec was successful since the old proc
2154 		 * would have become a shadow proc and might be in
2155 		 * the middle of exiting.
2156 		 */
2157 		if (p == PROC_NULL || proc_is_shadow(p)) {
2158 			if (p != PROC_NULL) {
2159 				proc_rele(p);
2160 			}
2161 			goto retry;
2162 		}
2163 	}
2164 
2165 	return p;
2166 }
2167 
2168 proc_t
2169 proc_find_locked(int pid)
2170 {
2171 	proc_t p = PROC_NULL;
2172 
2173 retry:
2174 	p = phash_find_locked(pid);
2175 	if (p != PROC_NULL) {
2176 		uint32_t bits;
2177 
2178 		assert(!proc_is_shadow(p));
2179 
2180 		bits = proc_ref_try_fast(p);
2181 		if (__improbable(!bits)) {
2182 			return PROC_NULL;
2183 		}
2184 
2185 		if (__improbable(proc_ref_needs_wait_for_exec(bits))) {
2186 			p = proc_ref_wait_for_exec(p, bits, true);
2187 			/*
2188 			 * Retry if exec was successful since the old proc
2189 			 * would have become a shadow proc and might be in
2190 			 * the middle of exiting.
2191 			 */
2192 			if (p == PROC_NULL || proc_is_shadow(p)) {
2193 				if (p != PROC_NULL) {
2194 					proc_rele(p);
2195 				}
2196 				goto retry;
2197 			}
2198 		}
2199 	}
2200 
2201 	return p;
2202 }
2203 
2204 proc_t
2205 proc_findthread(thread_t thread)
2206 {
2207 	proc_t p = PROC_NULL;
2208 
2209 	proc_list_lock();
2210 	{
2211 		p = (proc_t)(get_bsdthreadtask_info(thread));
2212 	}
2213 	p = proc_ref(p, true);
2214 	proc_list_unlock();
2215 	return p;
2216 }
2217 
2218 
2219 /*
2220  * Locate a zombie by PID
2221  */
2222 __private_extern__ proc_t
2223 pzfind(pid_t pid)
2224 {
2225 	proc_t p;
2226 
2227 
2228 	proc_list_lock();
2229 
2230 	LIST_FOREACH(p, &zombproc, p_list) {
2231 		if (proc_getpid(p) == pid && !proc_is_shadow(p)) {
2232 			break;
2233 		}
2234 	}
2235 
2236 	proc_list_unlock();
2237 
2238 	return p;
2239 }
2240 
2241 /*
2242  * Acquire a pgrp ref, if and only if the pgrp is non-empty.
2243  */
2244 static inline bool
2245 pg_ref_try(struct pgrp *pgrp)
2246 {
2247 	return os_ref_retain_try_mask(&pgrp->pg_refcount, PGRP_REF_BITS,
2248 	           PGRP_REF_EMPTY, &p_refgrp);
2249 }
2250 
2251 static bool
2252 pgrp_hash_obj_try_get(void *pgrp)
2253 {
2254 	return pg_ref_try(pgrp);
2255 }
2256 /*
2257  * Unconditionally acquire a pgrp ref,
2258  * regardless of whether the pgrp is empty or not.
2259  */
2260 static inline struct pgrp *
2261 pg_ref(struct pgrp *pgrp)
2262 {
2263 	os_ref_retain_mask(&pgrp->pg_refcount, PGRP_REF_BITS, &p_refgrp);
2264 	return pgrp;
2265 }
2266 
2267 SMRH_TRAITS_DEFINE_SCALAR(pgrp_hash_traits, struct pgrp, pg_id, pg_hash,
2268     .domain      = &smr_proc_task,
2269     .obj_try_get = pgrp_hash_obj_try_get);
2270 
2271 /*
2272  * Locate a process group by number
2273  */
2274 bool
2275 pghash_exists_locked(pid_t pgid)
2276 {
2277 	smrh_key_t key = SMRH_SCALAR_KEY(pgid);
2278 
2279 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2280 
2281 	return smr_hash_serialized_find(&pgrp_hash, key, &pgrp_hash_traits);
2282 }
2283 
2284 void
2285 pghash_insert_locked(struct pgrp *pgrp)
2286 {
2287 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2288 
2289 	smr_hash_serialized_insert(&pgrp_hash, &pgrp->pg_hash,
2290 	    &pgrp_hash_traits);
2291 }
2292 
2293 static void
2294 pghash_remove_locked(struct pgrp *pgrp)
2295 {
2296 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2297 
2298 	smr_hash_serialized_remove(&pgrp_hash, &pgrp->pg_hash,
2299 	    &pgrp_hash_traits);
2300 }
2301 
2302 struct pgrp *
2303 pgrp_find(pid_t pgid)
2304 {
2305 	smrh_key_t key = SMRH_SCALAR_KEY(pgid);
2306 
2307 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2308 
2309 	return smr_hash_get(&pgrp_hash, key, &pgrp_hash_traits);
2310 }
2311 
2312 /* consumes one ref from pgrp */
2313 static void
2314 pgrp_add_member(struct pgrp *pgrp, struct proc *parent, struct proc *p)
2315 {
2316 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2317 
2318 	pgrp_lock(pgrp);
2319 	if (LIST_EMPTY(&pgrp->pg_members)) {
2320 		os_atomic_andnot(&pgrp->pg_refcount, PGRP_REF_EMPTY, relaxed);
2321 	}
2322 	if (parent != PROC_NULL) {
2323 		assert(pgrp == smr_serialized_load(&parent->p_pgrp));
2324 	}
2325 
2326 	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
2327 	pgrp_unlock(pgrp);
2328 
2329 	p->p_pgrpid = pgrp->pg_id;
2330 	p->p_sessionid = pgrp->pg_session->s_sid;
2331 	smr_serialized_store(&p->p_pgrp, pgrp);
2332 }
2333 
2334 /* returns one ref from pgrp */
2335 static void
2336 pgrp_del_member(struct pgrp *pgrp, struct proc *p)
2337 {
2338 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2339 
2340 	pgrp_lock(pgrp);
2341 	LIST_REMOVE(p, p_pglist);
2342 	if (LIST_EMPTY(&pgrp->pg_members)) {
2343 		os_atomic_or(&pgrp->pg_refcount, PGRP_REF_EMPTY, relaxed);
2344 	}
2345 	pgrp_unlock(pgrp);
2346 }
2347 
2348 void
2349 pgrp_rele(struct pgrp * pgrp)
2350 {
2351 	if (pgrp == PGRP_NULL) {
2352 		return;
2353 	}
2354 
2355 	if (os_ref_release_mask(&pgrp->pg_refcount, PGRP_REF_BITS, &p_refgrp) == 0) {
2356 		pgrp_destroy(pgrp);
2357 	}
2358 }
2359 
2360 struct session *
2361 session_alloc(proc_t leader)
2362 {
2363 	struct session *sess;
2364 
2365 	sess = zalloc_flags(session_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2366 	lck_mtx_init(&sess->s_mlock, &proc_mlock_grp, &proc_lck_attr);
2367 	sess->s_leader = leader;
2368 	sess->s_sid = proc_getpid(leader);
2369 	sess->s_ttypgrpid = NO_PID;
2370 	os_atomic_init(&sess->s_ttydev, NODEV);
2371 	os_ref_init_mask(&sess->s_refcount, SESSION_REF_BITS,
2372 	    &p_refgrp, S_DEFAULT);
2373 
2374 	return sess;
2375 }
2376 
2377 struct tty *
2378 session_set_tty_locked(struct session *sessp, struct tty *tp)
2379 {
2380 	struct tty *old;
2381 
2382 	LCK_MTX_ASSERT(&sessp->s_mlock, LCK_MTX_ASSERT_OWNED);
2383 
2384 	old = sessp->s_ttyp;
2385 	ttyhold(tp);
2386 	sessp->s_ttyp = tp;
2387 	os_atomic_store(&sessp->s_ttydev, tp->t_dev, relaxed);
2388 
2389 	return old;
2390 }
2391 
2392 struct tty *
2393 session_clear_tty_locked(struct session *sessp)
2394 {
2395 	struct tty *tp = sessp->s_ttyp;
2396 
2397 	LCK_MTX_ASSERT(&sessp->s_mlock, LCK_MTX_ASSERT_OWNED);
2398 	sessp->s_ttyvp = NULLVP;
2399 	sessp->s_ttyvid = 0;
2400 	sessp->s_ttyp = TTY_NULL;
2401 	sessp->s_ttypgrpid = NO_PID;
2402 	os_atomic_store(&sessp->s_ttydev, NODEV, relaxed);
2403 
2404 	return tp;
2405 }
2406 
2407 __attribute__((noinline))
2408 static void
2409 session_destroy(struct session *sess)
2410 {
2411 	proc_list_lock();
2412 	LIST_REMOVE(sess, s_hash);
2413 	proc_list_unlock();
2414 
2415 	/*
2416 	 * Either the TTY was closed,
2417 	 * or proc_exit() destroyed it when the leader went away
2418 	 */
2419 	assert(sess->s_ttyp == TTY_NULL);
2420 
2421 	lck_mtx_destroy(&sess->s_mlock, &proc_mlock_grp);
2422 	zfree(session_zone, sess);
2423 }
2424 
2425 struct session *
2426 session_ref(struct session *sess)
2427 {
2428 	os_ref_retain_mask(&sess->s_refcount, SESSION_REF_BITS, &p_refgrp);
2429 	return sess;
2430 }
2431 
2432 void
2433 session_rele(struct session *sess)
2434 {
2435 	if (os_ref_release_mask(&sess->s_refcount, SESSION_REF_BITS, &p_refgrp) == 0) {
2436 		session_destroy(sess);
2437 	}
2438 }
2439 
2440 
2441 /*
2442  * Make a new process ready to become a useful member of society by making it
2443  * visible in all the right places and initializing its own lists to empty.
2444  *
2445  * Parameters:	parent			The parent of the process to insert
2446  *		child			The child process to insert
2447  *		in_exec			The child process is in exec
2448  *
2449  * Returns:	(void)
2450  *
2451  * Notes:	Insert a child process into the parents children list, assign
2452  *		the child the parent process pointer and PPID of the parent...
2453  */
2454 void
2455 pinsertchild(proc_t parent, proc_t child, bool in_exec)
2456 {
2457 	LIST_INIT(&child->p_children);
2458 	proc_t sibling = parent;
2459 
2460 	/* For exec case, new proc is not a child of old proc, but its replacement */
2461 	if (in_exec) {
2462 		parent = proc_parent(parent);
2463 		assert(parent != PROC_NULL);
2464 
2465 		/* Copy the ptrace flags from sibling */
2466 		proc_lock(sibling);
2467 		child->p_oppid = sibling->p_oppid;
2468 		child->p_lflag |= (sibling->p_lflag & (P_LTRACED | P_LSIGEXC | P_LNOATTACH));
2469 		proc_unlock(sibling);
2470 	}
2471 
2472 	proc_list_lock();
2473 
2474 	child->p_pptr = parent;
2475 	child->p_ppid = proc_getpid(parent);
2476 	child->p_original_ppid = in_exec ? sibling->p_original_ppid : proc_getpid(parent);
2477 	child->p_puniqueid = proc_uniqueid(parent);
2478 	child->p_xhighbits = 0;
2479 #if CONFIG_MEMORYSTATUS
2480 	memorystatus_add(child, TRUE);
2481 #endif
2482 
2483 	/* If the parent is initproc and p_original_ppid is not 1, then set the reparent flag */
2484 	if (in_exec && parent == initproc && child->p_original_ppid != 1) {
2485 		child->p_listflag |= P_LIST_DEADPARENT;
2486 	}
2487 
2488 	parent->p_childrencnt++;
2489 	LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
2490 
2491 	LIST_INSERT_HEAD(&allproc, child, p_list);
2492 	/* mark the completion of proc creation */
2493 	os_atomic_andnot(&child->p_refcount, P_REF_NEW, relaxed);
2494 
2495 	proc_list_unlock();
2496 	if (in_exec) {
2497 		proc_rele(parent);
2498 	}
2499 }
2500 
2501 /*
2502  * Reparent all children of old proc to new proc.
2503  *
2504  * Parameters:	old process		Old process.
2505  *		new process		New process.
2506  *
2507  * Returns:	None.
2508  */
2509 void
2510 p_reparentallchildren(proc_t old_proc, proc_t new_proc)
2511 {
2512 	proc_t child;
2513 
2514 	LIST_INIT(&new_proc->p_children);
2515 
2516 	/* Wait for parent ref to drop */
2517 	proc_childdrainstart(old_proc);
2518 
2519 	/* Reparent child from old proc to new proc */
2520 	while ((child = old_proc->p_children.lh_first) != NULL) {
2521 		LIST_REMOVE(child, p_sibling);
2522 		old_proc->p_childrencnt--;
2523 		child->p_pptr = new_proc;
2524 		LIST_INSERT_HEAD(&new_proc->p_children, child, p_sibling);
2525 		new_proc->p_childrencnt++;
2526 	}
2527 
2528 	new_proc->si_pid = old_proc->si_pid;
2529 	new_proc->si_status = old_proc->si_status;
2530 	new_proc->si_code = old_proc->si_code;
2531 	new_proc->si_uid = old_proc->si_uid;
2532 
2533 	proc_childdrainend(old_proc);
2534 }
2535 
2536 /*
2537  * Move p to a new or existing process group (and session)
2538  *
2539  * Returns:	0			Success
2540  *		ESRCH			No such process
2541  */
2542 int
2543 enterpgrp(proc_t p, pid_t pgid, int mksess)
2544 {
2545 	struct pgrp *pgrp;
2546 	struct pgrp *mypgrp;
2547 	struct session *procsp;
2548 
2549 	pgrp = pgrp_find(pgid);
2550 	mypgrp = proc_pgrp(p, &procsp);
2551 
2552 #if DIAGNOSTIC
2553 	if (pgrp != NULL && mksess) {   /* firewalls */
2554 		panic("enterpgrp: setsid into non-empty pgrp");
2555 	}
2556 	if (SESS_LEADER(p, mypgrp->pg_session)) {
2557 		panic("enterpgrp: session leader attempted setpgrp");
2558 	}
2559 #endif
2560 	if (pgrp == PGRP_NULL) {
2561 		struct session *sess;
2562 		pid_t savepid = proc_getpid(p);
2563 		proc_t np = PROC_NULL;
2564 
2565 		/*
2566 		 * new process group
2567 		 */
2568 #if DIAGNOSTIC
2569 		if (proc_getpid(p) != pgid) {
2570 			panic("enterpgrp: new pgrp and pid != pgid");
2571 		}
2572 #endif
2573 		if ((np = proc_find(savepid)) == NULL || np != p) {
2574 			if (np != PROC_NULL) {
2575 				proc_rele(np);
2576 			}
2577 			pgrp_rele(mypgrp);
2578 			return ESRCH;
2579 		}
2580 		proc_rele(np);
2581 
2582 		pgrp = pgrp_alloc(pgid, PGRP_REF_EMPTY);
2583 
2584 		if (mksess) {
2585 			/*
2586 			 * new session
2587 			 */
2588 			sess = session_alloc(p);
2589 
2590 			bcopy(mypgrp->pg_session->s_login, sess->s_login,
2591 			    sizeof(sess->s_login));
2592 			os_atomic_andnot(&p->p_flag, P_CONTROLT, relaxed);
2593 		} else {
2594 			sess = session_ref(procsp);
2595 		}
2596 
2597 		proc_list_lock();
2598 		pgrp->pg_session = sess;
2599 		p->p_sessionid = sess->s_sid;
2600 		pghash_insert_locked(pgrp);
2601 		if (mksess) {
2602 			LIST_INSERT_HEAD(SESSHASH(sess->s_sid), sess, s_hash);
2603 		}
2604 		proc_list_unlock();
2605 	} else if (pgrp == mypgrp) {
2606 		pgrp_rele(pgrp);
2607 		pgrp_rele(mypgrp);
2608 		return 0;
2609 	}
2610 
2611 	/*
2612 	 * Adjust eligibility of affected pgrps to participate in job control.
2613 	 * Increment eligibility counts before decrementing, otherwise we
2614 	 * could reach 0 spuriously during the first call.
2615 	 */
2616 	fixjobc(p, pgrp, 1);
2617 	fixjobc(p, mypgrp, 0);
2618 
2619 	pgrp_rele(mypgrp);
2620 	pgrp_replace(p, pgrp);
2621 
2622 	return 0;
2623 }
2624 
2625 /*
2626  * remove process from process group
2627  */
2628 struct pgrp *
2629 pgrp_leave_locked(proc_t p)
2630 {
2631 	struct pgrp *pg;
2632 
2633 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2634 
2635 	pg = smr_serialized_load(&p->p_pgrp);
2636 	pgrp_del_member(pg, p);
2637 	p->p_pgrpid = PGRPID_DEAD;
2638 	smr_clear_store(&p->p_pgrp);
2639 
2640 	return pg;
2641 }
2642 
2643 struct pgrp *
2644 pgrp_enter_locked(struct proc *parent, struct proc *child)
2645 {
2646 	struct pgrp *pgrp;
2647 
2648 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2649 
2650 	pgrp = pg_ref(smr_serialized_load(&parent->p_pgrp));
2651 	pgrp_add_member(pgrp, parent, child);
2652 	return pgrp;
2653 }
2654 
2655 /*
2656  * delete a process group
2657  */
2658 static void
2659 pgrp_free(smr_node_t node)
2660 {
2661 	struct pgrp *pgrp = __container_of(node, struct pgrp, pg_smr_node);
2662 
2663 	zfree(pgrp_zone, pgrp);
2664 }
2665 
2666 __attribute__((noinline))
2667 static void
2668 pgrp_destroy(struct pgrp *pgrp)
2669 {
2670 	struct session *sess;
2671 
2672 	assert(LIST_EMPTY(&pgrp->pg_members));
2673 	assert(os_ref_get_raw_mask(&pgrp->pg_refcount) & PGRP_REF_EMPTY);
2674 
2675 	proc_list_lock();
2676 	pghash_remove_locked(pgrp);
2677 	proc_list_unlock();
2678 
2679 	sess = pgrp->pg_session;
2680 	pgrp->pg_session = SESSION_NULL;
2681 	session_rele(sess);
2682 
2683 	lck_mtx_destroy(&pgrp->pg_mlock, &proc_mlock_grp);
2684 	if (os_ref_release_raw(&pgrp->pg_hashref, &p_refgrp) == 0) {
2685 		smr_proc_task_call(&pgrp->pg_smr_node, sizeof(*pgrp), pgrp_free);
2686 	}
2687 }
2688 
2689 
2690 /*
2691  * Adjust pgrp jobc counters when specified process changes process group.
2692  * We count the number of processes in each process group that "qualify"
2693  * the group for terminal job control (those with a parent in a different
2694  * process group of the same session).  If that count reaches zero, the
2695  * process group becomes orphaned.  Check both the specified process'
2696  * process group and that of its children.
2697  * entering == 0 => p is leaving specified group.
2698  * entering == 1 => p is entering specified group.
2699  */
2700 int
2701 fixjob_callback(proc_t p, void * arg)
2702 {
2703 	struct fixjob_iterargs *fp;
2704 	struct pgrp * pg, *hispg;
2705 	struct session * mysession, *hissess;
2706 	int entering;
2707 
2708 	fp = (struct fixjob_iterargs *)arg;
2709 	pg = fp->pg;
2710 	mysession = fp->mysession;
2711 	entering = fp->entering;
2712 
2713 	hispg = proc_pgrp(p, &hissess);
2714 
2715 	if (hispg != pg && hissess == mysession) {
2716 		pgrp_lock(hispg);
2717 		if (entering) {
2718 			hispg->pg_jobc++;
2719 			pgrp_unlock(hispg);
2720 		} else if (--hispg->pg_jobc == 0) {
2721 			pgrp_unlock(hispg);
2722 			orphanpg(hispg);
2723 		} else {
2724 			pgrp_unlock(hispg);
2725 		}
2726 	}
2727 	pgrp_rele(hispg);
2728 
2729 	return PROC_RETURNED;
2730 }
2731 
2732 void
2733 fixjobc(proc_t p, struct pgrp *pgrp, int entering)
2734 {
2735 	struct pgrp *hispgrp = PGRP_NULL;
2736 	struct session *hissess = SESSION_NULL;
2737 	struct session *mysession = pgrp->pg_session;
2738 	proc_t parent;
2739 	struct fixjob_iterargs fjarg;
2740 	boolean_t proc_parent_self;
2741 
2742 	/*
2743 	 * Check if p's parent is current proc, if yes then no need to take
2744 	 * a ref; calling proc_parent with current proc as parent may
2745 	 * deadlock if current proc is exiting.
2746 	 */
2747 	proc_parent_self = proc_parent_is_currentproc(p);
2748 	if (proc_parent_self) {
2749 		parent = current_proc();
2750 	} else {
2751 		parent = proc_parent(p);
2752 	}
2753 
2754 	if (parent != PROC_NULL) {
2755 		hispgrp = proc_pgrp(parent, &hissess);
2756 		if (!proc_parent_self) {
2757 			proc_rele(parent);
2758 		}
2759 	}
2760 
2761 	/*
2762 	 * Check p's parent to see whether p qualifies its own process
2763 	 * group; if so, adjust count for p's process group.
2764 	 */
2765 	if (hispgrp != pgrp && hissess == mysession) {
2766 		pgrp_lock(pgrp);
2767 		if (entering) {
2768 			pgrp->pg_jobc++;
2769 			pgrp_unlock(pgrp);
2770 		} else if (--pgrp->pg_jobc == 0) {
2771 			pgrp_unlock(pgrp);
2772 			orphanpg(pgrp);
2773 		} else {
2774 			pgrp_unlock(pgrp);
2775 		}
2776 	}
2777 
2778 	pgrp_rele(hispgrp);
2779 
2780 	/*
2781 	 * Check this process' children to see whether they qualify
2782 	 * their process groups; if so, adjust counts for children's
2783 	 * process groups.
2784 	 */
2785 	fjarg.pg = pgrp;
2786 	fjarg.mysession = mysession;
2787 	fjarg.entering = entering;
2788 	proc_childrenwalk(p, fixjob_callback, &fjarg);
2789 }
2790 
2791 /*
2792  * The pidlist_* routines support the functions in this file that
2793  * walk lists of processes applying filters and callouts to the
2794  * elements of the list.
2795  *
2796  * A prior implementation used a single linear array, which can be
2797  * tricky to allocate on large systems. This implementation creates
2798  * an SLIST of modestly sized arrays of PIDS_PER_ENTRY elements.
2799  *
2800  * The array should be sized large enough to keep the overhead of
2801  * walking the list low, but small enough that blocking allocations of
2802  * pidlist_entry_t structures always succeed.
2803  */
2804 
2805 #define PIDS_PER_ENTRY 1021
2806 
2807 typedef struct pidlist_entry {
2808 	SLIST_ENTRY(pidlist_entry) pe_link;
2809 	u_int pe_nused;
2810 	pid_t pe_pid[PIDS_PER_ENTRY];
2811 } pidlist_entry_t;
2812 
2813 typedef struct {
2814 	SLIST_HEAD(, pidlist_entry) pl_head;
2815 	struct pidlist_entry *pl_active;
2816 	u_int pl_nalloc;
2817 } pidlist_t;
2818 
2819 static __inline__ pidlist_t *
2820 pidlist_init(pidlist_t *pl)
2821 {
2822 	SLIST_INIT(&pl->pl_head);
2823 	pl->pl_active = NULL;
2824 	pl->pl_nalloc = 0;
2825 	return pl;
2826 }
2827 
2828 static u_int
2829 pidlist_alloc(pidlist_t *pl, u_int needed)
2830 {
2831 	while (pl->pl_nalloc < needed) {
2832 		pidlist_entry_t *pe = kalloc_type(pidlist_entry_t,
2833 		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
2834 		SLIST_INSERT_HEAD(&pl->pl_head, pe, pe_link);
2835 		pl->pl_nalloc += (sizeof(pe->pe_pid) / sizeof(pe->pe_pid[0]));
2836 	}
2837 	return pl->pl_nalloc;
2838 }
2839 
2840 static void
2841 pidlist_free(pidlist_t *pl)
2842 {
2843 	pidlist_entry_t *pe;
2844 	while (NULL != (pe = SLIST_FIRST(&pl->pl_head))) {
2845 		SLIST_FIRST(&pl->pl_head) = SLIST_NEXT(pe, pe_link);
2846 		kfree_type(pidlist_entry_t, pe);
2847 	}
2848 	pl->pl_nalloc = 0;
2849 }
2850 
2851 static __inline__ void
2852 pidlist_set_active(pidlist_t *pl)
2853 {
2854 	pl->pl_active = SLIST_FIRST(&pl->pl_head);
2855 	assert(pl->pl_active);
2856 }
2857 
2858 static void
2859 pidlist_add_pid(pidlist_t *pl, pid_t pid)
2860 {
2861 	pidlist_entry_t *pe = pl->pl_active;
2862 	if (pe->pe_nused >= sizeof(pe->pe_pid) / sizeof(pe->pe_pid[0])) {
2863 		if (NULL == (pe = SLIST_NEXT(pe, pe_link))) {
2864 			panic("pidlist allocation exhausted");
2865 		}
2866 		pl->pl_active = pe;
2867 	}
2868 	pe->pe_pid[pe->pe_nused++] = pid;
2869 }
2870 
2871 static __inline__ u_int
2872 pidlist_nalloc(const pidlist_t *pl)
2873 {
2874 	return pl->pl_nalloc;
2875 }
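
/*
 * Illustrative sketch (not part of this file): the canonical pidlist
 * pattern used by the iterators below is to grow the list outside the
 * relevant lock until it can hold a snapshot, mark it active, collect the
 * pids under the lock, and free it when done.  The helper name is
 * hypothetical and "count" would normally be computed under that lock.
 */
#if 0
static void
example_pidlist_pattern(u_int count)
{
	pidlist_t pid_list, *pl = pidlist_init(&pid_list);

	while (pidlist_nalloc(pl) < count) {
		pidlist_alloc(pl, count);       /* may block; done unlocked */
	}
	pidlist_set_active(pl);
	/* ... pidlist_add_pid(pl, pid) for each snapshotted process ... */
	pidlist_free(pl);
}
#endif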
2876 
2877 /*
2878  * A process group has become orphaned; if there are any stopped processes in
2879  * the group, hang up all processes in that group.
2880  */
2881 static void
2882 orphanpg(struct pgrp *pgrp)
2883 {
2884 	pidlist_t pid_list, *pl = pidlist_init(&pid_list);
2885 	u_int pid_count_available = 0;
2886 	proc_t p;
2887 
2888 	/* allocate outside of the pgrp_lock */
2889 	for (;;) {
2890 		pgrp_lock(pgrp);
2891 
2892 		boolean_t should_iterate = FALSE;
2893 		pid_count_available = 0;
2894 
2895 		PGMEMBERS_FOREACH(pgrp, p) {
2896 			pid_count_available++;
2897 			if (p->p_stat == SSTOP) {
2898 				should_iterate = TRUE;
2899 			}
2900 		}
2901 		if (pid_count_available == 0 || !should_iterate) {
2902 			pgrp_unlock(pgrp);
2903 			goto out; /* no orphaned processes OR nothing stopped */
2904 		}
2905 		if (pidlist_nalloc(pl) >= pid_count_available) {
2906 			break;
2907 		}
2908 		pgrp_unlock(pgrp);
2909 
2910 		pidlist_alloc(pl, pid_count_available);
2911 	}
2912 	pidlist_set_active(pl);
2913 
2914 	u_int pid_count = 0;
2915 	PGMEMBERS_FOREACH(pgrp, p) {
2916 		pidlist_add_pid(pl, proc_pid(p));
2917 		if (++pid_count >= pid_count_available) {
2918 			break;
2919 		}
2920 	}
2921 	pgrp_unlock(pgrp);
2922 
2923 	const pidlist_entry_t *pe;
2924 	SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
2925 		for (u_int i = 0; i < pe->pe_nused; i++) {
2926 			const pid_t pid = pe->pe_pid[i];
2927 			if (0 == pid) {
2928 				continue; /* skip kernproc */
2929 			}
2930 			p = proc_find(pid);
2931 			if (!p) {
2932 				continue;
2933 			}
2934 			proc_transwait(p, 0);
2935 			pt_setrunnable(p);
2936 			psignal(p, SIGHUP);
2937 			psignal(p, SIGCONT);
2938 			proc_rele(p);
2939 		}
2940 	}
2941 out:
2942 	pidlist_free(pl);
2943 }
2944 
2945 boolean_t
2946 proc_is_translated(proc_t p)
2947 {
2948 	return p && ((p->p_flag & P_TRANSLATED) != 0);
2949 }
2950 
2951 
2952 
2953 int
2954 proc_is_classic(proc_t p __unused)
2955 {
2956 	return 0;
2957 }
2958 
2959 bool
2960 proc_is_exotic(
2961 	proc_t p)
2962 {
2963 	if (p == NULL) {
2964 		return false;
2965 	}
2966 	return task_is_exotic(proc_task(p));
2967 }
2968 
2969 bool
2970 proc_is_alien(
2971 	proc_t p)
2972 {
2973 	if (p == NULL) {
2974 		return false;
2975 	}
2976 	return task_is_alien(proc_task(p));
2977 }
2978 
2979 bool
2980 proc_is_driver(proc_t p)
2981 {
2982 	if (p == NULL) {
2983 		return false;
2984 	}
2985 	return task_is_driver(proc_task(p));
2986 }
2987 
2988 bool
2989 proc_is_third_party_debuggable_driver(proc_t p)
2990 {
2991 #if XNU_TARGET_OS_IOS
2992 	uint64_t csflags;
2993 	if (proc_csflags(p, &csflags) != 0) {
2994 		return false;
2995 	}
2996 
2997 	if (proc_is_driver(p) &&
2998 	    !csproc_get_platform_binary(p) &&
2999 	    IOTaskHasEntitlement(proc_task(p), kIODriverKitEntitlementKey) &&
3000 	    (csflags & CS_GET_TASK_ALLOW) != 0) {
3001 		return true;
3002 	}
3003 
3004 	return false;
3005 
3006 #else
3007 	/* On other platforms, fall back to existing rules for debugging */
3008 	(void)p;
3009 	return false;
3010 #endif /* XNU_TARGET_OS_IOS */
3011 }
3012 
3013 /* XXX Why does this function exist?  Need to kill it off... */
3014 proc_t
3015 current_proc_EXTERNAL(void)
3016 {
3017 	return current_proc();
3018 }
3019 
3020 int
3021 proc_is_forcing_hfs_case_sensitivity(proc_t p)
3022 {
3023 	return (p->p_vfs_iopolicy & P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY) ? 1 : 0;
3024 }
3025 
3026 bool
3027 proc_ignores_content_protection(proc_t p)
3028 {
3029 	return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_IGNORE_CONTENT_PROTECTION;
3030 }
3031 
3032 bool
3033 proc_ignores_node_permissions(proc_t p)
3034 {
3035 	return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_IGNORE_NODE_PERMISSIONS;
3036 }
3037 
3038 bool
3039 proc_skip_mtime_update(proc_t p)
3040 {
3041 	struct uthread *ut = NULL;
3042 
3043 	/*
3044 	 * We only check the thread's policy if the current proc matches the given
3045 	 * proc.
3046 	 */
3047 	if (current_proc() == p) {
3048 		ut = get_bsdthread_info(current_thread());
3049 	}
3050 
3051 	if (ut && (os_atomic_load(&ut->uu_flag, relaxed) & UT_SKIP_MTIME_UPDATE)) {
3052 		return true;
3053 	}
3054 
3055 	if (p && (os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_SKIP_MTIME_UPDATE)) {
3056 		return true;
3057 	}
3058 
3059 	return false;
3060 }
3061 
3062 bool
3063 proc_allow_low_space_writes(proc_t p)
3064 {
3065 	return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_ALLOW_LOW_SPACE_WRITES;
3066 }
3067 
3068 bool
3069 proc_disallow_rw_for_o_evtonly(proc_t p)
3070 {
3071 	return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_DISALLOW_RW_FOR_O_EVTONLY;
3072 }
3073 
3074 bool
3075 proc_use_alternative_symlink_ea(proc_t p)
3076 {
3077 	return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_ALTLINK;
3078 }
3079 
3080 bool
3081 proc_is_rsr(proc_t p)
3082 {
3083 	return os_atomic_load(&p->p_ladvflag, relaxed) & P_RSR;
3084 }
3085 
3086 #if CONFIG_COREDUMP
3087 /*
3088  * proc_core_name(format, name, uid, pid)
3089  * Expand the name described in format, using name, uid, and pid.
3090  * format is a printf-like string, with four format specifiers:
3091  *	%N	name of process ("name")
3092  *	%P	process id (pid)
3093  *	%U	user id (uid)
3094  *	%T  mach_continuous_time() timestamp
3095  * For example, "%N.core" is the default; they can be disabled completely
3096  * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
3097  * This is controlled by the sysctl variable kern.corefile (see above).
3098  */
3099 __private_extern__ int
3100 proc_core_name(const char *format, const char * name, uid_t uid, pid_t pid, char *cf_name,
3101     size_t cf_name_len)
3102 {
3103 	const char *appendstr;
3104 	char id_buf[sizeof(OS_STRINGIFY(INT32_MAX))];          /* Buffer for pid/uid -- max 4B */
3105 	_Static_assert(sizeof(id_buf) == 11, "size mismatch");
3106 	char timestamp_buf[sizeof(OS_STRINGIFY(UINT64_MAX))];  /* Buffer for timestamp, including null terminator */
3107 	clock_sec_t secs = 0;
3108 	_Static_assert(sizeof(clock_sec_t) <= sizeof(uint64_t), "size mismatch");
3109 	clock_usec_t microsecs = 0;
3110 	size_t i, l, n;
3111 
3112 	if (cf_name == NULL) {
3113 		goto toolong;
3114 	}
3115 
3116 	for (i = 0, n = 0; n < cf_name_len && format[i]; i++) {
3117 		switch (format[i]) {
3118 		case '%':       /* Format character */
3119 			i++;
3120 			switch (format[i]) {
3121 			case '%':
3122 				appendstr = "%";
3123 				break;
3124 			case 'N':       /* process name */
3125 				appendstr = name;
3126 				break;
3127 			case 'P':       /* process id */
3128 				snprintf(id_buf, sizeof(id_buf), "%u", pid);
3129 				appendstr = id_buf;
3130 				break;
3131 			case 'U':       /* user id */
3132 				snprintf(id_buf, sizeof(id_buf), "%u", uid);
3133 				appendstr = id_buf;
3134 				break;
3135 			case 'T':       /* MCT timestamp */
3136 				snprintf(timestamp_buf, sizeof(timestamp_buf), "%llu", mach_continuous_time());
3137 				appendstr = timestamp_buf;
3138 				break;
3139 			case 't':       /* Unix timestamp */
3140 				clock_gettimeofday(&secs, &microsecs);
3141 				snprintf(timestamp_buf, sizeof(timestamp_buf), "%lu", secs);
3142 				appendstr = timestamp_buf;
3143 				break;
3144 			case '\0': /* format string ended in % symbol */
3145 				goto endofstring;
3146 			default:
3147 				appendstr = "";
3148 				log(LOG_ERR,
3149 				    "Unknown format character %c in `%s'\n",
3150 				    format[i], format);
3151 			}
3152 			l = strlen(appendstr);
3153 			if ((n + l) >= cf_name_len) {
3154 				goto toolong;
3155 			}
3156 			bcopy(appendstr, cf_name + n, l);
3157 			n += l;
3158 			break;
3159 		default:
3160 			cf_name[n++] = format[i];
3161 		}
3162 	}
3163 	if (format[i] != '\0') {
3164 		goto toolong;
3165 	}
3166 	return 0;
3167 toolong:
3168 	log(LOG_ERR, "pid %ld (%s), uid (%u): corename is too long\n",
3169 	    (long)pid, name, (uint32_t)uid);
3170 	return 1;
3171 endofstring:
3172 	log(LOG_ERR, "pid %ld (%s), uid (%u): unexpected end of string after %% token\n",
3173 	    (long)pid, name, (uint32_t)uid);
3174 	return 1;
3175 }
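
/*
 * Illustrative sketch (not part of this file): expanding the template
 * "/cores/%U/%N-%P" mentioned in the comment above for uid 501, pid 1234
 * and name "myapp" would yield "/cores/501/myapp-1234".  The helper name
 * below is hypothetical.
 */
#if 0
static void
example_core_name(void)
{
	char cf_name[MAXPATHLEN];

	if (proc_core_name("/cores/%U/%N-%P", "myapp", 501, 1234,
	    cf_name, sizeof(cf_name)) == 0) {
		/* cf_name now holds "/cores/501/myapp-1234" */
	}
}
#endif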
3176 #endif /* CONFIG_COREDUMP */
3177 
3178 /* Code Signing related routines */
3179 
3180 int
3181 csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval)
3182 {
3183 	return csops_internal(uap->pid, uap->ops, uap->useraddr,
3184 	           uap->usersize, USER_ADDR_NULL);
3185 }
3186 
3187 int
3188 csops_audittoken(__unused proc_t p, struct csops_audittoken_args *uap, __unused int32_t *retval)
3189 {
3190 	if (uap->uaudittoken == USER_ADDR_NULL) {
3191 		return EINVAL;
3192 	}
3193 	return csops_internal(uap->pid, uap->ops, uap->useraddr,
3194 	           uap->usersize, uap->uaudittoken);
3195 }
3196 
3197 static int
3198 csops_copy_token(const void *start, size_t length, user_size_t usize, user_addr_t uaddr)
3199 {
3200 	char fakeheader[8] = { 0 };
3201 	int error;
3202 
3203 	if (usize < sizeof(fakeheader)) {
3204 		return ERANGE;
3205 	}
3206 
3207 	/* if no blob, fill in zero header */
3208 	if (NULL == start) {
3209 		start = fakeheader;
3210 		length = sizeof(fakeheader);
3211 	} else if (usize < length) {
3212 		/* ... if input too short, copy out length of entitlement */
3213 		uint32_t length32 = htonl((uint32_t)length);
3214 		memcpy(&fakeheader[4], &length32, sizeof(length32));
3215 
3216 		error = copyout(fakeheader, uaddr, sizeof(fakeheader));
3217 		if (error == 0) {
3218 			return ERANGE; /* input buffer too short, ERANGE signals that */
3219 		}
3220 		return error;
3221 	}
3222 	return copyout(start, uaddr, length);
3223 }
3224 
3225 static int
3226 csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaudittoken)
3227 {
3228 	size_t usize = (size_t)CAST_DOWN(size_t, usersize);
3229 	proc_t pt;
3230 	int forself;
3231 	int error;
3232 	vnode_t tvp;
3233 	off_t toff;
3234 	unsigned char cdhash[SHA1_RESULTLEN];
3235 	audit_token_t token;
3236 	unsigned int upid = 0, uidversion = 0;
3237 
3238 	forself = error = 0;
3239 
3240 	if (pid == 0) {
3241 		pid = proc_selfpid();
3242 	}
3243 	if (pid == proc_selfpid()) {
3244 		forself = 1;
3245 	}
3246 
3247 
3248 	switch (ops) {
3249 	case CS_OPS_STATUS:
3250 	case CS_OPS_CDHASH:
3251 	case CS_OPS_PIDOFFSET:
3252 	case CS_OPS_ENTITLEMENTS_BLOB:
3253 	case CS_OPS_DER_ENTITLEMENTS_BLOB:
3254 	case CS_OPS_IDENTITY:
3255 	case CS_OPS_BLOB:
3256 	case CS_OPS_TEAMID:
3257 	case CS_OPS_CLEAR_LV:
3258 	case CS_OPS_VALIDATION_CATEGORY:
3259 		break;          /* not restricted to root */
3260 	default:
3261 		if (forself == 0 && kauth_cred_issuser(kauth_cred_get()) != TRUE) {
3262 			return EPERM;
3263 		}
3264 		break;
3265 	}
3266 
3267 	pt = proc_find(pid);
3268 	if (pt == PROC_NULL) {
3269 		return ESRCH;
3270 	}
3271 
3272 	upid = proc_getpid(pt);
3273 	uidversion = proc_pidversion(pt);
3274 	if (uaudittoken != USER_ADDR_NULL) {
3275 		error = copyin(uaudittoken, &token, sizeof(audit_token_t));
3276 		if (error != 0) {
3277 			goto out;
3278 		}
3279 		/* verify the audit token pid/idversion matches with proc */
3280 		if ((token.val[5] != upid) || (token.val[7] != uidversion)) {
3281 			error = ESRCH;
3282 			goto out;
3283 		}
3284 	}
3285 
3286 #if CONFIG_MACF
3287 	switch (ops) {
3288 	case CS_OPS_MARKINVALID:
3289 	case CS_OPS_MARKHARD:
3290 	case CS_OPS_MARKKILL:
3291 	case CS_OPS_MARKRESTRICT:
3292 	case CS_OPS_SET_STATUS:
3293 	case CS_OPS_CLEARINSTALLER:
3294 	case CS_OPS_CLEARPLATFORM:
3295 	case CS_OPS_CLEAR_LV:
3296 		if ((error = mac_proc_check_set_cs_info(current_proc(), pt, ops))) {
3297 			goto out;
3298 		}
3299 		break;
3300 	default:
3301 		if ((error = mac_proc_check_get_cs_info(current_proc(), pt, ops))) {
3302 			goto out;
3303 		}
3304 	}
3305 #endif
3306 
3307 	switch (ops) {
3308 	case CS_OPS_STATUS: {
3309 		uint32_t retflags;
3310 
3311 		proc_lock(pt);
3312 		retflags = (uint32_t)proc_getcsflags(pt);
3313 		if (cs_process_enforcement(pt)) {
3314 			retflags |= CS_ENFORCEMENT;
3315 		}
3316 		if (csproc_get_platform_binary(pt)) {
3317 			retflags |= CS_PLATFORM_BINARY;
3318 		}
3319 		if (csproc_get_platform_path(pt)) {
3320 			retflags |= CS_PLATFORM_PATH;
3321 		}
3322 		// Don't return CS_REQUIRE_LV if we turned it on with CS_FORCED_LV, but still report CS_FORCED_LV
3323 		if ((proc_getcsflags(pt) & CS_FORCED_LV) == CS_FORCED_LV) {
3324 			retflags &= (~CS_REQUIRE_LV);
3325 		}
3326 		proc_unlock(pt);
3327 
3328 		if (uaddr != USER_ADDR_NULL) {
3329 			error = copyout(&retflags, uaddr, sizeof(uint32_t));
3330 		}
3331 		break;
3332 	}
3333 	case CS_OPS_MARKINVALID:
3334 		proc_lock(pt);
3335 		if ((proc_getcsflags(pt) & CS_VALID) == CS_VALID) {           /* is currently valid */
3336 			proc_csflags_clear(pt, CS_VALID);       /* set invalid */
3337 			cs_process_invalidated(pt);
3338 			if ((proc_getcsflags(pt) & CS_KILL) == CS_KILL) {
3339 				proc_csflags_set(pt, CS_KILLED);
3340 				proc_unlock(pt);
3341 				if (cs_debug) {
3342 					printf("CODE SIGNING: marked invalid by pid %d: "
3343 					    "p=%d[%s] honoring CS_KILL, final status 0x%x\n",
3344 					    proc_selfpid(), proc_getpid(pt), pt->p_comm,
3345 					    (unsigned int)proc_getcsflags(pt));
3346 				}
3347 				psignal(pt, SIGKILL);
3348 			} else {
3349 				proc_unlock(pt);
3350 			}
3351 		} else {
3352 			proc_unlock(pt);
3353 		}
3354 
3355 		break;
3356 
3357 	case CS_OPS_MARKHARD:
3358 		proc_lock(pt);
3359 		proc_csflags_set(pt, CS_HARD);
3360 		if ((proc_getcsflags(pt) & CS_VALID) == 0) {
3361 			/* @@@ allow? reject? kill? @@@ */
3362 			proc_unlock(pt);
3363 			error = EINVAL;
3364 			goto out;
3365 		} else {
3366 			proc_unlock(pt);
3367 		}
3368 		break;
3369 
3370 	case CS_OPS_MARKKILL:
3371 		proc_lock(pt);
3372 		proc_csflags_set(pt, CS_KILL);
3373 		if ((proc_getcsflags(pt) & CS_VALID) == 0) {
3374 			proc_unlock(pt);
3375 			psignal(pt, SIGKILL);
3376 		} else {
3377 			proc_unlock(pt);
3378 		}
3379 		break;
3380 
3381 	case CS_OPS_PIDOFFSET:
3382 		toff = pt->p_textoff;
3383 		proc_rele(pt);
3384 		error = copyout(&toff, uaddr, sizeof(toff));
3385 		return error;
3386 
3387 	case CS_OPS_CDHASH:
3388 
3389 		/* pt already holds a reference on its p_textvp */
3390 		tvp = pt->p_textvp;
3391 		toff = pt->p_textoff;
3392 
3393 		if (tvp == NULLVP || usize != SHA1_RESULTLEN) {
3394 			proc_rele(pt);
3395 			return EINVAL;
3396 		}
3397 
3398 		error = vn_getcdhash(tvp, toff, cdhash);
3399 		proc_rele(pt);
3400 
3401 		if (error == 0) {
3402 			error = copyout(cdhash, uaddr, sizeof(cdhash));
3403 		}
3404 
3405 		return error;
3406 
3407 	case CS_OPS_ENTITLEMENTS_BLOB: {
3408 		void *start;
3409 		size_t length;
3410 		struct cs_blob* blob;
3411 
3412 		proc_lock(pt);
3413 		if ((proc_getcsflags(pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
3414 			proc_unlock(pt);
3415 			error = EINVAL;
3416 			goto out;
3417 		}
3418 		blob = csproc_get_blob(pt);
3419 		proc_unlock(pt);
3420 
3421 		if (!blob) {
3422 			error = EBADEXEC;
3423 			goto out;
3424 		}
3425 
3426 		void* osent = csblob_os_entitlements_get(blob);
3427 		if (!osent) {
3428 			goto out;
3429 		}
3430 		CS_GenericBlob* xmlblob = NULL;
3431 		if (amfi->OSEntitlements_get_xml(osent, &xmlblob)) {
3432 			start = (void*)xmlblob;
3433 			length = (size_t)ntohl(xmlblob->length);
3434 		} else {
3435 			goto out;
3436 		}
3437 
3438 		error = csops_copy_token(start, length, usize, uaddr);
3439 		kfree_data(start, length);
3440 		goto out;
3441 	}
3442 	case CS_OPS_DER_ENTITLEMENTS_BLOB: {
3443 		const void *start;
3444 		size_t length;
3445 		struct cs_blob* blob;
3446 
3447 		proc_lock(pt);
3448 		if ((proc_getcsflags(pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
3449 			proc_unlock(pt);
3450 			error = EINVAL;
3451 			goto out;
3452 		}
3453 		blob = csproc_get_blob(pt);
3454 		proc_unlock(pt);
3455 
3456 		if (!blob) {
3457 			error = EBADEXEC;
3458 			goto out;
3459 		}
3460 
3461 		error = csblob_get_der_entitlements(blob, (const CS_GenericBlob **)&start, &length);
3462 		if (error || start == NULL) {
3463 			if (amfi && csblob_os_entitlements_get(blob)) {
3464 				void* osent = csblob_os_entitlements_get(blob);
3465 
3466 				const CS_GenericBlob* transmuted = NULL;
3467 				if (amfi->OSEntitlements_get_transmuted(osent, &transmuted)) {
3468 					start = transmuted;
3469 					length = (size_t)ntohl(transmuted->length);
3470 				} else {
3471 					goto out;
3472 				}
3473 			} else {
3474 				goto out;
3475 			}
3476 		}
3477 
3478 		error = csops_copy_token(start, length, usize, uaddr);
3479 		goto out;
3480 	}
3481 
3482 	case CS_OPS_VALIDATION_CATEGORY:
3483 	{
3484 		unsigned int validation_category = CS_VALIDATION_CATEGORY_INVALID;
3485 		error = csproc_get_validation_category(pt, &validation_category);
3486 		if (error) {
3487 			goto out;
3488 		}
3489 		error = copyout(&validation_category, uaddr, sizeof(validation_category));
3490 		break;
3491 	}
3492 
3493 	case CS_OPS_MARKRESTRICT:
3494 		proc_lock(pt);
3495 		proc_csflags_set(pt, CS_RESTRICT);
3496 		proc_unlock(pt);
3497 		break;
3498 
3499 	case CS_OPS_SET_STATUS: {
3500 		uint32_t flags;
3501 
3502 		if (usize < sizeof(flags)) {
3503 			error = ERANGE;
3504 			break;
3505 		}
3506 
3507 		error = copyin(uaddr, &flags, sizeof(flags));
3508 		if (error) {
3509 			break;
3510 		}
3511 
3512 		/* only allow setting a subset of all code sign flags */
3513 		flags &=
3514 		    CS_HARD | CS_EXEC_SET_HARD |
3515 		    CS_KILL | CS_EXEC_SET_KILL |
3516 		    CS_RESTRICT |
3517 		    CS_REQUIRE_LV |
3518 		    CS_ENFORCEMENT | CS_EXEC_SET_ENFORCEMENT;
3519 
3520 		proc_lock(pt);
3521 		if (proc_getcsflags(pt) & CS_VALID) {
3522 			if ((flags & CS_ENFORCEMENT) &&
3523 			    !(proc_getcsflags(pt) & CS_ENFORCEMENT)) {
3524 				vm_map_cs_enforcement_set(get_task_map(proc_task(pt)), TRUE);
3525 			}
3526 			proc_csflags_set(pt, flags);
3527 		} else {
3528 			error = EINVAL;
3529 		}
3530 		proc_unlock(pt);
3531 
3532 		break;
3533 	}
3534 	case CS_OPS_CLEAR_LV: {
3535 		/*
3536 		 * This option is used to remove library validation from
3537 		 * a running process. This is used in plugin architectures
3538 		 * when a program needs to load untrusted libraries. This
3539 		 * allows the process to maintain library validation as
3540 		 * long as possible, then drop it only when required.
3541 		 * Once a process has loaded the untrusted library,
3542 		 * relying on library validation in the future will
3543 		 * not be effective. An alternative is to re-exec
3544 		 * your application without library validation, or
3545 		 * fork an untrusted child.
3546 		 */
3547 #if !defined(XNU_TARGET_OS_OSX)
3548 		// We only support dropping library validation on macOS
3549 		error = ENOTSUP;
3550 #else
3551 		/*
3552 		 * if we have the flag set, and the caller wants
3553 		 * to remove it, and they're entitled to, then
3554 		 * we remove it from the csflags
3555 		 *
3556 		 * NOTE: We are fine to poke into the task because
3557 		 * we get a ref to pt when we do the proc_find
3558 		 * at the beginning of this function.
3559 		 *
3560 		 * We also only allow altering ourselves.
3561 		 */
3562 		if (forself == 1 && IOTaskHasEntitlement(proc_task(pt), CLEAR_LV_ENTITLEMENT)) {
3563 			proc_lock(pt);
3564 			if (!(proc_getcsflags(pt) & CS_INSTALLER)) {
3565 				proc_csflags_clear(pt, CS_REQUIRE_LV | CS_FORCED_LV);
3566 				error = 0;
3567 			} else {
3568 				error = EPERM;
3569 			}
3570 			proc_unlock(pt);
3571 		} else {
3572 			error = EPERM;
3573 		}
3574 #endif
3575 		break;
3576 	}
3577 	case CS_OPS_BLOB: {
3578 		void *start;
3579 		size_t length;
3580 
3581 		proc_lock(pt);
3582 		if ((proc_getcsflags(pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
3583 			proc_unlock(pt);
3584 			error = EINVAL;
3585 			break;
3586 		}
3587 		proc_unlock(pt);
3588 		// Don't need to lock here as not accessing CSFLAGS
3589 		error = cs_blob_get(pt, &start, &length);
3590 		if (error) {
3591 			goto out;
3592 		}
3593 
3594 		error = csops_copy_token(start, length, usize, uaddr);
3595 		goto out;
3596 	}
3597 	case CS_OPS_IDENTITY:
3598 	case CS_OPS_TEAMID: {
3599 		const char *identity;
3600 		uint8_t fakeheader[8];
3601 		uint32_t idlen;
3602 		size_t length;
3603 
3604 		/*
3605 		 * Make identity have a blob header to make it
3606 		 * easier on userland to guess the identity
3607 		 * length.
3608 		 */
3609 		if (usize < sizeof(fakeheader)) {
3610 			error = ERANGE;
3611 			break;
3612 		}
3613 		memset(fakeheader, 0, sizeof(fakeheader));
3614 
3615 		proc_lock(pt);
3616 		if ((proc_getcsflags(pt) & (CS_VALID | CS_DEBUGGED)) == 0) {
3617 			proc_unlock(pt);
3618 			error = EINVAL;
3619 			break;
3620 		}
3621 		identity = ops == CS_OPS_TEAMID ? csproc_get_teamid(pt) : cs_identity_get(pt);
3622 		proc_unlock(pt);
3623 
3624 		if (identity == NULL) {
3625 			error = ENOENT;
3626 			goto out;
3627 		}
3628 
3629 		length = strlen(identity) + 1;         /* include NUL */
3630 		idlen = htonl((uint32_t)(length + sizeof(fakeheader)));
3631 		memcpy(&fakeheader[4], &idlen, sizeof(idlen));
3632 
3633 		error = copyout(fakeheader, uaddr, sizeof(fakeheader));
3634 		if (error) {
3635 			goto out;
3636 		}
3637 
3638 		if (usize < sizeof(fakeheader) + length) {
3639 			error = ERANGE;
3640 		} else if (usize > sizeof(fakeheader)) {
3641 			error = copyout(identity, uaddr + sizeof(fakeheader), length);
3642 		}
3643 		goto out;
3644 	}
3645 
3646 	case CS_OPS_CLEARINSTALLER:
3647 		proc_lock(pt);
3648 		proc_csflags_clear(pt, CS_INSTALLER | CS_DATAVAULT_CONTROLLER | CS_EXEC_INHERIT_SIP);
3649 		proc_unlock(pt);
3650 		break;
3651 
3652 	case CS_OPS_CLEARPLATFORM:
3653 #if DEVELOPMENT || DEBUG
3654 		if (cs_process_global_enforcement()) {
3655 			error = ENOTSUP;
3656 			break;
3657 		}
3658 
3659 #if CONFIG_CSR
3660 		if (csr_check(CSR_ALLOW_APPLE_INTERNAL) != 0) {
3661 			error = ENOTSUP;
3662 			break;
3663 		}
3664 #endif /* CONFIG_CSR */
3665 		task_t task = proc_task(pt);
3666 
3667 		proc_lock(pt);
3668 		proc_csflags_clear(pt, CS_PLATFORM_BINARY | CS_PLATFORM_PATH);
3669 		task_set_hardened_runtime(task, false);
3670 		csproc_clear_platform_binary(pt);
3671 		proc_unlock(pt);
3672 		break;
3673 #else  /* DEVELOPMENT || DEBUG */
3674 		error = ENOTSUP;
3675 		break;
3676 #endif /* !DEVELOPMENT || DEBUG */
3677 
3678 	default:
3679 		error = EINVAL;
3680 		break;
3681 	}
3682 out:
3683 	proc_rele(pt);
3684 	return error;
3685 }
3686 
3687 void
3688 proc_iterate(
3689 	unsigned int flags,
3690 	proc_iterate_fn_t callout,
3691 	void *arg,
3692 	proc_iterate_fn_t filterfn,
3693 	void *filterarg)
3694 {
3695 	pidlist_t pid_list, *pl = pidlist_init(&pid_list);
3696 	u_int pid_count_available = 0;
3697 
3698 	assert(callout != NULL);
3699 
3700 	/* allocate outside of the proc_list_lock */
3701 	for (;;) {
3702 		proc_list_lock();
3703 		pid_count_available = nprocs + 1; /* kernel_task not counted in nprocs */
3704 		assert(pid_count_available > 0);
3705 		if (pidlist_nalloc(pl) >= pid_count_available) {
3706 			break;
3707 		}
3708 		proc_list_unlock();
3709 
3710 		pidlist_alloc(pl, pid_count_available);
3711 	}
3712 	pidlist_set_active(pl);
3713 
3714 	/* filter pids into the pid_list */
3715 
3716 	u_int pid_count = 0;
3717 	if (flags & PROC_ALLPROCLIST) {
3718 		proc_t p;
3719 		ALLPROC_FOREACH(p) {
3720 			/* ignore processes that are being forked */
3721 			if (p->p_stat == SIDL || proc_is_shadow(p)) {
3722 				continue;
3723 			}
3724 			if ((filterfn != NULL) && (filterfn(p, filterarg) == 0)) {
3725 				continue;
3726 			}
3727 			pidlist_add_pid(pl, proc_pid(p));
3728 			if (++pid_count >= pid_count_available) {
3729 				break;
3730 			}
3731 		}
3732 	}
3733 
3734 	if ((pid_count < pid_count_available) &&
3735 	    (flags & PROC_ZOMBPROCLIST)) {
3736 		proc_t p;
3737 		ZOMBPROC_FOREACH(p) {
3738 			if (proc_is_shadow(p)) {
3739 				continue;
3740 			}
3741 			if ((filterfn != NULL) && (filterfn(p, filterarg) == 0)) {
3742 				continue;
3743 			}
3744 			pidlist_add_pid(pl, proc_pid(p));
3745 			if (++pid_count >= pid_count_available) {
3746 				break;
3747 			}
3748 		}
3749 	}
3750 
3751 	proc_list_unlock();
3752 
3753 	/* call callout on processes in the pid_list */
3754 
3755 	const pidlist_entry_t *pe;
3756 	SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
3757 		for (u_int i = 0; i < pe->pe_nused; i++) {
3758 			const pid_t pid = pe->pe_pid[i];
3759 			proc_t p = proc_find(pid);
3760 			if (p) {
3761 				if ((flags & PROC_NOWAITTRANS) == 0) {
3762 					proc_transwait(p, 0);
3763 				}
3764 				const int callout_ret = callout(p, arg);
3765 
3766 				switch (callout_ret) {
3767 				case PROC_RETURNED_DONE:
3768 					proc_rele(p);
3769 					OS_FALLTHROUGH;
3770 				case PROC_CLAIMED_DONE:
3771 					goto out;
3772 
3773 				case PROC_RETURNED:
3774 					proc_rele(p);
3775 					OS_FALLTHROUGH;
3776 				case PROC_CLAIMED:
3777 					break;
3778 				default:
3779 					panic("%s: callout =%d for pid %d",
3780 					    __func__, callout_ret, pid);
3781 					break;
3782 				}
3783 			} else if (flags & PROC_ZOMBPROCLIST) {
3784 				p = proc_find_zombref(pid);
3785 				if (!p) {
3786 					continue;
3787 				}
3788 				const int callout_ret = callout(p, arg);
3789 
3790 				switch (callout_ret) {
3791 				case PROC_RETURNED_DONE:
3792 					proc_drop_zombref(p);
3793 					OS_FALLTHROUGH;
3794 				case PROC_CLAIMED_DONE:
3795 					goto out;
3796 
3797 				case PROC_RETURNED:
3798 					proc_drop_zombref(p);
3799 					OS_FALLTHROUGH;
3800 				case PROC_CLAIMED:
3801 					break;
3802 				default:
3803 					panic("%s: callout =%d for zombie %d",
3804 					    __func__, callout_ret, pid);
3805 					break;
3806 				}
3807 			}
3808 		}
3809 	}
3810 out:
3811 	pidlist_free(pl);
3812 }
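
/*
 * Illustrative sketch (not part of this file): a proc_iterate() callout
 * receives each process with a reference held; returning PROC_RETURNED
 * (or PROC_RETURNED_DONE) tells the iterator to drop that reference, while
 * PROC_CLAIMED means the callout took ownership of it.  The callout and
 * helper names below are hypothetical.
 */
#if 0
static int
example_count_callout(proc_t p, void *arg)
{
	u_int *count = arg;

	(*count)++;
	return PROC_RETURNED;           /* iterator releases the proc ref */
}

static u_int
example_count_all_procs(void)
{
	u_int count = 0;

	proc_iterate(PROC_ALLPROCLIST, example_count_callout, &count,
	    NULL, NULL);
	return count;
}
#endif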
3813 
3814 void
3815 proc_rebootscan(
3816 	proc_iterate_fn_t callout,
3817 	void *arg,
3818 	proc_iterate_fn_t filterfn,
3819 	void *filterarg)
3820 {
3821 	proc_t p;
3822 
3823 	assert(callout != NULL);
3824 
3825 	proc_shutdown_exitcount = 0;
3826 
3827 restart_foreach:
3828 
3829 	proc_list_lock();
3830 
3831 	ALLPROC_FOREACH(p) {
3832 		if ((filterfn != NULL) && filterfn(p, filterarg) == 0) {
3833 			continue;
3834 		}
3835 		p = proc_ref(p, true);
3836 		if (!p) {
3837 			proc_list_unlock();
3838 			goto restart_foreach;
3839 		}
3840 
3841 		proc_list_unlock();
3842 
3843 		proc_transwait(p, 0);
3844 		(void)callout(p, arg);
3845 		proc_rele(p);
3846 
3847 		goto restart_foreach;
3848 	}
3849 
3850 	proc_list_unlock();
3851 }
3852 
3853 void
3854 proc_childrenwalk(
3855 	proc_t parent,
3856 	proc_iterate_fn_t callout,
3857 	void *arg)
3858 {
3859 	pidlist_t pid_list, *pl = pidlist_init(&pid_list);
3860 	u_int pid_count_available = 0;
3861 
3862 	assert(parent != NULL);
3863 	assert(callout != NULL);
3864 
3865 	for (;;) {
3866 		proc_list_lock();
3867 		pid_count_available = parent->p_childrencnt;
3868 		if (pid_count_available == 0) {
3869 			proc_list_unlock();
3870 			goto out;
3871 		}
3872 		if (pidlist_nalloc(pl) >= pid_count_available) {
3873 			break;
3874 		}
3875 		proc_list_unlock();
3876 
3877 		pidlist_alloc(pl, pid_count_available);
3878 	}
3879 	pidlist_set_active(pl);
3880 
3881 	u_int pid_count = 0;
3882 	proc_t p;
3883 	PCHILDREN_FOREACH(parent, p) {
3884 		if (p->p_stat == SIDL || proc_is_shadow(p)) {
3885 			continue;
3886 		}
3887 
3888 		pidlist_add_pid(pl, proc_pid(p));
3889 		if (++pid_count >= pid_count_available) {
3890 			break;
3891 		}
3892 	}
3893 
3894 	proc_list_unlock();
3895 
3896 	const pidlist_entry_t *pe;
3897 	SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
3898 		for (u_int i = 0; i < pe->pe_nused; i++) {
3899 			const pid_t pid = pe->pe_pid[i];
3900 			p = proc_find(pid);
3901 			if (!p) {
3902 				continue;
3903 			}
3904 			const int callout_ret = callout(p, arg);
3905 
3906 			switch (callout_ret) {
3907 			case PROC_RETURNED_DONE:
3908 				proc_rele(p);
3909 				OS_FALLTHROUGH;
3910 			case PROC_CLAIMED_DONE:
3911 				goto out;
3912 
3913 			case PROC_RETURNED:
3914 				proc_rele(p);
3915 				OS_FALLTHROUGH;
3916 			case PROC_CLAIMED:
3917 				break;
3918 			default:
3919 				panic("%s: callout =%d for pid %d",
3920 				    __func__, callout_ret, pid);
3921 				break;
3922 			}
3923 		}
3924 	}
3925 out:
3926 	pidlist_free(pl);
3927 }
3928 
3929 void
3930 pgrp_iterate(
3931 	struct pgrp *pgrp,
3932 	proc_iterate_fn_t callout,
3933 	void * arg,
3934 	bool (^filterfn)(proc_t))
3935 {
3936 	pidlist_t pid_list, *pl = pidlist_init(&pid_list);
3937 	u_int pid_count_available = 0;
3938 	proc_t p;
3939 
3940 	assert(pgrp != NULL);
3941 	assert(callout != NULL);
3942 
3943 	for (;;) {
3944 		pgrp_lock(pgrp);
3945 		/*
3946 		 * each member has one ref + some transient holders,
3947 		 * this is a good enough approximation
3948 		 */
3949 		pid_count_available = os_ref_get_count_mask(&pgrp->pg_refcount,
3950 		    PGRP_REF_BITS);
3951 		if (pidlist_nalloc(pl) >= pid_count_available) {
3952 			break;
3953 		}
3954 		pgrp_unlock(pgrp);
3955 
3956 		pidlist_alloc(pl, pid_count_available);
3957 	}
3958 	pidlist_set_active(pl);
3959 
3960 	const pid_t pgid = pgrp->pg_id;
3961 	u_int pid_count = 0;
3962 
3963 	PGMEMBERS_FOREACH(pgrp, p) {
3964 		if ((filterfn != NULL) && (filterfn(p) == 0)) {
3965 			continue;
3966 		}
3967 		pidlist_add_pid(pl, proc_pid(p));
3968 		if (++pid_count >= pid_count_available) {
3969 			break;
3970 		}
3971 	}
3972 
3973 	pgrp_unlock(pgrp);
3974 
3975 	const pidlist_entry_t *pe;
3976 	SLIST_FOREACH(pe, &(pl->pl_head), pe_link) {
3977 		for (u_int i = 0; i < pe->pe_nused; i++) {
3978 			const pid_t pid = pe->pe_pid[i];
3979 			if (0 == pid) {
3980 				continue; /* skip kernproc */
3981 			}
3982 			p = proc_find(pid);
3983 			if (!p) {
3984 				continue;
3985 			}
3986 			if (p->p_pgrpid != pgid) {
3987 				proc_rele(p);
3988 				continue;
3989 			}
3990 			const int callout_ret = callout(p, arg);
3991 
3992 			switch (callout_ret) {
3993 			case PROC_RETURNED:
3994 				proc_rele(p);
3995 				OS_FALLTHROUGH;
3996 			case PROC_CLAIMED:
3997 				break;
3998 			case PROC_RETURNED_DONE:
3999 				proc_rele(p);
4000 				OS_FALLTHROUGH;
4001 			case PROC_CLAIMED_DONE:
4002 				goto out;
4003 
4004 			default:
4005 				panic("%s: callout =%d for pid %d",
4006 				    __func__, callout_ret, pid);
4007 			}
4008 		}
4009 	}
4010 
4011 out:
4012 	pidlist_free(pl);
4013 }
4014 
4015 /* consumes the newpg ref */
4016 static void
4017 pgrp_replace(struct proc *p, struct pgrp *newpg)
4018 {
4019 	struct pgrp *oldpg;
4020 
4021 	proc_list_lock();
4022 	oldpg = smr_serialized_load(&p->p_pgrp);
4023 	pgrp_del_member(oldpg, p);
4024 	pgrp_add_member(newpg, PROC_NULL, p);
4025 	proc_list_unlock();
4026 
4027 	pgrp_rele(oldpg);
4028 }
4029 
4030 struct pgrp *
4031 pgrp_alloc(pid_t pgid, pggrp_ref_bits_t bits)
4032 {
4033 	struct pgrp *pgrp = zalloc_flags(pgrp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4034 
4035 	os_ref_init_mask(&pgrp->pg_refcount, PGRP_REF_BITS, &p_refgrp, bits);
4036 	os_ref_init_raw(&pgrp->pg_hashref, &p_refgrp);
4037 	LIST_INIT(&pgrp->pg_members);
4038 	lck_mtx_init(&pgrp->pg_mlock, &proc_mlock_grp, &proc_lck_attr);
4039 	pgrp->pg_id = pgid;
4040 
4041 	return pgrp;
4042 }
4043 
4044 void
4045 pgrp_lock(struct pgrp * pgrp)
4046 {
4047 	lck_mtx_lock(&pgrp->pg_mlock);
4048 }
4049 
4050 void
4051 pgrp_unlock(struct pgrp * pgrp)
4052 {
4053 	lck_mtx_unlock(&pgrp->pg_mlock);
4054 }
4055 
4056 struct session *
4057 session_find_locked(pid_t sessid)
4058 {
4059 	struct session *sess;
4060 
4061 	LIST_FOREACH(sess, SESSHASH(sessid), s_hash) {
4062 		if (sess->s_sid == sessid) {
4063 			break;
4064 		}
4065 	}
4066 
4067 	return sess;
4068 }
4069 
4070 void
4071 session_replace_leader(struct proc *old_proc, struct proc *new_proc)
4072 {
4073 	assert(old_proc == current_proc());
4074 
4075 	/* If old_proc is session leader, change the leader to new proc */
4076 	struct pgrp *pgrp = smr_serialized_load(&old_proc->p_pgrp);
4077 	struct session *sessp = pgrp->pg_session;
4078 	struct tty *ttyp = TTY_NULL;
4079 
4080 	if (sessp == SESSION_NULL || !SESS_LEADER(old_proc, sessp)) {
4081 		return;
4082 	}
4083 
4084 	session_lock(sessp);
4085 	if (sessp->s_ttyp && sessp->s_ttyp->t_session == sessp) {
4086 		ttyp = sessp->s_ttyp;
4087 		ttyhold(ttyp);
4088 	}
4089 
4090 	/* Do the dance to take tty lock and session lock */
4091 	if (ttyp) {
4092 		session_unlock(sessp);
4093 		tty_lock(ttyp);
4094 		session_lock(sessp);
4095 	}
4096 
4097 	sessp->s_leader = new_proc;
4098 	session_unlock(sessp);
4099 
4100 	if (ttyp) {
4101 		tty_unlock(ttyp);
4102 		ttyfree(ttyp);
4103 	}
4104 }
4105 
4106 void
4107 session_lock(struct session * sess)
4108 {
4109 	lck_mtx_lock(&sess->s_mlock);
4110 }
4111 
4112 
4113 void
4114 session_unlock(struct session * sess)
4115 {
4116 	lck_mtx_unlock(&sess->s_mlock);
4117 }
4118 
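/*
 * proc_pgrp: return a referenced process group for `p` (or PGRP_NULL), and
 * optionally its session via `sessp`.
 *
 * The fast path reads p->p_pgrp under SMR and tries pg_ref_try(); if the
 * process is caught in the middle of pgrp_replace(), the slow path retries
 * under the proc_list_lock, which cannot fail.  The caller owns the returned
 * reference.
 */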
4119 struct pgrp *
4120 proc_pgrp(proc_t p, struct session **sessp)
4121 {
4122 	struct pgrp *pgrp = PGRP_NULL;
4123 	bool success = false;
4124 
4125 	if (__probable(p != PROC_NULL)) {
4126 		smr_proc_task_enter();
4127 		pgrp = smr_entered_load(&p->p_pgrp);
4128 		success = pgrp == PGRP_NULL || pg_ref_try(pgrp);
4129 		smr_proc_task_leave();
4130 
4131 		if (__improbable(!success)) {
4132 			/*
4133 			 * We caught the process in the middle of pgrp_replace(),
4134 			 * go the slow, never failing way.
4135 			 */
4136 			proc_list_lock();
4137 			pgrp = pg_ref(smr_serialized_load(&p->p_pgrp));
4138 			proc_list_unlock();
4139 		}
4140 	}
4141 
4142 	if (sessp) {
4143 		*sessp = pgrp ? pgrp->pg_session : SESSION_NULL;
4144 	}
4145 	return pgrp;
4146 }
4147 
4148 struct pgrp *
4149 tty_pgrp_locked(struct tty *tp)
4150 {
4151 	struct pgrp *pg = PGRP_NULL;
4152 
4153 	/* either the tty_lock() or the proc_list_lock() must be held */
4154 
4155 	if (tp->t_pgrp) {
4156 		pg = pg_ref(tp->t_pgrp);
4157 	}
4158 
4159 	return pg;
4160 }
4161 
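/*
 * Process "transaction" primitives.  proc_transstart() marks the proc as in
 * transit (P_LINTRANSIT) and records the owning thread; concurrent starters
 * either block, or fail with EDEADLK once P_LTRANSCOMMIT is set or when
 * non_blocking is requested.  proc_transcommit() publishes the commit and
 * wakes waiters, proc_transend() clears the transaction state, and
 * proc_transwait() blocks until the transaction is over.  The `locked`
 * argument indicates whether the caller already holds the proc lock.
 */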
4162 int
4163 proc_transstart(proc_t p, int locked, int non_blocking)
4164 {
4165 	if (locked == 0) {
4166 		proc_lock(p);
4167 	}
4168 	while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) {
4169 		if (((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT) || non_blocking) {
4170 			if (locked == 0) {
4171 				proc_unlock(p);
4172 			}
4173 			return EDEADLK;
4174 		}
4175 		p->p_lflag |= P_LTRANSWAIT;
4176 		msleep(&p->p_lflag, &p->p_mlock, 0, "proc_signstart", NULL);
4177 	}
4178 	p->p_lflag |= P_LINTRANSIT;
4179 	p->p_transholder = current_thread();
4180 	if (locked == 0) {
4181 		proc_unlock(p);
4182 	}
4183 	return 0;
4184 }
4185 
4186 void
4187 proc_transcommit(proc_t p, int locked)
4188 {
4189 	if (locked == 0) {
4190 		proc_lock(p);
4191 	}
4192 
4193 	assert((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT);
4194 	assert(p->p_transholder == current_thread());
4195 	p->p_lflag |= P_LTRANSCOMMIT;
4196 
4197 	if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) {
4198 		p->p_lflag &= ~P_LTRANSWAIT;
4199 		wakeup(&p->p_lflag);
4200 	}
4201 	if (locked == 0) {
4202 		proc_unlock(p);
4203 	}
4204 }
4205 
4206 void
4207 proc_transend(proc_t p, int locked)
4208 {
4209 	if (locked == 0) {
4210 		proc_lock(p);
4211 	}
4212 
4213 	p->p_lflag &= ~(P_LINTRANSIT | P_LTRANSCOMMIT);
4214 	p->p_transholder = NULL;
4215 
4216 	if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) {
4217 		p->p_lflag &= ~P_LTRANSWAIT;
4218 		wakeup(&p->p_lflag);
4219 	}
4220 	if (locked == 0) {
4221 		proc_unlock(p);
4222 	}
4223 }
4224 
4225 int
4226 proc_transwait(proc_t p, int locked)
4227 {
4228 	if (locked == 0) {
4229 		proc_lock(p);
4230 	}
4231 	while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) {
4232 		if ((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT && current_proc() == p) {
4233 			if (locked == 0) {
4234 				proc_unlock(p);
4235 			}
4236 			return EDEADLK;
4237 		}
4238 		p->p_lflag |= P_LTRANSWAIT;
4239 		msleep(&p->p_lflag, &p->p_mlock, 0, "proc_signstart", NULL);
4240 	}
4241 	if (locked == 0) {
4242 		proc_unlock(p);
4243 	}
4244 	return 0;
4245 }
4246 
4247 void
4248 proc_klist_lock(void)
4249 {
4250 	lck_mtx_lock(&proc_klist_mlock);
4251 }
4252 
4253 void
4254 proc_klist_unlock(void)
4255 {
4256 	lck_mtx_unlock(&proc_klist_mlock);
4257 }
4258 
4259 void
4260 proc_knote(struct proc * p, long hint)
4261 {
4262 	proc_klist_lock();
4263 	KNOTE(&p->p_klist, hint);
4264 	proc_klist_unlock();
4265 }
4266 
4267 void
4268 proc_transfer_knotes(struct proc *old_proc, struct proc *new_proc)
4269 {
4270 	struct knote *kn = NULL;
4271 
4272 	proc_klist_lock();
4273 	while ((kn = SLIST_FIRST(&old_proc->p_klist))) {
4274 		KNOTE_DETACH(&old_proc->p_klist, kn);
4275 		if (kn->kn_filtid == (uint8_t)~EVFILT_PROC) {
4276 			kn->kn_proc = new_proc;
4277 			KNOTE_ATTACH(&new_proc->p_klist, kn);
4278 		} else {
4279 			assert(kn->kn_filtid == (uint8_t)~EVFILT_SIGNAL);
4280 			kn->kn_proc = NULL;
4281 		}
4282 	}
4283 	proc_klist_unlock();
4284 }
4285 
4286 void
4287 proc_knote_drain(struct proc *p)
4288 {
4289 	struct knote *kn = NULL;
4290 
4291 	/*
4292 	 * Clear the proc's klist to avoid references after the proc is reaped.
4293 	 */
4294 	proc_klist_lock();
4295 	while ((kn = SLIST_FIRST(&p->p_klist))) {
4296 		kn->kn_proc = PROC_NULL;
4297 		KNOTE_DETACH(&p->p_klist, kn);
4298 	}
4299 	proc_klist_unlock();
4300 }
4301 
4302 void
4303 proc_setregister(proc_t p)
4304 {
4305 	proc_lock(p);
4306 	p->p_lflag |= P_LREGISTER;
4307 	proc_unlock(p);
4308 }
4309 
4310 void
4311 proc_resetregister(proc_t p)
4312 {
4313 	proc_lock(p);
4314 	p->p_lflag &= ~P_LREGISTER;
4315 	proc_unlock(p);
4316 }
4317 
4318 bool
4319 proc_get_pthread_jit_allowlist(proc_t p, bool *late_out)
4320 {
4321 	bool ret = false;
4322 
4323 	proc_lock(p);
4324 	ret = (p->p_lflag & P_LPTHREADJITALLOWLIST);
4325 	*late_out = (p->p_lflag & P_LPTHREADJITFREEZELATE);
4326 	proc_unlock(p);
4327 
4328 	return ret;
4329 }
4330 
4331 void
4332 proc_set_pthread_jit_allowlist(proc_t p, bool late)
4333 {
4334 	proc_lock(p);
4335 	p->p_lflag |= P_LPTHREADJITALLOWLIST;
4336 	if (late) {
4337 		p->p_lflag |= P_LPTHREADJITFREEZELATE;
4338 	}
4339 	proc_unlock(p);
4340 }
4341 
4342 pid_t
4343 proc_pgrpid(proc_t p)
4344 {
4345 	return p->p_pgrpid;
4346 }
4347 
4348 pid_t
4349 proc_sessionid(proc_t p)
4350 {
4351 	return p->p_sessionid;
4352 }
4353 
4354 pid_t
4355 proc_selfpgrpid()
4356 {
4357 	return current_proc()->p_pgrpid;
4358 }
4359 
4360 
4361 /* return control and action states */
4362 int
4363 proc_getpcontrol(int pid, int * pcontrolp)
4364 {
4365 	proc_t p;
4366 
4367 	p = proc_find(pid);
4368 	if (p == PROC_NULL) {
4369 		return ESRCH;
4370 	}
4371 	if (pcontrolp != NULL) {
4372 		*pcontrolp = p->p_pcaction;
4373 	}
4374 
4375 	proc_rele(p);
4376 	return 0;
4377 }
4378 
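/*
 * Apply the process's low-swap policy action (throttle, suspend, or kill)
 * if one is specified and has not already been taken; returns PROC_RETURNED
 * so it can also be used directly as a proc_iterate() callout.
 */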
4379 int
4380 proc_dopcontrol(proc_t p)
4381 {
4382 	int pcontrol;
4383 	os_reason_t kill_reason;
4384 
4385 	proc_lock(p);
4386 
4387 	pcontrol = PROC_CONTROL_STATE(p);
4388 
4389 	if (PROC_ACTION_STATE(p) == 0) {
4390 		switch (pcontrol) {
4391 		case P_PCTHROTTLE:
4392 			PROC_SETACTION_STATE(p);
4393 			proc_unlock(p);
4394 			printf("low swap: throttling pid %d (%s)\n", proc_getpid(p), p->p_comm);
4395 			break;
4396 
4397 		case P_PCSUSP:
4398 			PROC_SETACTION_STATE(p);
4399 			proc_unlock(p);
4400 			printf("low swap: suspending pid %d (%s)\n", proc_getpid(p), p->p_comm);
4401 			task_suspend(proc_task(p));
4402 			break;
4403 
4404 		case P_PCKILL:
4405 			PROC_SETACTION_STATE(p);
4406 			proc_unlock(p);
4407 			printf("low swap: killing pid %d (%s)\n", proc_getpid(p), p->p_comm);
4408 			kill_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_LOWSWAP);
4409 			psignal_with_reason(p, SIGKILL, kill_reason);
4410 			break;
4411 
4412 		default:
4413 			proc_unlock(p);
4414 		}
4415 	} else {
4416 		proc_unlock(p);
4417 	}
4418 
4419 	return PROC_RETURNED;
4420 }
4421 
4422 
4423 /*
4424  * Resume a throttled or suspended process.  This is an internal interface that's only
4425  * used by the user level code that presents the GUI when we run out of swap space and
4426  * hence is restricted to processes with superuser privileges.
4427  */
4428 
4429 int
4430 proc_resetpcontrol(int pid)
4431 {
4432 	proc_t p;
4433 	int pcontrol;
4434 	int error;
4435 	proc_t self = current_proc();
4436 
4437 	/* allowed if this process has been validated to handle resource control, or if the caller is root */
4438 	if (((self->p_lflag & P_LVMRSRCOWNER) == 0) && (error = suser(kauth_cred_get(), 0))) {
4439 		return error;
4440 	}
4441 
4442 	p = proc_find(pid);
4443 	if (p == PROC_NULL) {
4444 		return ESRCH;
4445 	}
4446 
4447 	proc_lock(p);
4448 
4449 	pcontrol = PROC_CONTROL_STATE(p);
4450 
4451 	if (PROC_ACTION_STATE(p) != 0) {
4452 		switch (pcontrol) {
4453 		case P_PCTHROTTLE:
4454 			PROC_RESETACTION_STATE(p);
4455 			proc_unlock(p);
4456 			printf("low swap: unthrottling pid %d (%s)\n", proc_getpid(p), p->p_comm);
4457 			break;
4458 
4459 		case P_PCSUSP:
4460 			PROC_RESETACTION_STATE(p);
4461 			proc_unlock(p);
4462 			printf("low swap: resuming pid %d (%s)\n", proc_getpid(p), p->p_comm);
4463 			task_resume(proc_task(p));
4464 			break;
4465 
4466 		case P_PCKILL:
4467 			/* Huh? */
4468 			PROC_SETACTION_STATE(p);
4469 			proc_unlock(p);
4470 			printf("low swap: attempt to unkill pid %d (%s) ignored\n", proc_getpid(p), p->p_comm);
4471 			break;
4472 
4473 		default:
4474 			proc_unlock(p);
4475 		}
4476 	} else {
4477 		proc_unlock(p);
4478 	}
4479 
4480 	proc_rele(p);
4481 	return 0;
4482 }
4483 
4484 
4485 
4486 struct no_paging_space {
4487 	uint64_t        pcs_max_size;
4488 	uint64_t        pcs_uniqueid;
4489 	int             pcs_pid;
4490 	int             pcs_proc_count;
4491 	uint64_t        pcs_total_size;
4492 
4493 	uint64_t        npcs_max_size;
4494 	uint64_t        npcs_uniqueid;
4495 	int             npcs_pid;
4496 	int             npcs_proc_count;
4497 	uint64_t        npcs_total_size;
4498 
4499 	int             apcs_proc_count;
4500 	uint64_t        apcs_total_size;
4501 };
4502 
4503 
4504 static int
4505 proc_pcontrol_filter(proc_t p, void *arg)
4506 {
4507 	struct no_paging_space *nps;
4508 	uint64_t        compressed;
4509 
4510 	nps = (struct no_paging_space *)arg;
4511 
4512 	compressed = get_task_compressed(proc_task(p));
4513 
4514 	if (PROC_CONTROL_STATE(p)) {
4515 		if (PROC_ACTION_STATE(p) == 0) {
4516 			if (compressed > nps->pcs_max_size) {
4517 				nps->pcs_pid = proc_getpid(p);
4518 				nps->pcs_uniqueid = proc_uniqueid(p);
4519 				nps->pcs_max_size = compressed;
4520 			}
4521 			nps->pcs_total_size += compressed;
4522 			nps->pcs_proc_count++;
4523 		} else {
4524 			nps->apcs_total_size += compressed;
4525 			nps->apcs_proc_count++;
4526 		}
4527 	} else {
4528 		if (compressed > nps->npcs_max_size) {
4529 			nps->npcs_pid = proc_getpid(p);
4530 			nps->npcs_uniqueid = proc_uniqueid(p);
4531 			nps->npcs_max_size = compressed;
4532 		}
4533 		nps->npcs_total_size += compressed;
4534 		nps->npcs_proc_count++;
4535 	}
4536 	return 0;
4537 }
4538 
4539 
4540 static int
4541 proc_pcontrol_null(__unused proc_t p, __unused void *arg)
4542 {
4543 	return PROC_RETURNED;
4544 }
4545 
4546 
4547 /*
4548  * Deal with the low on compressor pool space condition... this function
4549  * gets called when we are approaching the limits of the compressor pool or
4550  * we are unable to create a new swap file.
4551  * Since this eventually creates a memory deadlock situation, we need to take action to free up
4552  * memory resources (both compressed and uncompressed) in order to prevent the system from hanging completely.
4553  * There are 2 categories of processes to deal with.  Those that have an action
4554  * associated with them by the task itself and those that do not.  Actionable
4555  * tasks can have one of three categories specified:  ones that
4556  * can be killed immediately, ones that should be suspended, and ones that should
4557  * be throttled.  Processes that do not have an action associated with them are normally
4558  * ignored unless they are utilizing such a large percentage of the compressor pool (currently 50%)
4559  * that only by killing them can we hope to put the system back into a usable state.
4560  */
4561 
4562 #define NO_PAGING_SPACE_DEBUG   0
4563 
4564 uint64_t last_no_space_action_ts = 0;
4565 TUNABLE(uint64_t, no_paging_space_action_throttle_delay_ns, "no_paging_space_action_throttle_delay_ns", 5 * NSEC_PER_SEC);
4566 
4567 #define MB_SIZE (1024 * 1024ULL)
4568 boolean_t       memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
4569 
4570 extern int32_t  max_kill_priority;
4571 
4572 
4573 int
4574 no_paging_space_action()
4575 {
4576 	proc_t          p;
4577 	struct no_paging_space nps;
4578 	uint64_t now;
4579 	os_reason_t kill_reason;
4580 
4581 	/*
4582 	 * Throttle how often we come through here.  Once every 5 seconds should be plenty.
4583 	 */
4584 	now = mach_absolute_time();
4585 	uint64_t delta_since_last_no_space_ns;
4586 	absolutetime_to_nanoseconds(now - last_no_space_action_ts, &delta_since_last_no_space_ns);
4587 	if (delta_since_last_no_space_ns <= no_paging_space_action_throttle_delay_ns) {
4588 		return 0;
4589 	}
4590 
4591 	printf("low swap: triggering no paging space action\n");
4592 
4593 	/*
4594 	 * Examine all processes and find the biggest (biggest is based on the number of pages this
4595 	 * task has in the compressor pool) that has been marked to have some action
4596 	 * taken when swap space runs out... we also find the biggest that hasn't been marked for
4597 	 * action.
4598 	 *
4599  * If the biggest non-actionable task is over the "dangerously big" threshold (currently 50% of
4600  * the total number of pages held by the compressor), we go ahead and kill it since no other task
4601 	 * can have any real effect on the situation.  Otherwise, we go after the actionable process.
4602 	 */
4603 	bzero(&nps, sizeof(nps));
4604 
4605 	proc_iterate(PROC_ALLPROCLIST, proc_pcontrol_null, (void *)NULL, proc_pcontrol_filter, (void *)&nps);
4606 
4607 #if NO_PAGING_SPACE_DEBUG
4608 	printf("low swap: npcs_proc_count = %d, npcs_total_size = %qd, npcs_max_size = %qd\n",
4609 	    nps.npcs_proc_count, nps.npcs_total_size, nps.npcs_max_size);
4610 	printf("low swap: pcs_proc_count = %d, pcs_total_size = %qd, pcs_max_size = %qd\n",
4611 	    nps.pcs_proc_count, nps.pcs_total_size, nps.pcs_max_size);
4612 	printf("low swap: apcs_proc_count = %d, apcs_total_size = %qd\n",
4613 	    nps.apcs_proc_count, nps.apcs_total_size);
4614 #endif
4615 	if (nps.npcs_max_size > (vm_compressor_pages_compressed() * 50) / 100) {
4616 		/*
4617 		 * for now we'll knock out any task that has more than 50% of the pages
4618 		 * held by the compressor
4619 		 */
4620 		if ((p = proc_find(nps.npcs_pid)) != PROC_NULL) {
4621 			if (nps.npcs_uniqueid == proc_uniqueid(p)) {
4622 				/*
4623 				 * verify this is still the same process
4624 				 * in case the proc exited and the pid got reused while
4625 				 * we were finishing the proc_iterate and getting to this point
4626 				 */
4627 				last_no_space_action_ts = now;
4628 
4629 				printf("low swap: killing largest compressed process with pid %d (%s) and size %llu MB\n", proc_getpid(p), p->p_comm, (nps.npcs_max_size / MB_SIZE));
4630 				kill_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_LOWSWAP);
4631 				psignal_with_reason(p, SIGKILL, kill_reason);
4632 
4633 				proc_rele(p);
4634 
4635 				return 0;
4636 			}
4637 
4638 			proc_rele(p);
4639 		}
4640 	}
4641 
4642 	/*
4643 	 * We have some processes within our jetsam bands of consideration that can therefore be killed.
4644 	 * So we will invoke the memorystatus thread to go ahead and kill something.
4645 	 */
4646 	if (memorystatus_get_proccnt_upto_priority(max_kill_priority) > 0) {
4647 		last_no_space_action_ts = now;
4648 		/*
4649 		 * TODO(jason): This is only mac OS right now, but we'll need
4650 		 * something like this on iPad...
4651 		 */
4652 		memorystatus_kill_on_VM_compressor_space_shortage(TRUE);
4653 		return 1;
4654 	}
4655 
4656 	/*
4657 	 * No eligible processes to kill. So let's suspend/kill the largest
4658 	 * process depending on its policy control specifications.
4659 	 */
4660 
4661 	if (nps.pcs_max_size > 0) {
4662 		if ((p = proc_find(nps.pcs_pid)) != PROC_NULL) {
4663 			if (nps.pcs_uniqueid == proc_uniqueid(p)) {
4664 				/*
4665 				 * verify this is still the same process
4666 				 * in case the proc exited and the pid got reused while
4667 				 * we were finishing the proc_iterate and getting to this point
4668 				 */
4669 				last_no_space_action_ts = now;
4670 
4671 				proc_dopcontrol(p);
4672 
4673 				proc_rele(p);
4674 
4675 				return 1;
4676 			}
4677 
4678 			proc_rele(p);
4679 		}
4680 	}
4681 	last_no_space_action_ts = now;
4682 
4683 	printf("low swap: unable to find any eligible processes to take action on\n");
4684 
4685 	return 0;
4686 }
4687 
4688 int
4689 proc_trace_log(__unused proc_t p, struct proc_trace_log_args *uap, __unused int *retval)
4690 {
4691 	int ret = 0;
4692 	proc_t target_proc = PROC_NULL;
4693 	pid_t target_pid = uap->pid;
4694 	uint64_t target_uniqueid = uap->uniqueid;
4695 	task_t target_task = NULL;
4696 
4697 	if (priv_check_cred(kauth_cred_get(), PRIV_PROC_TRACE_INSPECT, 0)) {
4698 		ret = EPERM;
4699 		goto out;
4700 	}
4701 	target_proc = proc_find(target_pid);
4702 	if (target_proc != PROC_NULL) {
4703 		if (target_uniqueid != proc_uniqueid(target_proc)) {
4704 			ret = ENOENT;
4705 			goto out;
4706 		}
4707 
4708 		target_task = proc_task(target_proc);
4709 		if (task_send_trace_memory(target_task, target_pid, target_uniqueid)) {
4710 			ret = EINVAL;
4711 			goto out;
4712 		}
4713 	} else {
4714 		ret = ENOENT;
4715 	}
4716 
4717 out:
4718 	if (target_proc != PROC_NULL) {
4719 		proc_rele(target_proc);
4720 	}
4721 	return ret;
4722 }
4723 
4724 #if VM_SCAN_FOR_SHADOW_CHAIN
4725 int proc_shadow_max(void);
4726 int
4727 proc_shadow_max(void)
4728 {
4729 	int             retval, max;
4730 	proc_t          p;
4731 	task_t          task;
4732 	vm_map_t        map;
4733 
4734 	max = 0;
4735 	proc_list_lock();
4736 	for (p = allproc.lh_first; (p != 0); p = p->p_list.le_next) {
4737 		if (p->p_stat == SIDL) {
4738 			continue;
4739 		}
4740 		task = proc_task(p);
4741 		if (task == NULL) {
4742 			continue;
4743 		}
4744 		map = get_task_map(task);
4745 		if (map == NULL) {
4746 			continue;
4747 		}
4748 		retval = vm_map_shadow_max(map);
4749 		if (retval > max) {
4750 			max = retval;
4751 		}
4752 	}
4753 	proc_list_unlock();
4754 	return max;
4755 }
4756 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
4757 
4758 void proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid);
4759 void
4760 proc_set_responsible_pid(proc_t target_proc, pid_t responsible_pid)
4761 {
4762 	if (target_proc != NULL) {
4763 		target_proc->p_responsible_pid = responsible_pid;
4764 
4765 		// Also save the responsible UUID
4766 		if (responsible_pid >= 0) {
4767 			proc_t responsible_proc = proc_find(responsible_pid);
4768 			if (responsible_proc != PROC_NULL) {
4769 				proc_getexecutableuuid(responsible_proc, target_proc->p_responsible_uuid, sizeof(target_proc->p_responsible_uuid));
4770 				proc_rele(responsible_proc);
4771 			}
4772 		}
4773 	}
4774 	return;
4775 }
4776 
4777 int
4778 proc_chrooted(proc_t p)
4779 {
4780 	int retval = 0;
4781 
4782 	if (p) {
4783 		proc_fdlock(p);
4784 		retval = (p->p_fd.fd_rdir != NULL) ? 1 : 0;
4785 		proc_fdunlock(p);
4786 	}
4787 
4788 	return retval;
4789 }
4790 
4791 boolean_t
4792 proc_send_synchronous_EXC_RESOURCE(proc_t p)
4793 {
4794 	if (p == PROC_NULL) {
4795 		return FALSE;
4796 	}
4797 
4798 	/* Send sync EXC_RESOURCE if the process is traced */
4799 	if (ISSET(p->p_lflag, P_LTRACED)) {
4800 		return TRUE;
4801 	}
4802 	return FALSE;
4803 }
4804 
4805 #if CONFIG_MACF
4806 size_t
4807 proc_get_syscall_filter_mask_size(int which)
4808 {
4809 	switch (which) {
4810 	case SYSCALL_MASK_UNIX:
4811 		return nsysent;
4812 	case SYSCALL_MASK_MACH:
4813 		return mach_trap_count;
4814 	case SYSCALL_MASK_KOBJ:
4815 		return mach_kobj_count;
4816 	default:
4817 		return 0;
4818 	}
4819 }
4820 
4821 unsigned char *
4822 proc_get_syscall_filter_mask(proc_t p, int which)
4823 {
4824 	switch (which) {
4825 	case SYSCALL_MASK_UNIX:
4826 		return proc_syscall_filter_mask(p);
4827 	case SYSCALL_MASK_MACH:
4828 		return mac_task_get_mach_filter_mask(proc_task(p));
4829 	case SYSCALL_MASK_KOBJ:
4830 		return mac_task_get_kobj_filter_mask(proc_task(p));
4831 	default:
4832 		return NULL;
4833 	}
4834 }
4835 
4836 int
4837 proc_set_syscall_filter_mask(proc_t p, int which, unsigned char *maskptr, size_t masklen)
4838 {
4839 #if DEVELOPMENT || DEBUG
4840 	if (syscallfilter_disable) {
4841 		printf("proc_set_syscall_filter_mask: attempt to set policy for pid %d, but disabled by boot-arg\n", proc_pid(p));
4842 		return 0;
4843 	}
4844 #endif // DEVELOPMENT || DEBUG
4845 
4846 	switch (which) {
4847 	case SYSCALL_MASK_UNIX:
4848 		if (maskptr != NULL && masklen != nsysent) {
4849 			return EINVAL;
4850 		}
4851 		proc_syscall_filter_mask_set(p, maskptr);
4852 		break;
4853 	case SYSCALL_MASK_MACH:
4854 		if (maskptr != NULL && masklen != (size_t)mach_trap_count) {
4855 			return EINVAL;
4856 		}
4857 		mac_task_set_mach_filter_mask(proc_task(p), maskptr);
4858 		break;
4859 	case SYSCALL_MASK_KOBJ:
4860 		if (maskptr != NULL && masklen != (size_t)mach_kobj_count) {
4861 			return EINVAL;
4862 		}
4863 		mac_task_set_kobj_filter_mask(proc_task(p), maskptr);
4864 		break;
4865 	default:
4866 		return EINVAL;
4867 	}
4868 
4869 	return 0;
4870 }
4871 
4872 int
4873 proc_set_syscall_filter_callbacks(syscall_filter_cbs_t cbs)
4874 {
4875 	if (cbs->version != SYSCALL_FILTER_CALLBACK_VERSION) {
4876 		return EINVAL;
4877 	}
4878 
4879 	/* XXX register unix filter callback instead of using MACF hook. */
4880 
4881 	if (cbs->mach_filter_cbfunc || cbs->kobj_filter_cbfunc) {
4882 		if (mac_task_register_filter_callbacks(cbs->mach_filter_cbfunc,
4883 		    cbs->kobj_filter_cbfunc) != 0) {
4884 			return EPERM;
4885 		}
4886 	}
4887 
4888 	return 0;
4889 }
4890 
4891 int
4892 proc_set_syscall_filter_index(int which, int num, int index)
4893 {
4894 	switch (which) {
4895 	case SYSCALL_MASK_KOBJ:
4896 		if (ipc_kobject_set_kobjidx(num, index) != 0) {
4897 			return ENOENT;
4898 		}
4899 		break;
4900 	default:
4901 		return EINVAL;
4902 	}
4903 
4904 	return 0;
4905 }
4906 #endif /* CONFIG_MACF */
4907 
4908 int
4909 proc_set_filter_message_flag(proc_t p, boolean_t flag)
4910 {
4911 	if (p == PROC_NULL) {
4912 		return EINVAL;
4913 	}
4914 
4915 	task_set_filter_msg_flag(proc_task(p), flag);
4916 
4917 	return 0;
4918 }
4919 
4920 int
4921 proc_get_filter_message_flag(proc_t p, boolean_t *flag)
4922 {
4923 	if (p == PROC_NULL || flag == NULL) {
4924 		return EINVAL;
4925 	}
4926 
4927 	*flag = task_get_filter_msg_flag(proc_task(p));
4928 
4929 	return 0;
4930 }
4931 
4932 bool
4933 proc_is_traced(proc_t p)
4934 {
4935 	bool ret = FALSE;
4936 	assert(p != PROC_NULL);
4937 	proc_lock(p);
4938 	if (p->p_lflag & P_LTRACED) {
4939 		ret = TRUE;
4940 	}
4941 	proc_unlock(p);
4942 	return ret;
4943 }
4944 
4945 #if CONFIG_PROC_RESOURCE_LIMITS
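/*
 * Per-process file-descriptor and kqworkloop limits.  The setters below
 * record soft/hard limits in the filedesc structure (a soft limit at or
 * above the hard limit is ignored) and immediately re-check whether current
 * usage already exceeds them via fd_check_limit_exceeded() /
 * kqworkloop_check_limit_exceeded(); exceeded limits are later reported from
 * proc_filedesc_ast().
 */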
4946 int
4947 proc_set_filedesc_limits(proc_t p, int soft_limit, int hard_limit)
4948 {
4949 	struct filedesc *fdp = &p->p_fd;
4950 	int retval = 0;
4951 
4952 	proc_fdlock(p);
4953 
4954 	if (hard_limit > 0) {
4955 		if (soft_limit >= hard_limit) {
4956 			soft_limit = 0;
4957 		}
4958 	}
4959 	fdp->fd_nfiles_soft_limit = soft_limit;
4960 	fdp->fd_nfiles_hard_limit = hard_limit;
4961 	/* Make sure that current fd_nfiles hasn't already exceeded these limits */
4962 	fd_check_limit_exceeded(fdp);
4963 
4964 	proc_fdunlock(p);
4965 
4966 	return retval;
4967 }
4968 
4969 int
4970 proc_set_kqworkloop_limits(proc_t p, int soft_limit, int hard_limit)
4971 {
4972 	struct filedesc *fdp = &p->p_fd;
4973 	lck_mtx_lock_spin_always(&fdp->fd_kqhashlock);
4974 
4975 	fdp->kqwl_dyn_soft_limit = soft_limit;
4976 	fdp->kqwl_dyn_hard_limit = hard_limit;
4977 	/* Make sure existing limits aren't exceeded already */
4978 	kqworkloop_check_limit_exceeded(fdp);
4979 
4980 	lck_mtx_unlock(&fdp->fd_kqhashlock);
4981 	return 0;
4982 }
4983 
4984 static int
4985 proc_evaluate_fd_limits_ast(proc_t p, struct filedesc *fdp, int *soft_limit, int *hard_limit)
4986 {
4987 	int fd_current_size, fd_soft_limit, fd_hard_limit;
4988 	proc_fdlock(p);
4989 
4990 	fd_current_size = fdp->fd_nfiles_open;
4991 	fd_hard_limit = fdp->fd_nfiles_hard_limit;
4992 	fd_soft_limit = fdp->fd_nfiles_soft_limit;
4993 
4994 	/*
4995 	 * If a thread is going to take action on a specific limit being exceeded, it
4996 	 * also clears that limit out to a SENTINEL so that future threads don't
4997 	 * reevaluate the limit as having been exceeded again.
4998 	 */
4999 	if (fd_hard_limit > 0 && fd_current_size >= fd_hard_limit) {
5000 		/* Clear out the soft limit when we are sending the hard limit notification */
5001 		fd_soft_limit = 0;
5002 
5003 		fdp->fd_nfiles_hard_limit = FD_LIMIT_SENTINEL;
5004 	} else if (fd_soft_limit > 0 && fd_current_size >= fd_soft_limit) {
5005 		/* Clear out hard limit when we are sending soft limit notification */
5006 		fd_hard_limit = 0;
5007 
5008 		fdp->fd_nfiles_soft_limit = FD_LIMIT_SENTINEL;
5009 	} else {
5010 		/* Neither limits were exceeded */
5011 		fd_soft_limit = fd_hard_limit = 0;
5012 	}
5013 
5014 	proc_fdunlock(p);
5015 
5016 	*soft_limit = fd_soft_limit;
5017 	*hard_limit = fd_hard_limit;
5018 	return fd_current_size;
5019 }
5020 
5021 static int
5022 proc_evaluate_kqwl_limits_ast(struct filedesc *fdp, int *soft_limit, int *hard_limit)
5023 {
5024 	lck_mtx_lock_spin_always(&fdp->fd_kqhashlock);
5025 
5026 	int kqwl_current_size = fdp->num_kqwls;
5027 	int kqwl_soft_limit = fdp->kqwl_dyn_soft_limit;
5028 	int kqwl_hard_limit = fdp->kqwl_dyn_hard_limit;
5029 
5030 	/*
5031 	 * If a thread is going to take action on a specific limit being exceeded, it
5032 	 * also clears that limit out to a SENTINEL so that future threads don't
5033 	 * reevaluate the limit as having been exceeded again.
5034 	 */
5035 	if (kqwl_hard_limit > 0 && kqwl_current_size >= kqwl_hard_limit) {
5036 		/* Clear out the soft limit when we are sending the hard limit notification */
5037 		kqwl_soft_limit = 0;
5038 
5039 		fdp->kqwl_dyn_hard_limit = KQWL_LIMIT_SENTINEL;
5040 	} else if (kqwl_soft_limit > 0 && kqwl_current_size >= kqwl_soft_limit) {
5041 		/* Clear out hard limit when we are sending soft limit notification */
5042 		kqwl_hard_limit = 0;
5043 
5044 		fdp->kqwl_dyn_soft_limit = KQWL_LIMIT_SENTINEL;
5045 	} else {
5046 		/* Neither limits were exceeded */
5047 		kqwl_soft_limit = kqwl_hard_limit = 0;
5048 	}
5049 
5050 	lck_mtx_unlock(&fdp->fd_kqhashlock);
5051 
5052 	*soft_limit = kqwl_soft_limit;
5053 	*hard_limit = kqwl_hard_limit;
5054 	return kqwl_current_size;
5055 }
5056 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
5057 
5058 void
5059 proc_filedesc_ast(__unused task_t task)
5060 {
5061 #if CONFIG_PROC_RESOURCE_LIMITS
5062 	assert(task == current_task());
5063 	proc_t p = get_bsdtask_info(task);
5064 	struct filedesc *fdp = &p->p_fd;
5065 
5066 	/*
5067 	 * At this point, we can possibly race with other threads which set the AST
5068 	 * due to triggering the soft/hard limits for fd or kqworkloops.
5069 	 *
5070 	 * The first thread to reach this logic will always evaluate hard limit for fd
5071 	 * or kqworkloops even if it was the one which triggered the soft limit for
5072 	 * them.
5073 	 *
5074 	 * If a thread takes action on a specific limit, it will clear the limit value
5075 	 * in the fdp with a SENTINEL to indicate to other racing threads that they no
5076 	 * longer need to evaluate it.
5077 	 */
5078 	int soft_limit, hard_limit;
5079 	int fd_current_size = proc_evaluate_fd_limits_ast(p, fdp, &soft_limit, &hard_limit);
5080 
5081 	if (hard_limit || soft_limit) {
5082 		return task_filedesc_ast(task, fd_current_size, soft_limit, hard_limit);
5083 	}
5084 
5085 	int kqwl_current_size = proc_evaluate_kqwl_limits_ast(fdp, &soft_limit, &hard_limit);
5086 	if (hard_limit || soft_limit) {
5087 		return task_kqworkloop_ast(task, kqwl_current_size, soft_limit, hard_limit);
5088 	}
5089 #endif /* CONFIG_PROC_RESOURCE_LIMITS */
5090 }
5091 
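/*
 * proc_ro: read-only data shared between a proc and its task, allocated from
 * the read-only zone (ZONE_ID_PROC_RO).  The element is write-protected in
 * normal operation; fields are changed only through the zalloc_ro_update_*()
 * interfaces used below.
 */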
5092 proc_ro_t
5093 proc_ro_alloc(proc_t p, proc_ro_data_t p_data, task_t t, task_ro_data_t t_data)
5094 {
5095 	proc_ro_t pr;
5096 	struct proc_ro pr_local = {};
5097 
5098 	pr = (proc_ro_t)zalloc_ro(ZONE_ID_PROC_RO, Z_WAITOK | Z_NOFAIL | Z_ZERO);
5099 
5100 	if (p != PROC_NULL) {
5101 		pr_local.pr_proc = p;
5102 		pr_local.proc_data = *p_data;
5103 	}
5104 
5105 	if (t != TASK_NULL) {
5106 		pr_local.pr_task = t;
5107 		pr_local.task_data = *t_data;
5108 	}
5109 
5110 	if ((p != PROC_NULL) || (t != TASK_NULL)) {
5111 		zalloc_ro_update_elem(ZONE_ID_PROC_RO, pr, &pr_local);
5112 	}
5113 
5114 	return pr;
5115 }
5116 
5117 proc_ro_t
5118 proc_ro_ref_task(proc_ro_t pr, task_t t, task_ro_data_t t_data)
5119 {
5120 	struct proc_ro pr_local;
5121 
5122 	if (pr->pr_task != TASK_NULL) {
5123 		panic("%s: proc_ro already has an owning task", __func__);
5124 	}
5125 
5126 	pr_local = *pr;
5127 	pr_local.pr_task = t;
5128 	pr_local.task_data = *t_data;
5129 
5130 	zalloc_ro_update_elem(ZONE_ID_PROC_RO, pr, &pr_local);
5131 
5132 	return pr;
5133 }
5134 
5135 void
5136 proc_ro_erase_task(proc_ro_t pr)
5137 {
5138 	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO,
5139 	    pr, pr_task, ZRO_ATOMIC_XCHG_LONG, TASK_NULL);
5140 }
5141 
5142 __abortlike
5143 static void
5144 panic_proc_ro_proc_backref_mismatch(proc_t p, proc_ro_t ro)
5145 {
5146 	panic("proc_ro->proc backref mismatch: p=%p, ro=%p, "
5147 	    "ro->pr_proc(ro)=%p", p, ro, ro->pr_proc);
5148 }
5149 
5150 proc_ro_t
5151 proc_get_ro(proc_t p)
5152 {
5153 	proc_ro_t ro = p->p_proc_ro;
5154 
5155 	zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
5156 	if (__improbable(ro->pr_proc != p)) {
5157 		panic_proc_ro_proc_backref_mismatch(p, ro);
5158 	}
5159 
5160 	return ro;
5161 }
5162 
5163 task_t
5164 proc_ro_task(proc_ro_t pr)
5165 {
5166 	return pr->pr_task;
5167 }
5168 
5169 /*
5170  * pid_for_task
5171  *
5172  * Find the BSD process ID for the Mach task associated with the given Mach port
5173  * name
5174  *
5175  * Parameters:	args		User argument descriptor (see below)
5176  *
5177  * Indirect parameters:	args->t		Mach port name
5178  *                      args->pid	Process ID (returned value; see below)
5179  *
5180  * Returns:	KERN_SUCCESS	Success
5181  *              KERN_FAILURE	Not success
5182  *
5183  * Implicit returns: args->pid		Process ID
5184  *
5185  */
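/*
 * Example (user-space sketch; assumes <mach/mach.h> and a task port for the
 * target — mach_task_self() is used here purely for illustration):
 *
 *	int pid = -1;
 *	kern_return_t kr = pid_for_task(mach_task_self(), &pid);
 *	if (kr == KERN_SUCCESS) {
 *		// pid now holds the BSD process ID of the task
 *	}
 */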
5186 kern_return_t
5187 pid_for_task(
5188 	struct pid_for_task_args *args)
5189 {
5190 	mach_port_name_t        t = args->t;
5191 	user_addr_t             pid_addr  = args->pid;
5192 	proc_t p;
5193 	task_t          t1;
5194 	int     pid = -1;
5195 	kern_return_t   err = KERN_SUCCESS;
5196 
5197 	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
5198 	AUDIT_ARG(mach_port1, t);
5199 
5200 	t1 = port_name_to_task_name(t);
5201 
5202 	if (t1 == TASK_NULL) {
5203 		err = KERN_FAILURE;
5204 		goto pftout;
5205 	} else {
5206 		p = get_bsdtask_info(t1);
5207 		if (p) {
5208 			pid  = proc_pid(p);
5209 			err = KERN_SUCCESS;
5210 		} else if (task_is_a_corpse(t1)) {
5211 			pid = task_pid(t1);
5212 			err = KERN_SUCCESS;
5213 		} else {
5214 			err = KERN_FAILURE;
5215 		}
5216 	}
5217 	task_deallocate(t1);
5218 pftout:
5219 	AUDIT_ARG(pid, pid);
5220 	(void) copyout((char *) &pid, pid_addr, sizeof(int));
5221 	AUDIT_MACH_SYSCALL_EXIT(err);
5222 	return err;
5223 }
5224 
5225 /*
5226  *
5227  * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
5228  * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
5229  *
5230  */
5231 static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
5232 
5233 static int
5234 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
5235     __unused int arg2, struct sysctl_req *req)
5236 {
5237 	int error = 0;
5238 	int new_value;
5239 
5240 	error = SYSCTL_OUT(req, arg1, sizeof(int));
5241 	if (error || req->newptr == USER_ADDR_NULL) {
5242 		return error;
5243 	}
5244 
5245 	if (!kauth_cred_issuser(kauth_cred_get())) {
5246 		return EPERM;
5247 	}
5248 
5249 	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
5250 		goto out;
5251 	}
5252 	if ((new_value == KERN_TFP_POLICY_DENY)
5253 	    || (new_value == KERN_TFP_POLICY_DEFAULT)) {
5254 		tfp_policy = new_value;
5255 	} else {
5256 		error = EINVAL;
5257 	}
5258 out:
5259 	return error;
5260 }
5261 
5262 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
5263 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
5264     &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
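/*
 * Example (user-space sketch; requires root — the OID registered above is
 * "kern.tfp.policy"):
 *
 *	int deny = KERN_TFP_POLICY_DENY;
 *	sysctlbyname("kern.tfp.policy", NULL, NULL, &deny, sizeof(deny));
 */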
5265 
5266 /*
5267  *	Routine:	task_for_pid_posix_check
5268  *	Purpose:
5269  *			Verify that the current process should be allowed to
5270  *			get the target process's task port. This is only
5271  *			permitted if:
5272  *			- The current process is root
5273  *			OR all of the following are true:
5274  *			- The target process's real, effective, and saved uids
5275  *			  are the same as the current proc's euid,
5276  *			- The target process's group set is a subset of the
5277  *			  calling process's group set, and
5278  *			- The target process hasn't switched credentials.
5279  *
5280  *	Returns:	TRUE: permitted
5281  *			FALSE: denied
5282  */
5283 static int
5284 task_for_pid_posix_check(proc_t target)
5285 {
5286 	kauth_cred_t targetcred, mycred;
5287 	bool checkcredentials;
5288 	uid_t myuid;
5289 	int allowed;
5290 
5291 	/* No task_for_pid on bad targets */
5292 	if (target->p_stat == SZOMB) {
5293 		return FALSE;
5294 	}
5295 
5296 	mycred = kauth_cred_get();
5297 	myuid = kauth_cred_getuid(mycred);
5298 
5299 	/* If we're running as root, the check passes */
5300 	if (kauth_cred_issuser(mycred)) {
5301 		return TRUE;
5302 	}
5303 
5304 	/* We're allowed to get our own task port */
5305 	if (target == current_proc()) {
5306 		return TRUE;
5307 	}
5308 
5309 	/*
5310 	 * Under DENY, only root can get another proc's task port,
5311 	 * so no more checks are needed.
5312 	 */
5313 	if (tfp_policy == KERN_TFP_POLICY_DENY) {
5314 		return FALSE;
5315 	}
5316 
5317 	targetcred = kauth_cred_proc_ref(target);
5318 	allowed = TRUE;
5319 
5320 	checkcredentials = !proc_is_third_party_debuggable_driver(target);
5321 
5322 	if (checkcredentials) {
5323 		/* Do target's ruid, euid, and saved uid match my euid? */
5324 		if ((kauth_cred_getuid(targetcred) != myuid) ||
5325 		    (kauth_cred_getruid(targetcred) != myuid) ||
5326 		    (kauth_cred_getsvuid(targetcred) != myuid)) {
5327 			allowed = FALSE;
5328 			goto out;
5329 		}
5330 		/* Are target's groups a subset of my groups? */
5331 		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
5332 		    allowed == 0) {
5333 			allowed = FALSE;
5334 			goto out;
5335 		}
5336 	}
5337 
5338 	/* Has target switched credentials? */
5339 	if (target->p_flag & P_SUGID) {
5340 		allowed = FALSE;
5341 		goto out;
5342 	}
5343 
5344 out:
5345 	kauth_cred_unref(&targetcred);
5346 	return allowed;
5347 }
5348 
5349 /*
5350  *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
5351  *
5352  *	Description:	Waits for the user space daemon to respond to the request
5353  *			we made. The function is declared non-inline so that it is visible in
5354  *			stackshots and spindumps, as well as when debugging.
5355  */
5356 static __attribute__((noinline)) int
5357 __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
5358 	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
5359 {
5360 	return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
5361 }
5362 
5363 /*
5364  *	Routine:	task_for_pid
5365  *	Purpose:
5366  *		Get the task port for another "process", named by its
5367  *		process ID on the same host as "target_task".
5368  *
5369  *		Only permitted to privileged processes, or processes
5370  *		with the same user ID.
5371  *
5372  *		Note: if pid == 0, an error is returned no matter who is calling.
5373  *
5374  * XXX This should be a BSD system call, not a Mach trap!!!
5375  */
5376 kern_return_t
5377 task_for_pid(
5378 	struct task_for_pid_args *args)
5379 {
5380 	mach_port_name_t        target_tport = args->target_tport;
5381 	int                     pid = args->pid;
5382 	user_addr_t             task_addr = args->t;
5383 	proc_t                  p = PROC_NULL;
5384 	task_t                  t1 = TASK_NULL;
5385 	task_t                  task = TASK_NULL;
5386 	mach_port_name_t        tret = MACH_PORT_NULL;
5387 	ipc_port_t              tfpport = MACH_PORT_NULL;
5388 	void                    * sright = NULL;
5389 	int                     error = 0;
5390 	boolean_t               is_current_proc = FALSE;
5391 	struct proc_ident       pident = {0};
5392 
5393 	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
5394 	AUDIT_ARG(pid, pid);
5395 	AUDIT_ARG(mach_port1, target_tport);
5396 
5397 	/* Always check if pid == 0 */
5398 	if (pid == 0) {
5399 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
5400 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
5401 		return KERN_FAILURE;
5402 	}
5403 
5404 	t1 = port_name_to_task(target_tport);
5405 	if (t1 == TASK_NULL) {
5406 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
5407 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
5408 		return KERN_FAILURE;
5409 	}
5410 
5411 
5412 	p = proc_find(pid);
5413 	if (p == PROC_NULL) {
5414 		error = KERN_FAILURE;
5415 		goto tfpout;
5416 	}
5417 	pident = proc_ident(p);
5418 	is_current_proc = (p == current_proc());
5419 
5420 #if CONFIG_AUDIT
5421 	AUDIT_ARG(process, p);
5422 #endif
5423 
5424 	if (!(task_for_pid_posix_check(p))) {
5425 		error = KERN_FAILURE;
5426 		goto tfpout;
5427 	}
5428 
5429 	if (proc_task(p) == TASK_NULL) {
5430 		error = KERN_SUCCESS;
5431 		goto tfpout;
5432 	}
5433 
5434 	/*
5435 	 * Grab a task reference and drop the proc reference as the proc ref
5436 	 * shouldn't be held across upcalls.
5437 	 */
5438 	task = proc_task(p);
5439 	task_reference(task);
5440 
5441 	proc_rele(p);
5442 	p = PROC_NULL;
5443 
5444 	/* IPC is not active on the task until after `exec_resettextvp` has been called.
5445 	 * We don't want to call into MAC hooks until we know that this has occurred, otherwise
5446 	 * AMFI and others will read uninitialized fields from the csproc
5447 	 */
5448 	if (!task_is_ipc_active(task)) {
5449 		error = KERN_FAILURE;
5450 		goto tfpout;
5451 	}
5452 
5453 #if CONFIG_MACF
5454 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
5455 	if (error) {
5456 		error = KERN_FAILURE;
5457 		goto tfpout;
5458 	}
5459 #endif
5460 
5461 	/* If we aren't root and target's task access port is set... */
5462 	if (!kauth_cred_issuser(kauth_cred_get()) &&
5463 	    !is_current_proc &&
5464 	    (task_get_task_access_port(task, &tfpport) == 0) &&
5465 	    (tfpport != IPC_PORT_NULL)) {
5466 		if (tfpport == IPC_PORT_DEAD) {
5467 			error = KERN_PROTECTION_FAILURE;
5468 			goto tfpout;
5469 		}
5470 
5471 		/* Call up to the task access server */
5472 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
5473 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
5474 
5475 		if (error != MACH_MSG_SUCCESS) {
5476 			if (error == MACH_RCV_INTERRUPTED) {
5477 				error = KERN_ABORTED;
5478 			} else {
5479 				error = KERN_FAILURE;
5480 			}
5481 			goto tfpout;
5482 		}
5483 	}
5484 
5485 	/* Grant task port access */
5486 	extmod_statistics_incr_task_for_pid(task);
5487 
5488 	/* this reference will be consumed during conversion */
5489 	task_reference(task);
5490 	if (task == current_task()) {
5491 		/* return pinned self if current_task() so equality check with mach_task_self_ passes */
5492 		sright = (void *)convert_task_to_port_pinned(task);
5493 	} else {
5494 		sright = (void *)convert_task_to_port(task);
5495 	}
5496 	/* extra task ref consumed */
5497 
5498 	/*
5499 	 * Check if the task has been corpsified. We must do so after conversion
5500 	 * since we don't hold locks and may have grabbed a corpse control port
5501 	 * above which will prevent no-senders notification delivery.
5502 	 */
5503 	if (task_is_a_corpse(task)) {
5504 		ipc_port_release_send(sright);
5505 		error = KERN_FAILURE;
5506 		goto tfpout;
5507 	}
5508 
5509 	tret = ipc_port_copyout_send(
5510 		sright,
5511 		get_task_ipcspace(current_task()));
5512 
5513 	error = KERN_SUCCESS;
5514 
5515 tfpout:
5516 	task_deallocate(t1);
5517 	AUDIT_ARG(mach_port2, tret);
5518 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
5519 
5520 	if (tfpport != IPC_PORT_NULL) {
5521 		ipc_port_release_send(tfpport);
5522 	}
5523 	if (task != TASK_NULL) {
5524 		task_deallocate(task);
5525 	}
5526 	if (p != PROC_NULL) {
5527 		proc_rele(p);
5528 	}
5529 	AUDIT_MACH_SYSCALL_EXIT(error);
5530 	return error;
5531 }
5532 
5533 /*
5534  *	Routine:	task_name_for_pid
5535  *	Purpose:
5536  *		Get the task name port for another "process", named by its
5537  *		process ID on the same host as "target_task".
5538  *
5539  *		Only permitted to privileged processes, or processes
5540  *		with the same user ID.
5541  *
5542  * XXX This should be a BSD system call, not a Mach trap!!!
5543  */
5544 
5545 kern_return_t
5546 task_name_for_pid(
5547 	struct task_name_for_pid_args *args)
5548 {
5549 	mach_port_name_t        target_tport = args->target_tport;
5550 	int                     pid = args->pid;
5551 	user_addr_t             task_addr = args->t;
5552 	proc_t                  p = PROC_NULL;
5553 	task_t                  t1 = TASK_NULL;
5554 	mach_port_name_t        tret = MACH_PORT_NULL;
5555 	void * sright;
5556 	int error = 0, refheld = 0;
5557 	kauth_cred_t target_cred;
5558 
5559 	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
5560 	AUDIT_ARG(pid, pid);
5561 	AUDIT_ARG(mach_port1, target_tport);
5562 
5563 	t1 = port_name_to_task(target_tport);
5564 	if (t1 == TASK_NULL) {
5565 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
5566 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
5567 		return KERN_FAILURE;
5568 	}
5569 
5570 	p = proc_find(pid);
5571 	if (p != PROC_NULL) {
5572 		AUDIT_ARG(process, p);
5573 		target_cred = kauth_cred_proc_ref(p);
5574 		refheld = 1;
5575 
5576 		if ((p->p_stat != SZOMB)
5577 		    && ((current_proc() == p)
5578 		    || kauth_cred_issuser(kauth_cred_get())
5579 		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
5580 		    ((kauth_cred_getruid(target_cred) == kauth_getruid())))
5581 		    || IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
5582 		    )) {
5583 			if (proc_task(p) != TASK_NULL) {
5584 				struct proc_ident pident = proc_ident(p);
5585 
5586 				task_t task = proc_task(p);
5587 
5588 				task_reference(task);
5589 				proc_rele(p);
5590 				p = PROC_NULL;
5591 #if CONFIG_MACF
5592 				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
5593 				if (error) {
5594 					task_deallocate(task);
5595 					goto noperm;
5596 				}
5597 #endif
5598 				sright = (void *)convert_task_name_to_port(task);
5599 				task = NULL;
5600 				tret = ipc_port_copyout_send(sright,
5601 				    get_task_ipcspace(current_task()));
5602 			} else {
5603 				tret  = MACH_PORT_NULL;
5604 			}
5605 
5606 			AUDIT_ARG(mach_port2, tret);
5607 			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
5608 			task_deallocate(t1);
5609 			error = KERN_SUCCESS;
5610 			goto tnfpout;
5611 		}
5612 	}
5613 
5614 #if CONFIG_MACF
5615 noperm:
5616 #endif
5617 	task_deallocate(t1);
5618 	tret = MACH_PORT_NULL;
5619 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
5620 	error = KERN_FAILURE;
5621 tnfpout:
5622 	if (refheld != 0) {
5623 		kauth_cred_unref(&target_cred);
5624 	}
5625 	if (p != PROC_NULL) {
5626 		proc_rele(p);
5627 	}
5628 	AUDIT_MACH_SYSCALL_EXIT(error);
5629 	return error;
5630 }
5631 
5632 /*
5633  *	Routine:	task_inspect_for_pid
5634  *	Purpose:
5635  *		Get the task inspect port for another "process", named by its
5636  *		process ID on the same host as "target_task".
5637  */
5638 int
5639 task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
5640 {
5641 	mach_port_name_t        target_tport = args->target_tport;
5642 	int                     pid = args->pid;
5643 	user_addr_t             task_addr = args->t;
5644 
5645 	proc_t                  proc = PROC_NULL;
5646 	task_t                  t1 = TASK_NULL;
5647 	task_inspect_t          task_insp = TASK_INSPECT_NULL;
5648 	mach_port_name_t        tret = MACH_PORT_NULL;
5649 	ipc_port_t              tfpport = MACH_PORT_NULL;
5650 	int                     error = 0;
5651 	void                    *sright = NULL;
5652 	boolean_t               is_current_proc = FALSE;
5653 	struct proc_ident       pident = {0};
5654 
5655 	/* Disallow inspect port for kernel_task */
5656 	if (pid == 0) {
5657 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
5658 		return EPERM;
5659 	}
5660 
5661 	t1 = port_name_to_task(target_tport);
5662 	if (t1 == TASK_NULL) {
5663 		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
5664 		return EINVAL;
5665 	}
5666 
5667 	proc = proc_find(pid);
5668 	if (proc == PROC_NULL) {
5669 		error = ESRCH;
5670 		goto tifpout;
5671 	}
5672 	pident = proc_ident(proc);
5673 	is_current_proc = (proc == current_proc());
5674 
5675 	if (!(task_for_pid_posix_check(proc))) {
5676 		error = EPERM;
5677 		goto tifpout;
5678 	}
5679 
5680 	task_insp = proc_task(proc);
5681 	if (task_insp == TASK_INSPECT_NULL) {
5682 		goto tifpout;
5683 	}
5684 
5685 	/*
5686 	 * Grab a task reference and drop the proc reference before making any upcalls.
5687 	 */
5688 	task_reference(task_insp);
5689 
5690 	proc_rele(proc);
5691 	proc = PROC_NULL;
5692 
5693 #if CONFIG_MACF
5694 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
5695 	if (error) {
5696 		error = EPERM;
5697 		goto tifpout;
5698 	}
5699 #endif
5700 
5701 	/* If we aren't root and target's task access port is set... */
5702 	if (!kauth_cred_issuser(kauth_cred_get()) &&
5703 	    !is_current_proc &&
5704 	    (task_get_task_access_port(task_insp, &tfpport) == 0) &&
5705 	    (tfpport != IPC_PORT_NULL)) {
5706 		if (tfpport == IPC_PORT_DEAD) {
5707 			error = EACCES;
5708 			goto tifpout;
5709 		}
5710 
5711 
5712 		/* Call up to the task access server */
5713 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
5714 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);
5715 
5716 		if (error != MACH_MSG_SUCCESS) {
5717 			if (error == MACH_RCV_INTERRUPTED) {
5718 				error = EINTR;
5719 			} else {
5720 				error = EPERM;
5721 			}
5722 			goto tifpout;
5723 		}
5724 	}
5725 
5726 	/* Check if the task has been corpsified */
5727 	if (task_is_a_corpse(task_insp)) {
5728 		error = EACCES;
5729 		goto tifpout;
5730 	}
5731 
5732 	/* could be IP_NULL, consumes a ref */
5733 	sright = (void*) convert_task_inspect_to_port(task_insp);
5734 	task_insp = TASK_INSPECT_NULL;
5735 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
5736 
5737 tifpout:
5738 	task_deallocate(t1);
5739 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
5740 	if (proc != PROC_NULL) {
5741 		proc_rele(proc);
5742 	}
5743 	if (tfpport != IPC_PORT_NULL) {
5744 		ipc_port_release_send(tfpport);
5745 	}
5746 	if (task_insp != TASK_INSPECT_NULL) {
5747 		task_deallocate(task_insp);
5748 	}
5749 
5750 	*ret = error;
5751 	return error;
5752 }
5753 
5754 /*
5755  *	Routine:	task_read_for_pid
5756  *	Purpose:
5757  *		Get the task read port for another "process", named by its
5758  *		process ID on the same host as "target_task".
5759  */
5760 int
5761 task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
5762 {
5763 	mach_port_name_t        target_tport = args->target_tport;
5764 	int                     pid = args->pid;
5765 	user_addr_t             task_addr = args->t;
5766 
5767 	proc_t                  proc = PROC_NULL;
5768 	task_t                  t1 = TASK_NULL;
5769 	task_read_t             task_read = TASK_READ_NULL;
5770 	mach_port_name_t        tret = MACH_PORT_NULL;
5771 	ipc_port_t              tfpport = MACH_PORT_NULL;
5772 	int                     error = 0;
5773 	void                    *sright = NULL;
5774 	boolean_t               is_current_proc = FALSE;
5775 	struct proc_ident       pident = {0};
5776 
5777 	/* Disallow read port for kernel_task */
5778 	if (pid == 0) {
5779 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
5780 		return EPERM;
5781 	}
5782 
5783 	t1 = port_name_to_task(target_tport);
5784 	if (t1 == TASK_NULL) {
5785 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
5786 		return EINVAL;
5787 	}
5788 
5789 	proc = proc_find(pid);
5790 	if (proc == PROC_NULL) {
5791 		error = ESRCH;
5792 		goto trfpout;
5793 	}
5794 	pident = proc_ident(proc);
5795 	is_current_proc = (proc == current_proc());
5796 
5797 	if (!(task_for_pid_posix_check(proc))) {
5798 		error = EPERM;
5799 		goto trfpout;
5800 	}
5801 
5802 	task_read = proc_task(proc);
5803 	if (task_read == TASK_INSPECT_NULL) {
5804 		goto trfpout;
5805 	}
5806 
5807 	/*
5808 	 * Grab a task reference and drop the proc reference before making any upcalls.
5809 	 */
5810 	task_reference(task_read);
5811 
5812 	proc_rele(proc);
5813 	proc = PROC_NULL;
5814 
5815 #if CONFIG_MACF
5816 	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
5817 	if (error) {
5818 		error = EPERM;
5819 		goto trfpout;
5820 	}
5821 #endif
5822 
5823 	/* If we aren't root and target's task access port is set... */
5824 	if (!kauth_cred_issuser(kauth_cred_get()) &&
5825 	    !is_current_proc &&
5826 	    (task_get_task_access_port(task_read, &tfpport) == 0) &&
5827 	    (tfpport != IPC_PORT_NULL)) {
5828 		if (tfpport == IPC_PORT_DEAD) {
5829 			error = EACCES;
5830 			goto trfpout;
5831 		}
5832 
5833 
5834 		/* Call up to the task access server */
5835 		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
5836 		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
5837 
5838 		if (error != MACH_MSG_SUCCESS) {
5839 			if (error == MACH_RCV_INTERRUPTED) {
5840 				error = EINTR;
5841 			} else {
5842 				error = EPERM;
5843 			}
5844 			goto trfpout;
5845 		}
5846 	}
5847 
5848 	/* Check if the task has been corpsified */
5849 	if (task_is_a_corpse(task_read)) {
5850 		error = EACCES;
5851 		goto trfpout;
5852 	}
5853 
5854 	/* could be IP_NULL, consumes a ref */
5855 	sright = (void*) convert_task_read_to_port(task_read);
5856 	task_read = TASK_READ_NULL;
5857 	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
5858 
5859 trfpout:
5860 	task_deallocate(t1);
5861 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
5862 	if (proc != PROC_NULL) {
5863 		proc_rele(proc);
5864 	}
5865 	if (tfpport != IPC_PORT_NULL) {
5866 		ipc_port_release_send(tfpport);
5867 	}
5868 	if (task_read != TASK_READ_NULL) {
5869 		task_deallocate(task_read);
5870 	}
5871 
5872 	*ret = error;
5873 	return error;
5874 }
5875 
5876 kern_return_t
5877 pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
5878 {
5879 	task_t  target = NULL;
5880 	proc_t  targetproc = PROC_NULL;
5881 	int     pid = args->pid;
5882 	int     error = 0;
5883 	mach_port_t tfpport = MACH_PORT_NULL;
5884 
5885 	if (pid == 0) {
5886 		error = EPERM;
5887 		goto out;
5888 	}
5889 
5890 	targetproc = proc_find(pid);
5891 	if (targetproc == PROC_NULL) {
5892 		error = ESRCH;
5893 		goto out;
5894 	}
5895 
5896 	if (!task_for_pid_posix_check(targetproc) &&
5897 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
5898 		error = EPERM;
5899 		goto out;
5900 	}
5901 
5902 #if CONFIG_MACF
5903 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
5904 	if (error) {
5905 		error = EPERM;
5906 		goto out;
5907 	}
5908 #endif
5909 
5910 	target = proc_task(targetproc);
5911 #if XNU_TARGET_OS_OSX
5912 	if (target != TASK_NULL) {
5913 		/* If we aren't root and target's task access port is set... */
5914 		if (!kauth_cred_issuser(kauth_cred_get()) &&
5915 		    targetproc != current_proc() &&
5916 		    (task_get_task_access_port(target, &tfpport) == 0) &&
5917 		    (tfpport != IPC_PORT_NULL)) {
5918 			if (tfpport == IPC_PORT_DEAD) {
5919 				error = EACCES;
5920 				goto out;
5921 			}
5922 
5923 			/* Call up to the task access server */
5924 			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
5925 			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
5926 
5927 			if (error != MACH_MSG_SUCCESS) {
5928 				if (error == MACH_RCV_INTERRUPTED) {
5929 					error = EINTR;
5930 				} else {
5931 					error = EPERM;
5932 				}
5933 				goto out;
5934 			}
5935 		}
5936 	}
5937 #endif /* XNU_TARGET_OS_OSX */
5938 
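	/*
	 * Take a task reference across the suspend call and translate any Mach
	 * failure into a BSD errno for the caller.
	 */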
5939 	task_reference(target);
5940 	error = task_pidsuspend(target);
5941 	if (error) {
5942 		if (error == KERN_INVALID_ARGUMENT) {
5943 			error = EINVAL;
5944 		} else {
5945 			error = EPERM;
5946 		}
5947 	}
5948 #if CONFIG_MEMORYSTATUS
5949 	else {
5950 		memorystatus_on_suspend(targetproc);
5951 	}
5952 #endif
5953 
5954 	task_deallocate(target);
5955 
5956 out:
5957 	if (tfpport != IPC_PORT_NULL) {
5958 		ipc_port_release_send(tfpport);
5959 	}
5960 
5961 	if (targetproc != PROC_NULL) {
5962 		proc_rele(targetproc);
5963 	}
5964 	*ret = error;
5965 	return error;
5966 }
5967 
5968 kern_return_t
5969 debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
5970 {
5971 	mach_port_name_t        target_tport = args->target_tport;
5972 	int                     pid = args->pid;
5973 	user_addr_t             task_addr = args->t;
5974 	proc_t                  p = PROC_NULL;
5975 	task_t                  t1 = TASK_NULL;
5976 	task_t                  task = TASK_NULL;
5977 	mach_port_name_t        tret = MACH_PORT_NULL;
5978 	ipc_port_t              tfpport = MACH_PORT_NULL;
5979 	ipc_port_t              sright = NULL;
5980 	int                     error = 0;
5981 	boolean_t               is_current_proc = FALSE;
5982 	struct proc_ident       pident = {0};
5983 
5984 	AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
5985 	AUDIT_ARG(pid, pid);
5986 	AUDIT_ARG(mach_port1, target_tport);
5987 
5988 	/* Always check if pid == 0 */
5989 	if (pid == 0) {
5990 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
5991 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
5992 		return KERN_FAILURE;
5993 	}
5994 
5995 	t1 = port_name_to_task(target_tport);
5996 	if (t1 == TASK_NULL) {
5997 		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
5998 		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
5999 		return KERN_FAILURE;
6000 	}
6001 
6002 	p = proc_find(pid);
6003 	if (p == PROC_NULL) {
6004 		error = KERN_FAILURE;
6005 		goto tfpout;
6006 	}
6007 	pident = proc_ident(p);
6008 	is_current_proc = (p == current_proc());
6009 
6010 #if CONFIG_AUDIT
6011 	AUDIT_ARG(process, p);
6012 #endif
6013 
6014 	if (!(task_for_pid_posix_check(p))) {
6015 		error = KERN_FAILURE;
6016 		goto tfpout;
6017 	}
6018 
6019 	if (proc_task(p) == TASK_NULL) {
6020 		error = KERN_SUCCESS;
6021 		goto tfpout;
6022 	}
6023 
6024 	/*
6025 	 * Grab a task reference and drop the proc reference before making any upcalls.
6026 	 */
6027 	task = proc_task(p);
6028 	task_reference(task);
6029 
6030 	proc_rele(p);
6031 	p = PROC_NULL;
6032 
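	/* Holders of the debug port entitlement skip the MACF and task access server checks below. */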
6033 	if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
6034 #if CONFIG_MACF
6035 		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
6036 		if (error) {
6037 			error = KERN_FAILURE;
6038 			goto tfpout;
6039 		}
6040 #endif
6041 
6042 		/* If we aren't root and target's task access port is set... */
6043 		if (!kauth_cred_issuser(kauth_cred_get()) &&
6044 		    !is_current_proc &&
6045 		    (task_get_task_access_port(task, &tfpport) == 0) &&
6046 		    (tfpport != IPC_PORT_NULL)) {
6047 			if (tfpport == IPC_PORT_DEAD) {
6048 				error = KERN_PROTECTION_FAILURE;
6049 				goto tfpout;
6050 			}
6051 
6052 
6053 			/* Call up to the task access server */
6054 			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
6055 			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
6056 
6057 			if (error != MACH_MSG_SUCCESS) {
6058 				if (error == MACH_RCV_INTERRUPTED) {
6059 					error = KERN_ABORTED;
6060 				} else {
6061 					error = KERN_FAILURE;
6062 				}
6063 				goto tfpout;
6064 			}
6065 		}
6066 	}
6067 
6068 	/* Check if the task has been corpsified */
6069 	if (task_is_a_corpse(task)) {
6070 		error = KERN_FAILURE;
6071 		goto tfpout;
6072 	}
6073 
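	/*
	 * Obtain a send right to the target task's debug control port and copy it
	 * out as a port name in the caller's IPC space.
	 */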
6074 	error = task_get_debug_control_port(task, &sright);
6075 	if (error != KERN_SUCCESS) {
6076 		goto tfpout;
6077 	}
6078 
6079 	tret = ipc_port_copyout_send(
6080 		sright,
6081 		get_task_ipcspace(current_task()));
6082 
6083 	error = KERN_SUCCESS;
6084 
6085 tfpout:
6086 	task_deallocate(t1);
6087 	AUDIT_ARG(mach_port2, tret);
6088 	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
6089 
6090 	if (tfpport != IPC_PORT_NULL) {
6091 		ipc_port_release_send(tfpport);
6092 	}
6093 	if (task != TASK_NULL) {
6094 		task_deallocate(task);
6095 	}
6096 	if (p != PROC_NULL) {
6097 		proc_rele(p);
6098 	}
6099 	AUDIT_MACH_SYSCALL_EXIT(error);
6100 	return error;
6101 }
6102 
6103 kern_return_t
6104 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
6105 {
6106 	task_t  target = NULL;
6107 	proc_t  targetproc = PROC_NULL;
6108 	int     pid = args->pid;
6109 	int     error = 0;
6110 	mach_port_t tfpport = MACH_PORT_NULL;
6111 
6112 	if (pid == 0) {
6113 		error = EPERM;
6114 		goto out;
6115 	}
6116 
6117 	targetproc = proc_find(pid);
6118 	if (targetproc == PROC_NULL) {
6119 		error = ESRCH;
6120 		goto out;
6121 	}
6122 
6123 	if (!task_for_pid_posix_check(targetproc) &&
6124 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
6125 		error = EPERM;
6126 		goto out;
6127 	}
6128 
6129 #if CONFIG_MACF
6130 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
6131 	if (error) {
6132 		error = EPERM;
6133 		goto out;
6134 	}
6135 #endif
6136 
6137 	target = proc_task(targetproc);
6138 #if XNU_TARGET_OS_OSX
6139 	if (target != TASK_NULL) {
6140 		/* If we aren't root and target's task access port is set... */
6141 		if (!kauth_cred_issuser(kauth_cred_get()) &&
6142 		    targetproc != current_proc() &&
6143 		    (task_get_task_access_port(target, &tfpport) == 0) &&
6144 		    (tfpport != IPC_PORT_NULL)) {
6145 			if (tfpport == IPC_PORT_DEAD) {
6146 				error = EACCES;
6147 				goto out;
6148 			}
6149 
6150 			/* Call up to the task access server */
6151 			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
6152 			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
6153 
6154 			if (error != MACH_MSG_SUCCESS) {
6155 				if (error == MACH_RCV_INTERRUPTED) {
6156 					error = EINTR;
6157 				} else {
6158 					error = EPERM;
6159 				}
6160 				goto out;
6161 			}
6162 		}
6163 	}
6164 #endif /* XNU_TARGET_OS_OSX */
6165 
6166 #if !XNU_TARGET_OS_OSX
6167 #if SOCKETS
6168 	resume_proc_sockets(targetproc);
6169 #endif /* SOCKETS */
6170 #endif /* !XNU_TARGET_OS_OSX */
6171 
6172 	task_reference(target);
6173 
6174 #if CONFIG_MEMORYSTATUS
6175 	memorystatus_on_resume(targetproc);
6176 #endif
6177 
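	/*
	 * Resume the task.  KERN_INVALID_ARGUMENT maps to EINVAL; on KERN_MEMORY_ERROR
	 * the target is killed with SIGKILL and EIO is returned; any other failure
	 * becomes EPERM.
	 */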
6178 	error = task_pidresume(target);
6179 	if (error) {
6180 		if (error == KERN_INVALID_ARGUMENT) {
6181 			error = EINVAL;
6182 		} else {
6183 			if (error == KERN_MEMORY_ERROR) {
6184 				psignal(targetproc, SIGKILL);
6185 				error = EIO;
6186 			} else {
6187 				error = EPERM;
6188 			}
6189 		}
6190 	}
6191 
6192 	task_deallocate(target);
6193 
6194 out:
6195 	if (tfpport != IPC_PORT_NULL) {
6196 		ipc_port_release_send(tfpport);
6197 	}
6198 
6199 	if (targetproc != PROC_NULL) {
6200 		proc_rele(targetproc);
6201 	}
6202 
6203 	*ret = error;
6204 	return error;
6205 }
6206 
6207 #if !XNU_TARGET_OS_OSX
6208 /*
6209  * Freeze the specified process (provided in args->pid), or find and freeze a PID.
6210  * When a process is specified, this call blocks until it has been frozen; otherwise
6211  * we wake the freezer thread and do not block on any particular process being frozen.
6212  */
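/*
 * pid == -2 triggers a compressor sweep of anonymous pages, pid == -1 kicks the
 * freezer via memorystatus_on_inactivity() without naming a process, and
 * pid >= 0 freezes that process synchronously.
 */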
6213 int
6214 pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
6215 {
6216 	int     error = 0;
6217 	proc_t  targetproc = PROC_NULL;
6218 	int     pid = args->pid;
6219 
6220 	/*
6221 	 * TODO: Create a different interface for compressor sweeps,
6222 	 * gated by an entitlement: rdar://116490432
6223 	 */
6224 	if (pid == -2) {
6225 		error = mach_to_bsd_errno(vm_pageout_anonymous_pages());
6226 	}
6227 
6228 #ifndef CONFIG_FREEZE
6229 	if (pid != -2) {
6230 		os_log(OS_LOG_DEFAULT, "%s: pid %d not supported when freezer is disabled.",
6231 		    __func__, pid);
6232 		error = ENOTSUP;
6233 	}
6234 #else
6235 
6236 	/*
6237 	 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
6238 	 */
6239 
6240 	if (pid >= 0) {
6241 		targetproc = proc_find(pid);
6242 
6243 		if (targetproc == PROC_NULL) {
6244 			error = ESRCH;
6245 			goto out;
6246 		}
6247 
6248 		if (!task_for_pid_posix_check(targetproc)) {
6249 			error = EPERM;
6250 			goto out;
6251 		}
6252 	}
6253 
6254 #if CONFIG_MACF
6255 	/* Note that targetproc may be PROC_NULL */
6256 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
6257 	if (error) {
6258 		error = EPERM;
6259 		goto out;
6260 	}
6261 #endif
6262 
6263 	if (pid == -1) {
6264 		memorystatus_on_inactivity(targetproc);
6265 	} else if (pid >= 0) {
6266 		error = memorystatus_freeze_process_sync(targetproc);
6267 	}
6268 	/* We already handled the pid == -2 case */
6269 
6270 out:
6271 
6272 #endif /* CONFIG_FREEZE */
6273 
6274 	if (targetproc != PROC_NULL) {
6275 		proc_rele(targetproc);
6276 	}
6277 	*ret = error;
6278 	return error;
6279 }
6280 #endif /* !XNU_TARGET_OS_OSX */
6281 
6282 #if SOCKETS
6283 
6284 #if SKYWALK
6285 /*
6286  * Since we make multiple passes across the fileproc array, record the
6287  * first MAX_CHANNELS channel handles found.  MAX_CHANNELS should be
6288  * large enough to accommodate most, if not all, cases.  If we find more,
6289  * we'll go to the slow path during the second pass.
6290  */
6291 #define MAX_CHANNELS    8       /* should be more than enough */
6292 #endif /* SKYWALK */
6293 
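/*
 * proc_iterate() callout: defunct sockets owned by, last used by, or delegated
 * to the target pid, along with any NECP fds and Skywalk channels belonging to
 * the target process itself.
 */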
6294 static int
6295 networking_defunct_callout(proc_t p, void *arg)
6296 {
6297 	struct pid_shutdown_sockets_args *args = arg;
6298 	int pid = args->pid;
6299 	int level = args->level;
6300 	struct fileproc *fp;
6301 #if SKYWALK
6302 	int i;
6303 	int channel_count = 0;
6304 	struct kern_channel *channel_array[MAX_CHANNELS];
6305 
6306 	bzero(&channel_array, sizeof(channel_array));
6307 
6308 	sk_protect_t protect = sk_async_transmit_protect();
6309 #endif /* SKYWALK */
6310 
6311 	proc_fdlock(p);
6312 
6313 	fdt_foreach(fp, p) {
6314 		struct fileglob *fg = fp->fp_glob;
6315 
6316 		switch (FILEGLOB_DTYPE(fg)) {
6317 		case DTYPE_SOCKET: {
6318 			struct socket *so = (struct socket *)fg_get_data(fg);
6319 			if (proc_getpid(p) == pid || so->last_pid == pid ||
6320 			    ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
6321 				/* Call networking stack with socket and level */
6322 				(void)socket_defunct(p, so, level);
6323 			}
6324 			break;
6325 		}
6326 #if NECP
6327 		case DTYPE_NETPOLICY:
6328 			/* first pass: defunct necp and get stats for ntstat */
6329 			if (proc_getpid(p) == pid) {
6330 				necp_fd_defunct(p,
6331 				    (struct necp_fd_data *)fg_get_data(fg));
6332 			}
6333 			break;
6334 #endif /* NECP */
6335 #if SKYWALK
6336 		case DTYPE_CHANNEL:
6337 			/* first pass: get channels and total count */
6338 			if (proc_getpid(p) == pid) {
6339 				if (channel_count < MAX_CHANNELS) {
6340 					channel_array[channel_count] =
6341 					    (struct kern_channel *)fg_get_data(fg);
6342 				}
6343 				++channel_count;
6344 			}
6345 			break;
6346 #endif /* SKYWALK */
6347 		default:
6348 			break;
6349 		}
6350 	}
6351 
6352 #if SKYWALK
6353 	/*
6354 	 * Second pass: defunct channels/flows (after NECP).  Handle
6355 	 * the common case of up to MAX_CHANNELS count with fast path,
6356 	 * and traverse the fileproc array again only if we exceed it.
6357 	 */
6358 	if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
6359 		ASSERT(proc_getpid(p) == pid);
6360 		for (i = 0; i < channel_count; i++) {
6361 			ASSERT(channel_array[i] != NULL);
6362 			kern_channel_defunct(p, channel_array[i]);
6363 		}
6364 	} else if (channel_count != 0) {
6365 		ASSERT(proc_getpid(p) == pid);
6366 		fdt_foreach(fp, p) {
6367 			struct fileglob *fg = fp->fp_glob;
6368 
6369 			if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
6370 				kern_channel_defunct(p,
6371 				    (struct kern_channel *)fg_get_data(fg));
6372 			}
6373 		}
6374 	}
6375 
6376 	sk_async_transmit_unprotect(protect);
6377 #endif /* SKYWALK */
6378 
6379 	proc_fdunlock(p);
6380 
6381 	return PROC_RETURNED;
6382 }
6383 
6384 int
6385 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
6386 {
6387 	int                             error = 0;
6388 	proc_t                          targetproc = PROC_NULL;
6389 	int                             pid = args->pid;
6390 	int                             level = args->level;
6391 
6392 	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
6393 	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
6394 		error = EINVAL;
6395 		goto out;
6396 	}
6397 
6398 	targetproc = proc_find(pid);
6399 	if (targetproc == PROC_NULL) {
6400 		error = ESRCH;
6401 		goto out;
6402 	}
6403 
6404 	if (!task_for_pid_posix_check(targetproc) &&
6405 	    !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
6406 		error = EPERM;
6407 		goto out;
6408 	}
6409 
6410 #if CONFIG_MACF
6411 	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
6412 	if (error) {
6413 		error = EPERM;
6414 		goto out;
6415 	}
6416 #endif
6417 
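	/*
	 * Sockets tied to the target pid may live in other processes' file tables
	 * (e.g. delegated sockets), so walk every process and let the callout filter.
	 */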
6418 	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
6419 	    networking_defunct_callout, args, NULL, NULL);
6420 
6421 out:
6422 	if (targetproc != PROC_NULL) {
6423 		proc_rele(targetproc);
6424 	}
6425 	*ret = error;
6426 	return error;
6427 }
6428 
6429 #endif /* SOCKETS */
6430