xref: /xnu-11215/osfmk/kern/kern_stackshot.c (revision 4f1223e8)
1 /*
2  * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <mach/mach_types.h>
31 #include <mach/vm_param.h>
32 #include <mach/mach_vm.h>
33 #include <mach/clock_types.h>
34 #include <sys/code_signing.h>
35 #include <sys/errno.h>
36 #include <sys/stackshot.h>
37 #if defined(__arm64__)
38 #include <arm/cpu_internal.h>
39 #endif /* __arm64__ */
40 #ifdef IMPORTANCE_INHERITANCE
41 #include <ipc/ipc_importance.h>
42 #endif
43 #include <sys/appleapiopts.h>
44 #include <kern/debug.h>
45 #include <kern/block_hint.h>
46 #include <uuid/uuid.h>
47 
48 #include <kdp/kdp_dyld.h>
49 #include <kdp/kdp_en_debugger.h>
50 #include <kdp/processor_core.h>
51 #include <kdp/kdp_common.h>
52 
53 #include <libsa/types.h>
54 #include <libkern/version.h>
55 #include <libkern/section_keywords.h>
56 
57 #include <string.h> /* bcopy */
58 
59 #include <kern/kern_stackshot.h>
60 #include <kern/backtrace.h>
61 #include <kern/coalition.h>
62 #include <kern/epoch_sync.h>
63 #include <kern/exclaves_stackshot.h>
64 #include <kern/exclaves_inspection.h>
65 #include <kern/processor.h>
66 #include <kern/host_statistics.h>
67 #include <kern/counter.h>
68 #include <kern/thread.h>
69 #include <kern/thread_group.h>
70 #include <kern/task.h>
71 #include <kern/telemetry.h>
72 #include <kern/clock.h>
73 #include <kern/policy_internal.h>
74 #include <kern/socd_client.h>
75 #include <kern/startup.h>
76 #include <vm/vm_map_xnu.h>
77 #include <vm/vm_kern_xnu.h>
78 #include <vm/vm_pageout.h>
79 #include <vm/vm_fault.h>
80 #include <vm/vm_shared_region_xnu.h>
81 #include <vm/vm_compressor_xnu.h>
82 #include <libkern/OSKextLibPrivate.h>
83 #include <os/log.h>
84 
85 #ifdef CONFIG_EXCLAVES
86 #include <kern/exclaves.tightbeam.h>
87 #endif /* CONFIG_EXCLAVES */
88 
89 #include <kern/exclaves_test_stackshot.h>
90 
91 #include <libkern/coreanalytics/coreanalytics.h>
92 
93 #if defined(__x86_64__)
94 #include <i386/mp.h>
95 #include <i386/cpu_threads.h>
96 #endif
97 
98 #include <pexpert/pexpert.h>
99 
100 #if CONFIG_PERVASIVE_CPI
101 #include <kern/monotonic.h>
102 #endif /* CONFIG_PERVASIVE_CPI */
103 
104 #include <san/kasan.h>
105 
106 #if DEBUG || DEVELOPMENT
107 #define STACKSHOT_COLLECTS_DIAGNOSTICS 1
108 #define STACKSHOT_COLLECTS_LATENCY_INFO 1
109 #else
110 #define STACKSHOT_COLLECTS_DIAGNOSTICS 0
111 #define STACKSHOT_COLLECTS_LATENCY_INFO 0
112 #endif /* DEBUG || DEVELOPMENT */
113 
114 #define STACKSHOT_COLLECTS_RDAR_126582377_DATA 0
115 
116 #if defined(__AMP__)
117 #define STACKSHOT_NUM_WORKQUEUES 2
118 #else /* __AMP__ */
119 #define STACKSHOT_NUM_WORKQUEUES 1
120 #endif
121 
122 #if defined(__arm64__)
123 #define STACKSHOT_NUM_BUFFERS MAX_CPU_CLUSTERS
124 #else /* __arm64__ */
125 #define STACKSHOT_NUM_BUFFERS 1
126 #endif /* __arm64__ */
127 
128 /* The number of threads which will land a task in the hardest workqueue. */
129 #define STACKSHOT_HARDEST_THREADCOUNT 10
130 
131 TUNABLE_DEV_WRITEABLE(unsigned int, stackshot_single_thread, "stackshot_single_thread", 0);
132 
133 extern unsigned int not_in_kdp;
134 
135 /* indicate to the compiler that some accesses are unaligned */
136 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
137 
138 int kdp_snapshot                            = 0;
139 
140 #pragma mark ---Stackshot Struct Definitions---
141 
142 typedef struct linked_kcdata_descriptor {
143 	struct kcdata_descriptor          kcdata;
144 	struct linked_kcdata_descriptor  *next;
145 } * linked_kcdata_descriptor_t;
146 
147 struct stackshot_workitem {
148 	task_t                        sswi_task;
149 	linked_kcdata_descriptor_t    sswi_data; /* The kcdata for this task. */
150 	int                           sswi_idx;  /* The index of this job, used for ordering kcdata across multiple queues. */
151 };
152 
153 struct stackshot_workqueue {
154 	uint32_t _Atomic              sswq_num_items; /* Only modified by main CPU */
155 	uint32_t _Atomic              sswq_cur_item; /* Modified by all CPUs */
156 	size_t                        sswq_capacity; /* Constant after preflight */
157 	bool _Atomic                  sswq_populated; /* Only modified by main CPU */
158 	struct stackshot_workitem    *__counted_by(sswq_capacity) sswq_items;
159 };
160 
161 struct freelist_entry {
162 	struct freelist_entry        *fl_next; /* Next entry in the freelist */
163 	size_t                        fl_size; /* Size of the entry (must be >= sizeof(struct freelist_entry)) */
164 };
165 
166 struct stackshot_buffer {
167 	void                         *ssb_ptr; /* Base of buffer */
168 	size_t                        ssb_size;
169 	size_t _Atomic                ssb_used;
170 	struct freelist_entry        *ssb_freelist; /* First freelist entry */
171 	int _Atomic                   ssb_freelist_lock;
172 	size_t _Atomic                ssb_overhead; /* Total amount ever freed (even if re-allocated from freelist) */
173 };
174 
175 struct kdp_snapshot_args {
176 	int                           pid;
177 	void                         *buffer;
178 	struct kcdata_descriptor     *descriptor;
179 	uint32_t                      buffer_size;
180 	uint64_t                      flags;
181 	uint64_t                      since_timestamp;
182 	uint32_t                      pagetable_mask;
183 };
184 
185 /*
186  * Keep a simple cache of the most recent validation done at a page granularity
187  * to avoid the expensive software KVA-to-phys translation in the VM.
188  */
189 
190 struct _stackshot_validation_state {
191 	vm_offset_t last_valid_page_kva;
192 	size_t last_valid_size;
193 };
194 
195 /* CPU-local generation counts for PLH */
196 struct _stackshot_plh_gen_state {
197 	uint8_t                *pgs_gen;       /* last 'gen #' seen for each index */
198 	int16_t                 pgs_curgen_min; /* min idx seen for this gen */
199 	int16_t                 pgs_curgen_max; /* max idx seen for this gen */
200 	uint8_t                 pgs_curgen;     /* current gen */
201 };
202 
203 /*
204  * For port labels, we have a small hash table we use to track the
205  * struct ipc_service_port_label pointers we see along the way.
206  * This structure encapsulates the global state.
207  *
208  * The hash table is insert-only, similar to "intern"ing strings.  It's
209  * only used an manipulated in during the stackshot collection.  We use
210  * only used and manipulated during the stackshot collection.  We use
211  * separate chaining, with the hash elements and chains being int16_t
212  * allocated using a bump allocator.
213  *
214  * The parallel arrays contain:
215  *      - plh_array[idx]	the pointer entered
216  *      - plh_chains[idx]	the hash chain
217  *      - plh_gen[idx]		the last 'generation #' seen
218  *
219  * Generation IDs are used to track entries looked up in the current
220  * task; 0 is never used, and the plh_gen array is cleared to 0 on
221  * rollover.
222  *
223  * The portlabel_ids we report externally are just the index in the array,
224  * plus 1 to avoid 0 as a value.  0 is NONE, -1 is UNKNOWN (e.g. there is
225  * one, but we ran out of space)
226  */
227 struct port_label_hash {
228 	int _Atomic             plh_lock;       /* lock for concurrent modifications to this plh */
229 	uint16_t                plh_size;       /* size of allocations; 0 disables tracking */
230 	uint16_t                plh_count;      /* count of used entries in plh_array */
231 	struct ipc_service_port_label **plh_array; /* _size allocated, _count used */
232 	int16_t                *plh_chains;    /* _size allocated */
233 	int16_t                *plh_hash;      /* (1 << STACKSHOT_PLH_SHIFT) entry hash table: hash(ptr) -> array index */
234 #if DEVELOPMENT || DEBUG
235 	/* statistics */
236 	uint32_t _Atomic        plh_lookups;    /* # lookups or inserts */
237 	uint32_t _Atomic        plh_found;
238 	uint32_t _Atomic        plh_found_depth;
239 	uint32_t _Atomic        plh_insert;
240 	uint32_t _Atomic        plh_insert_depth;
241 	uint32_t _Atomic        plh_bad;
242 	uint32_t _Atomic        plh_bad_depth;
243 	uint32_t _Atomic        plh_lookup_send;
244 	uint32_t _Atomic        plh_lookup_receive;
245 #define PLH_STAT_OP(...)    (void)(__VA_ARGS__)
246 #else /* DEVELOPMENT || DEBUG */
247 #define PLH_STAT_OP(...)    (void)(0)
248 #endif /* DEVELOPMENT || DEBUG */
249 };
250 
251 #define plh_lock(plh) while(!os_atomic_cmpxchg(&(plh)->plh_lock, 0, 1, acquire)) { loop_wait(); }
252 #define plh_unlock(plh) os_atomic_store(&(plh)->plh_lock, 0, release);
253 
254 #define STACKSHOT_PLH_SHIFT    7
255 #define STACKSHOT_PLH_SIZE_MAX ((kdp_ipc_have_splabel)? 1024 : 0)
256 size_t stackshot_port_label_size = (2 * (1u << STACKSHOT_PLH_SHIFT));
257 #define STASKSHOT_PLH_SIZE(x) MIN((x), STACKSHOT_PLH_SIZE_MAX)
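/*
 * Worked example (illustrative, not from the original source): with
 * STACKSHOT_PLH_SHIFT == 7 the hash table has (1 << 7) == 128 buckets, and
 * stackshot_port_label_size defaults to 2 * 128 == 256 trackable labels
 * (further capped by STACKSHOT_PLH_SIZE_MAX).  A label interned at
 * plh_array[3] is reported externally as portlabel_id 4 (index + 1, since
 * 0 means NONE).
 */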
258 
259 struct stackshot_cpu_context {
260 	bool                               scc_can_work; /* Whether the CPU can do more stackshot work */
261 	bool                               scc_did_work; /* Whether the CPU actually did any stackshot work */
262 	linked_kcdata_descriptor_t         scc_kcdata_head; /* See `linked_kcdata_alloc_callback` */
263 	linked_kcdata_descriptor_t         scc_kcdata_tail; /* See `linked_kcdata_alloc_callback` */
264 	uintptr_t                         *scc_stack_buffer; /* A buffer for stacktraces. */
265 	struct stackshot_fault_stats       scc_fault_stats;
266 	struct _stackshot_validation_state scc_validation_state;
267 	struct _stackshot_plh_gen_state    scc_plh_gen;
268 };
269 
270 /*
271  * When directly modifying the stackshot state, always use the macros below to
272  * work with this enum - the higher order bits are used to store an error code
273  * in the case of SS_ERRORED.
274  *
275  *        +------------------------------------+-------------------+
276  *        |                                    |                   |
277  *        v                                    |                   |
278  * +-------------+     +----------+     +------------+     +------------+
279  * | SS_INACTIVE |---->| SS_SETUP |---->| SS_RUNNING |---->| SS_ERRORED |
280  * +-------------+     +----------+     +------------+     +------------+
281  *                         |  |                |                ^  |
282  *                         |  +----------------|----------------+  |
283  * +-------------+         |                   |                   |
284  * | SS_PANICKED |<--------+-------------------+                   |
285  * +-------------+                                                 |
286  *        ^                                                        |
287  *        |                                                        |
288  *        +--------------------------------------------------------+
289  */
290 __enum_closed_decl(stackshot_state_t, uint, {
291 	SS_INACTIVE = 0x0, /* -> SS_SETUP */
292 	SS_SETUP    = 0x1, /* -> SS_RUNNING, SS_ERRORED, SS_PANICKED */
293 	SS_RUNNING  = 0x2, /* -> SS_ERRORED, SS_PANICKED, SS_INACTIVE */
294 	SS_ERRORED  = 0x3, /* -> SS_INACTIVE, SS_PANICKED */
295 	SS_PANICKED = 0x4, /* -> N/A */
296 	_SS_COUNT
297 });
298 
299 static_assert(_SS_COUNT <= 0x5);
300 /* Get the stackshot state ID from a stackshot_state_t. */
301 #define SS_STATE(state) ((state) & 0x7u)
302 /* Get the error code from a stackshot_state_t. */
303 #define SS_ERRCODE(state) ((state) >> 3)
304 /* Make a stackshot error state with a given code. */
305 #define SS_MKERR(code) (((code) << 3) | SS_ERRORED)
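/*
 * Worked example (illustrative, not from the original source): packing and
 * unpacking an error code with the macros above, given KERN_FAILURE == 5:
 *
 *   SS_MKERR(KERN_FAILURE)  == (5 << 3) | SS_ERRORED == 0x2b
 *   SS_STATE(0x2b)          == 0x3  (SS_ERRORED)
 *   SS_ERRCODE(0x2b)        == 0x5  (KERN_FAILURE)
 */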
306 
307 struct stackshot_context {
308 	/* Constants & Arguments */
309 	struct kdp_snapshot_args      sc_args;
310 	int                           sc_calling_cpuid;
311 	int                           sc_main_cpuid;
312 	bool                          sc_enable_faulting;
313 	uint64_t                      sc_microsecs; /* Timestamp */
314 	bool                          sc_panic_stackshot;
315 	size_t                        sc_min_kcdata_size;
316 	bool                          sc_is_singlethreaded;
317 
318 	/* State & Errors */
319 	stackshot_state_t _Atomic     sc_state; /* Only modified by calling CPU, main CPU, or panicking CPU. See comment above type definition for details. */
320 	kern_return_t                 sc_retval; /* The return value of the main thread */
321 	uint32_t _Atomic              sc_cpus_working;
322 
323 	/* KCData */
324 	linked_kcdata_descriptor_t    sc_pretask_kcdata;
325 	linked_kcdata_descriptor_t    sc_posttask_kcdata;
326 	kcdata_descriptor_t           sc_finalized_kcdata;
327 
328 	/* Buffers & Queues */
329 	struct stackshot_buffer       __counted_by(sc_num_buffers) sc_buffers[STACKSHOT_NUM_BUFFERS];
330 	size_t                        sc_num_buffers;
331 	struct stackshot_workqueue    __counted_by(STACKSHOT_NUM_WORKQUEUES) sc_workqueues[STACKSHOT_NUM_WORKQUEUES];
332 	struct port_label_hash        sc_plh;
333 
334 	/* Statistics */
335 	struct stackshot_duration_v2  sc_duration;
336 	uint32_t                      sc_bytes_traced;
337 	uint32_t                      sc_bytes_uncompressed;
338 #if STACKSHOT_COLLECTS_LATENCY_INFO
339 	struct stackshot_latency_collection_v2 sc_latency;
340 #endif
341 };
342 
343 #define STACKSHOT_DEBUG_TRACEBUF_SIZE 16
344 
345 struct stackshot_trace_entry {
346 	int               sste_line_no;
347 	uint64_t          sste_timestamp;
348 	mach_vm_address_t sste_data;
349 };
350 
351 struct stackshot_trace_buffer {
352 	uint64_t                     sstb_last_trace_timestamp;
353 	size_t                       sstb_tail_idx;
354 	size_t                       sstb_size;
355 	struct stackshot_trace_entry __counted_by(STACKSHOT_DEBUG_TRACEBUF_SIZE) sstb_entries[STACKSHOT_DEBUG_TRACEBUF_SIZE];
356 };
357 
358 #pragma mark ---Stackshot State and Data---
359 
360 /*
361  * Two stackshot states, one for panic and one for normal.
362  * That way, we can take a stackshot during a panic without clobbering state.
363  */
364 #define STACKSHOT_CTX_IDX_NORMAL 0
365 #define STACKSHOT_CTX_IDX_PANIC  1
366 size_t cur_stackshot_ctx_idx   = STACKSHOT_CTX_IDX_NORMAL;
367 struct stackshot_context stackshot_contexts[2] = {{0}, {0}};
368 #define stackshot_ctx (stackshot_contexts[cur_stackshot_ctx_idx])
369 #define stackshot_args (stackshot_ctx.sc_args)
370 #define stackshot_flags (stackshot_args.flags)
371 
372 static struct {
373 	uint64_t last_abs_start;      /* start time of last stackshot */
374 	uint64_t last_abs_end;        /* end time of last stackshot */
375 	uint64_t stackshots_taken;    /* total stackshots taken since boot */
376 	uint64_t stackshots_duration; /* total abs time spent in stackshot_trap() since boot */
377 } stackshot_stats = { 0 };
378 
379 #if STACKSHOT_COLLECTS_LATENCY_INFO
380 static struct stackshot_latency_cpu PERCPU_DATA(stackshot_cpu_latency_percpu);
381 #define stackshot_cpu_latency (*PERCPU_GET(stackshot_cpu_latency_percpu))
382 #endif
383 
384 static struct stackshot_cpu_context PERCPU_DATA(stackshot_cpu_ctx_percpu);
385 #define stackshot_cpu_ctx (*PERCPU_GET(stackshot_cpu_ctx_percpu))
386 
387 static struct kcdata_descriptor PERCPU_DATA(stackshot_kcdata_percpu);
388 #define stackshot_kcdata_p (PERCPU_GET(stackshot_kcdata_percpu))
389 
390 #if STACKSHOT_COLLECTS_LATENCY_INFO
391 static bool collect_latency_info = true;
392 #endif
393 
394 static uint64_t stackshot_max_fault_time;
395 
396 #if STACKSHOT_COLLECTS_DIAGNOSTICS
397 static struct stackshot_trace_buffer PERCPU_DATA(stackshot_trace_buffer);
398 #endif
399 
400 #pragma mark ---Stackshot Global State---
401 
402 uint32_t stackshot_estimate_adj = 25; /* experiment factor: 0-100, adjust our estimate up by this amount */
403 
404 static uint32_t stackshot_initial_estimate;
405 static uint32_t stackshot_initial_estimate_adj;
406 static uint64_t stackshot_duration_prior_abs;   /* prior attempts, abs */
407 static unaligned_u64 * stackshot_duration_outer;
408 static uint64_t stackshot_tries;
409 
410 void * kernel_stackshot_buf   = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
411 int kernel_stackshot_buf_size = 0;
412 
413 void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
414 
415 #if CONFIG_EXCLAVES
416 static ctid_t *stackshot_exclave_inspect_ctids = NULL;
417 static size_t stackshot_exclave_inspect_ctid_count = 0;
418 static size_t stackshot_exclave_inspect_ctid_capacity = 0;
419 
420 static kern_return_t stackshot_exclave_kr = KERN_SUCCESS;
421 #endif /* CONFIG_EXCLAVES */
422 
423 #if DEBUG || DEVELOPMENT
424 TUNABLE(bool, disable_exclave_stackshot, "-disable_exclave_stackshot", false);
425 #else
426 const bool disable_exclave_stackshot = false;
427 #endif
428 
429 #pragma mark ---Stackshot Static Function Declarations---
430 
431 __private_extern__ void stackshot_init( void );
432 static boolean_t        memory_iszero(void *addr, size_t size);
433 static void             stackshot_cpu_do_work(void);
434 static kern_return_t    stackshot_finalize_kcdata(void);
435 static kern_return_t    stackshot_finalize_singlethreaded_kcdata(void);
436 static kern_return_t    stackshot_collect_kcdata(void);
437 static int              kdp_stackshot_kcdata_format();
438 static void             kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
439 static vm_offset_t      stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags);
440 static boolean_t        stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
441 static int              stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
442 static boolean_t        stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
443 static uint64_t         proc_was_throttled_from_task(task_t task);
444 static void             stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t * waitinfo);
445 static int              stackshot_thread_has_valid_waitinfo(thread_t thread);
446 static void             stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo);
447 static int              stackshot_thread_has_valid_turnstileinfo(thread_t thread);
448 static uint32_t         get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj, uint64_t trace_flags, pid_t target_pid);
449 static kern_return_t    kdp_snapshot_preflight_internal(struct kdp_snapshot_args args);
450 
451 #if CONFIG_COALITIONS
452 static void             stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
453 static void             stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
454 #endif /* CONFIG_COALITIONS */
455 
456 #if CONFIG_THREAD_GROUPS
457 static void             stackshot_thread_group_count(void *arg, int i, struct thread_group *tg);
458 static void             stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg);
459 #endif /* CONFIG_THREAD_GROUPS */
460 
461 extern uint64_t         workqueue_get_task_ss_flags_from_pwq_state_kdp(void *proc);
462 
463 static kcdata_descriptor_t linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size);
464 
465 #pragma mark ---Stackshot Externs---
466 
467 struct proc;
468 extern int              proc_pid(struct proc *p);
469 extern uint64_t         proc_uniqueid(void *p);
470 extern uint64_t         proc_was_throttled(void *p);
471 extern uint64_t         proc_did_throttle(void *p);
472 extern int              proc_exiting(void *p);
473 extern int              proc_in_teardown(void *p);
474 static uint64_t         proc_did_throttle_from_task(task_t task);
475 extern void             proc_name_kdp(struct proc *p, char * buf, int size);
476 extern int              proc_threadname_kdp(void * uth, char * buf, size_t size);
477 extern void             proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
478 extern void             proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
479 extern uint64_t         proc_getcsflags_kdp(void * p);
480 extern boolean_t        proc_binary_uuid_kdp(task_t task, uuid_t uuid);
481 extern int              memorystatus_get_pressure_status_kdp(void);
482 extern void             memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit);
483 extern void             panic_stackshot_release_lock(void);
484 
485 extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
486 
487 #if CONFIG_TELEMETRY
488 extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
489 #endif /* CONFIG_TELEMETRY */
490 
491 extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
492 extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
493 
494 static size_t stackshot_plh_est_size(void);
495 
496 #if CONFIG_EXCLAVES
497 static kern_return_t collect_exclave_threads(uint64_t);
498 static kern_return_t stackshot_setup_exclave_waitlist(void);
499 #endif
500 
501 /*
502  * Validates that the given address for a word is both a valid page and has
503  * default caching attributes for the current map.
504  */
505 bool machine_trace_thread_validate_kva(vm_offset_t);
506 /*
507  * Validates a region that stackshot will potentially inspect.
508  */
509 static bool _stackshot_validate_kva(vm_offset_t, size_t);
510 /*
511  * Must be called whenever stackshot is re-driven.
512  */
513 static void _stackshot_validation_reset(void);
514 /*
515  * A kdp-safe strlen() call.  Returns:
516  *      -1 if we reach maxlen or a bad address before the end of the string, or
517  *      strlen(s)
518  */
519 static long _stackshot_strlen(const char *s, size_t maxlen);
520 
521 #define MAX_FRAMES 1000
522 #define STACKSHOT_PAGETABLE_BUFSZ 4000
523 #define MAX_LOADINFOS 500
524 #define MAX_DYLD_COMPACTINFO (20 * 1024)  // max bytes of compactinfo to include per proc/shared region
525 #define TASK_IMP_WALK_LIMIT 20
526 
527 typedef struct thread_snapshot *thread_snapshot_t;
528 typedef struct task_snapshot *task_snapshot_t;
529 
530 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
531 extern kdp_send_t    kdp_en_send_pkt;
532 #endif
533 
534 /*
535  * Stackshot locking and other defines.
536  */
537 LCK_GRP_DECLARE(stackshot_subsys_lck_grp, "stackshot_subsys_lock");
538 LCK_MTX_DECLARE(stackshot_subsys_mutex, &stackshot_subsys_lck_grp);
539 
540 #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
541 #define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
542 #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
543 #define STACKSHOT_SUBSYS_ASSERT_LOCKED() lck_mtx_assert(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
544 
545 #define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
546 #define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)
547 
548 #define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
549 #define GIGABYTES (1024ULL * 1024ULL * 1024ULL)
550 
551 SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;
552 
553 /*
554  * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
555  * for non-panic stackshots where faulting is requested.
556  */
557 #define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)
558 
559 
560 #ifndef ROUNDUP
561 #define ROUNDUP(x, y)            ((((x)+(y)-1)/(y))*(y))
562 #endif
563 
564 #define STACKSHOT_QUEUE_LABEL_MAXSIZE  64
565 
566 #pragma mark ---Stackshot Useful Macros---
567 
568 #define kcd_end_address(kcd) ((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
569 #define kcd_max_address(kcd) ((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
570 /*
571  * Use of the kcd_exit_on_error(action) macro requires a local
572  * 'kern_return_t error' variable and 'error_exit' label.
573  */
574 #define kcd_exit_on_error(action)                      \
575 	do {                                               \
576 	    if (KERN_SUCCESS != (error = (action))) {      \
577 	        STACKSHOT_TRACE(error);                    \
578 	        if (error == KERN_RESOURCE_SHORTAGE) {     \
579 	            error = KERN_INSUFFICIENT_BUFFER_SIZE; \
580 	        }                                          \
581 	        goto error_exit;                           \
582 	    }                                              \
583 	} while (0); /* end kcd_exit_on_error */
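/*
 * Minimal usage sketch for kcd_exit_on_error() (illustrative only, not part
 * of the original source; `example_push_data` is a hypothetical helper).
 * The macro requires a local `kern_return_t error` and an `error_exit` label,
 * and maps KERN_RESOURCE_SHORTAGE to KERN_INSUFFICIENT_BUFFER_SIZE on failure:
 */
#if 0
static kern_return_t
example_push_data(kcdata_descriptor_t kcd)
{
	kern_return_t error = KERN_SUCCESS;

	/* Jumps to error_exit if the kcdata push fails. */
	kcd_exit_on_error(kcdata_add_uint64_with_description(kcd, 42, "example"));

error_exit:
	return error;
}
#endif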
584 
585 #if defined(__arm64__)
586 #define loop_wait_noguard() __builtin_arm_wfe()
587 #elif defined(__x86_64__)
588 #define loop_wait_noguard() __builtin_ia32_pause()
589 #else
590 #define loop_wait_noguard()
591 #endif /* __x86_64__ */
592 
593 #define loop_wait() { loop_wait_noguard(); stackshot_panic_guard(); }
594 
595 static inline void stackshot_panic_guard(void);
596 
597 static __attribute__((noreturn, noinline)) void
598 stackshot_panic_spin(void)
599 {
600 	if (stackshot_cpu_ctx.scc_can_work) {
601 		stackshot_cpu_ctx.scc_can_work = false;
602 		os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
603 	}
604 	if (stackshot_ctx.sc_calling_cpuid == cpu_number()) {
605 		while (os_atomic_load(&stackshot_ctx.sc_cpus_working, acquire) != 0) {
606 			loop_wait_noguard();
607 		}
608 		panic_stackshot_release_lock();
609 	}
610 	while (1) {
611 		loop_wait_noguard();
612 	}
613 }
614 
615 /**
616  * Immediately aborts if another CPU panicked during the stackshot.
617  */
618 static inline void
619 stackshot_panic_guard(void)
620 {
621 	if (__improbable(os_atomic_load(&stackshot_ctx.sc_state, relaxed) == SS_PANICKED)) {
622 		stackshot_panic_spin();
623 	}
624 }
625 
626 /*
627  * Signal that we panicked during a stackshot by setting an atomic flag and
628  * waiting for others to coalesce before continuing the panic. Other CPUs will
629  * spin on this as soon as they see it set in order to prevent multiple
630  * concurrent panics. The calling CPU (i.e. the one holding the debugger lock)
631  * will release it for us in `stackshot_panic_spin` so we can continue
632  * panicking.
633  *
634  * This is called from panic_trap_to_debugger.
635  */
636 void
637 stackshot_cpu_signal_panic(void)
638 {
639 	stackshot_state_t o_state;
640 	if (stackshot_active()) {
641 		/* Check if someone else panicked before we did. */
642 		o_state = os_atomic_xchg(&stackshot_ctx.sc_state, SS_PANICKED, seq_cst);
643 		if (o_state == SS_PANICKED) {
644 			stackshot_panic_spin();
645 		}
646 
647 		/* We're the first CPU to panic - wait for everyone to coalesce. */
648 		if (stackshot_cpu_ctx.scc_can_work) {
649 			stackshot_cpu_ctx.scc_can_work = false;
650 			os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
651 		}
652 		while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
653 			loop_wait_noguard();
654 		}
655 	}
656 }
657 
658 /*
659  * Sets the stackshot state to SS_ERRORED along with the error code.
660  * Only works if the current state is SS_RUNNING or SS_SETUP.
661  */
662 static inline void
663 stackshot_set_error(kern_return_t error)
664 {
665 	stackshot_state_t cur_state;
666 	stackshot_state_t err_state = SS_MKERR(error);
667 	if (__improbable(!os_atomic_cmpxchgv(&stackshot_ctx.sc_state, SS_RUNNING, err_state, &cur_state, seq_cst))) {
668 		if (cur_state == SS_SETUP) {
669 			os_atomic_cmpxchg(&stackshot_ctx.sc_state, SS_SETUP, err_state, seq_cst);
670 		} else {
671 			/* Our state is something other than SS_RUNNING or SS_SETUP... Check for panic. */
672 			stackshot_panic_guard();
673 		}
674 	}
675 }
676 
677 /* Returns an error code if the current stackshot context has errored out.
678  * Also functions as a panic guard.
679  */
680 __result_use_check
681 static inline kern_return_t
682 stackshot_status_check(void)
683 {
684 	stackshot_state_t state = os_atomic_load(&stackshot_ctx.sc_state, relaxed);
685 
686 	/* Check for panic */
687 	if (__improbable(SS_STATE(state) == SS_PANICKED)) {
688 		stackshot_panic_spin();
689 	}
690 
691 	/* Check for error */
692 	if (__improbable(SS_STATE(state) == SS_ERRORED)) {
693 		kern_return_t err = SS_ERRCODE(state);
694 		assert(err != KERN_SUCCESS); /* SS_ERRORED should always store an associated error code. */
695 		return err;
696 	}
697 
698 	return KERN_SUCCESS;
699 }
700 
701 #pragma mark ---Stackshot Tracing---
702 
703 #if STACKSHOT_COLLECTS_DIAGNOSTICS
704 static void
705 stackshot_trace(int line_no, mach_vm_address_t data)
706 {
707 	struct stackshot_trace_buffer *buffer = PERCPU_GET(stackshot_trace_buffer);
708 	buffer->sstb_entries[buffer->sstb_tail_idx] = (struct stackshot_trace_entry) {
709 		.sste_line_no = line_no,
710 		.sste_timestamp = mach_continuous_time(),
711 		.sste_data = data
712 	};
713 	buffer->sstb_tail_idx = (buffer->sstb_tail_idx + 1) % STACKSHOT_DEBUG_TRACEBUF_SIZE;
714 	buffer->sstb_size = MIN(buffer->sstb_size + 1, STACKSHOT_DEBUG_TRACEBUF_SIZE);
715 }
716 #define STACKSHOT_TRACE(data) stackshot_trace(__LINE__, (mach_vm_address_t) (data))
717 
718 #else /* STACKSHOT_COLLECTS_DIAGNOSTICS */
719 #define STACKSHOT_TRACE(data) ((void) data)
720 #endif /* !STACKSHOT_COLLECTS_DIAGNOSTICS */
721 
722 #pragma mark ---Stackshot Buffer Management---
723 
724 #define freelist_lock(buffer) while(!os_atomic_cmpxchg(&buffer->ssb_freelist_lock, 0, 1, acquire)) { loop_wait(); }
725 #define freelist_unlock(buffer) os_atomic_store(&buffer->ssb_freelist_lock, 0, release);
726 
727 /**
728  * Allocates some data from the shared stackshot buffer freelist.
729  * This should not be used directly; it is a last resort if we run out of space.
730  */
731 static void *
732 stackshot_freelist_alloc(
733 	size_t size,
734 	struct stackshot_buffer *buffer,
735 	kern_return_t *error)
736 {
737 	struct freelist_entry **cur_freelist, **best_freelist = NULL, *ret = NULL;
738 
739 	freelist_lock(buffer);
740 
741 	cur_freelist = &buffer->ssb_freelist;
742 
743 	while (*cur_freelist != NULL) {
744 		if (((*cur_freelist)->fl_size >= size) && ((best_freelist == NULL) || ((*best_freelist)->fl_size > (*cur_freelist)->fl_size))) {
745 			best_freelist = cur_freelist;
746 			if ((*best_freelist)->fl_size == size) {
747 				break;
748 			}
749 		}
750 		cur_freelist = &((*cur_freelist)->fl_next);
751 	}
752 
753 	/* If we found a freelist entry, update the freelist */
754 	if (best_freelist != NULL) {
755 		os_atomic_sub(&buffer->ssb_overhead, size, relaxed);
756 		ret = *best_freelist;
757 
758 		/* If there's enough unused space at the end of this entry, we should make a new one */
759 		if (((*best_freelist)->fl_size - size) > sizeof(struct freelist_entry)) {
760 			struct freelist_entry *new_freelist = (struct freelist_entry*) ((mach_vm_address_t) *best_freelist + size);
761 			*new_freelist = (struct freelist_entry) {
762 				.fl_next = (*best_freelist)->fl_next,
763 				.fl_size = (*best_freelist)->fl_size - size
764 			};
765 			(*best_freelist)->fl_next = new_freelist;
766 		}
767 
768 		/* Update previous entry with next or new entry */
769 		*best_freelist = (*best_freelist)->fl_next;
770 	}
771 
772 	freelist_unlock(buffer);
773 
774 	if (error != NULL) {
775 		if (ret == NULL) {
776 			*error = KERN_INSUFFICIENT_BUFFER_SIZE;
777 		} else {
778 			*error = KERN_SUCCESS;
779 		}
780 	}
781 
782 	return ret;
783 }
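/*
 * Worked example (illustrative, not from the original source): on a 64-bit
 * kernel sizeof(struct freelist_entry) is 16, so a 40-byte request satisfied
 * by a 64-byte best-fit entry leaves 64 - 40 = 24 > 16 bytes, and the tail is
 * split off as a new 24-byte freelist entry.  A 56-byte entry would be handed
 * out whole, since its 16-byte remainder is too small to hold a freelist
 * entry header.
 */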
784 
785 /**
786  * Allocates some data from the shared stackshot buffer.
787  * Should not be used directly - see the `stackshot_alloc` and
788  * `stackshot_alloc_arr` macros.
789  */
790 static void *
791 stackshot_buffer_alloc(
792 	size_t size,
793 	struct stackshot_buffer *buffer,
794 	kern_return_t *error)
795 {
796 	size_t o_used, new_used;
797 
798 	stackshot_panic_guard();
799 	assert(!stackshot_ctx.sc_is_singlethreaded);
800 
801 	os_atomic_rmw_loop(&buffer->ssb_used, o_used, new_used, relaxed, {
802 		new_used = o_used + size;
803 		if (new_used > buffer->ssb_size) {
804 		        os_atomic_rmw_loop_give_up(return stackshot_freelist_alloc(size, buffer, error));
805 		}
806 	});
807 
808 	if (error != NULL) {
809 		*error = KERN_SUCCESS;
810 	}
811 
812 	return (void*) ((mach_vm_address_t) buffer->ssb_ptr + o_used);
813 }
814 
815 /**
816  * Finds the best stackshot buffer to use (prefer our cluster's buffer)
817  * and allocates from it.
818  * Should not be used directly - see the `stackshot_alloc` and
819  * `stackshot_alloc_arr` macros.
820  */
821 __result_use_check
822 static void *
823 stackshot_best_buffer_alloc(size_t size, kern_return_t *error)
824 {
825 #if defined(__AMP__)
826 	kern_return_t err;
827 	int           my_cluster;
828 	void         *ret = NULL;
829 #endif /* __AMP__ */
830 
831 #if STACKSHOT_COLLECTS_LATENCY_INFO
832 	stackshot_cpu_latency.total_buf += size;
833 #endif
834 
835 #if defined(__AMP__)
836 	/* First, try our cluster's buffer */
837 	my_cluster = cpu_cluster_id();
838 	ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[my_cluster], &err);
839 
840 	/* Try other buffers now. */
841 	if (err != KERN_SUCCESS) {
842 		for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
843 			if (buf_idx == my_cluster) {
844 				continue;
845 			}
846 
847 			ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[buf_idx], &err);
848 			if (err == KERN_SUCCESS) {
849 #if STACKSHOT_COLLECTS_LATENCY_INFO
850 				stackshot_cpu_latency.intercluster_buf_used += size;
851 #endif
852 				break;
853 			}
854 		}
855 	}
856 
857 	if (error != NULL) {
858 		*error = err;
859 	}
860 
861 	return ret;
862 #else /* __AMP__ */
863 	return stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[0], error);
864 #endif /* !__AMP__ */
865 }
866 
867 /**
868  * Frees some data from the shared stackshot buffer and adds it to the freelist.
869  */
870 static void
871 stackshot_buffer_free(
872 	void *ptr,
873 	struct stackshot_buffer *buffer,
874 	size_t size)
875 {
876 	stackshot_panic_guard();
877 
878 	/* This should never be called during a singlethreaded stackshot. */
879 	assert(!stackshot_ctx.sc_is_singlethreaded);
880 
881 	os_atomic_add(&buffer->ssb_overhead, size, relaxed);
882 
883 	/* Make sure we have enough space for the freelist entry */
884 	if (size < sizeof(struct freelist_entry)) {
885 		return;
886 	}
887 
888 	freelist_lock(buffer);
889 
890 	/* Create new freelist entry and push it to the front of the list */
891 	*((struct freelist_entry*) ptr) = (struct freelist_entry) {
892 		.fl_size = size,
893 		.fl_next = buffer->ssb_freelist
894 	};
895 	buffer->ssb_freelist = ptr;
896 
897 	freelist_unlock(buffer);
898 }
899 
900 /**
901  * Allocates some data from the stackshot buffer. Uses the bump allocator in
902  * multithreaded mode and endalloc in singlethreaded.
903  * err must ALWAYS be nonnull.
904  * Should not be used directly - see the macros in kern_stackshot.h.
905  */
906 void *
907 stackshot_alloc_with_size(size_t size, kern_return_t *err)
908 {
909 	void *ptr;
910 	assert(err != NULL);
911 	assert(stackshot_active());
912 
913 	stackshot_panic_guard();
914 
915 	if (stackshot_ctx.sc_is_singlethreaded) {
916 		ptr = kcdata_endalloc(stackshot_kcdata_p, size);
917 		if (ptr == NULL) {
918 			*err = KERN_INSUFFICIENT_BUFFER_SIZE;
919 		}
920 	} else {
921 		ptr = stackshot_best_buffer_alloc(size, err);
922 		if (ptr == NULL) {
923 			/* We should always return an error if we return a null ptr */
924 			assert3u(*err, !=, KERN_SUCCESS);
925 		}
926 	}
927 
928 	return ptr;
929 }
930 
931 /**
932  * Initializes a new kcdata buffer somewhere in a linked kcdata list.
933  * Allocates a buffer for the kcdata from the shared stackshot buffer.
934  *
935  * See `linked_kcdata_alloc_callback` for the implementation details of
936  * linked kcdata for stackshot.
937  */
938 __result_use_check
939 static kern_return_t
940 linked_kcdata_init(
941 	linked_kcdata_descriptor_t descriptor,
942 	size_t min_size,
943 	unsigned int data_type,
944 	unsigned int flags)
945 {
946 	void              *buf_ptr;
947 	kern_return_t      error;
948 	size_t             buf_size = MAX(min_size, stackshot_ctx.sc_min_kcdata_size);
949 
950 	buf_ptr = stackshot_alloc_arr(uint8_t, buf_size, &error);
951 	if (error != KERN_SUCCESS) {
952 		return error;
953 	}
954 
955 	error = kcdata_memory_static_init(&descriptor->kcdata, (mach_vm_address_t) buf_ptr, data_type, buf_size, flags);
956 	if (error != KERN_SUCCESS) {
957 		return error;
958 	}
959 
960 	descriptor->kcdata.kcd_alloc_callback = linked_kcdata_alloc_callback;
961 
962 	return KERN_SUCCESS;
963 }
964 
965 static void
966 stackshot_kcdata_free_unused(kcdata_descriptor_t descriptor)
967 {
968 	/*
969 	 * If we have free space at the end of the kcdata, we can add it to the
970 	 * freelist. We always add to *our* cluster's freelist, no matter where
971 	 * the data was originally allocated.
972 	 *
973 	 * Important Note: We do not use kcdata_memory_get_used_bytes here because
974 	 * that includes extra space for the end tag (which we do not care about).
975 	 */
976 	int    buffer;
977 	size_t used_size = descriptor->kcd_addr_end - descriptor->kcd_addr_begin;
978 	size_t free_size = (descriptor->kcd_length - used_size);
979 	if (free_size > 0) {
980 #if defined(__arm64__)
981 		buffer = cpu_cluster_id();
982 #else /* __arm64__ */
983 		buffer = 0;
984 #endif /* !__arm64__ */
985 		stackshot_buffer_free((void*) descriptor->kcd_addr_end, &stackshot_ctx.sc_buffers[buffer], free_size);
986 		descriptor->kcd_length = used_size;
987 	}
988 }
989 
990 /**
991  * The callback for linked kcdata, which is called when one of the kcdata
992  * buffers runs out of space. This allocates a new kcdata descriptor &
993  * buffer in the linked list and sets it up.
994  *
995  * When kcdata calls this callback, it takes the returned descriptor
996  * and copies it to its own descriptor (which will be the per-cpu kcdata
997  * descriptor, in the case of stackshot).
998  *
999  * --- Stackshot linked kcdata details ---
1000  * The way stackshot allocates kcdata buffers (in a non-panic context) is via
1001  * a basic bump allocator (see `stackshot_buffer_alloc`) and a linked list of
1002  * kcdata structures. The kcdata are allocated with a reasonable size based on
1003  * some system heuristics (or more if whatever is being pushed into the buffer
1004  * is larger). When the current kcdata buffer runs out of space, it calls this
1005  * callback, which allocates a new linked kcdata object at the tail of the
1006  * current list.
1007  *
1008  * The per-cpu `stackshot_kcdata_p` descriptor is the "tail" of the list, but
1009  * is not actually part of the linked list (this simplified the implementation,
1010  * since it avoided changing every kcdata call and a lot of kcdata code: the
1011  * current in-use descriptor is always in the same place
1012  * this way). When it is filled up and this callback is called, the
1013  * `stackshot_kcdata_p` descriptor is copied to the *actual* tail of the list
1014  * (in stackshot_cpu_ctx.scc_kcdata_tail), and a new linked kcdata struct is
1015  * allocated at the tail.
1016  */
1017 static kcdata_descriptor_t
1018 linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size)
1019 {
1020 	kern_return_t error;
1021 	linked_kcdata_descriptor_t new_kcdata = NULL;
1022 
1023 	/* This callback should ALWAYS be coming from our per-cpu kcdata. If not, something has gone horribly wrong.*/
1024 	stackshot_panic_guard();
1025 	assert(descriptor == stackshot_kcdata_p);
1026 
1027 	/* Free the unused space in the buffer and copy it to the tail of the linked kcdata list. */
1028 	stackshot_kcdata_free_unused(descriptor);
1029 	stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *descriptor;
1030 
1031 	/* Allocate another linked_kcdata and initialize it. */
1032 	new_kcdata = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1033 	if (error != KERN_SUCCESS) {
1034 		return NULL;
1035 	}
1036 
1037 	/* It doesn't matter what we mark the data type as - we're throwing it away when we weave the data together anyway. */
1038 	error = linked_kcdata_init(new_kcdata, min_size, KCDATA_BUFFER_BEGIN_STACKSHOT, descriptor->kcd_flags);
1039 	if (error != KERN_SUCCESS) {
1040 		return NULL;
1041 	}
1042 
1043 	bzero(descriptor, sizeof(struct kcdata_descriptor));
1044 	stackshot_cpu_ctx.scc_kcdata_tail->next = new_kcdata;
1045 	stackshot_cpu_ctx.scc_kcdata_tail = new_kcdata;
1046 
1047 	return &new_kcdata->kcdata;
1048 }
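/*
 * Illustrative sketch (not from the original source;
 * `example_linked_kcdata_total_bytes` is a hypothetical helper): the per-CPU
 * linked kcdata list built above can be walked from head to tail via the
 * `next` pointers, e.g. to total the bytes a CPU produced:
 */
#if 0
static uint64_t
example_linked_kcdata_total_bytes(linked_kcdata_descriptor_t head)
{
	uint64_t total = 0;
	for (linked_kcdata_descriptor_t cur = head; cur != NULL; cur = cur->next) {
		total += kcdata_memory_get_used_bytes(&cur->kcdata);
	}
	return total;
}
#endif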
1049 
1050 /**
1051  * Allocates a new linked kcdata list for the current CPU and sets it up.
1052  * If there was a previous linked kcdata descriptor, you should call
1053  * `stackshot_finalize_linked_kcdata` first, or otherwise save it somewhere.
1054  */
1055 __result_use_check
1056 static kern_return_t
1057 stackshot_new_linked_kcdata(void)
1058 {
1059 	kern_return_t error;
1060 
1061 	stackshot_panic_guard();
1062 	assert(!stackshot_ctx.sc_panic_stackshot);
1063 
1064 	stackshot_cpu_ctx.scc_kcdata_head = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1065 	if (error != KERN_SUCCESS) {
1066 		return error;
1067 	}
1068 
1069 	kcd_exit_on_error(linked_kcdata_init(stackshot_cpu_ctx.scc_kcdata_head, 0,
1070 	    KCDATA_BUFFER_BEGIN_STACKSHOT,
1071 	    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER | KCFLAG_ALLOC_CALLBACK));
1072 
1073 	stackshot_cpu_ctx.scc_kcdata_tail = stackshot_cpu_ctx.scc_kcdata_head;
1074 	*stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_head->kcdata;
1075 
1076 error_exit:
1077 	return error;
1078 }
1079 
1080 /**
1081  * Finalizes the current linked kcdata structure for the CPU by updating the
1082  * tail of the list with the per-cpu kcdata descriptor.
1083  */
1084 static void
1085 stackshot_finalize_linked_kcdata(void)
1086 {
1087 	stackshot_panic_guard();
1088 	assert(!stackshot_ctx.sc_panic_stackshot);
1089 	stackshot_kcdata_free_unused(stackshot_kcdata_p);
1090 	if (stackshot_cpu_ctx.scc_kcdata_tail != NULL) {
1091 		stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *stackshot_kcdata_p;
1092 	}
1093 	*stackshot_kcdata_p = (struct kcdata_descriptor){};
1094 }
1095 
1096 /*
1097  * Initialize the mutex governing access to the stack snapshot subsystem
1098  * and other stackshot related bits.
1099  */
1100 __private_extern__ void
1101 stackshot_init(void)
1102 {
1103 	mach_timebase_info_data_t timebase;
1104 
1105 	clock_timebase_info(&timebase);
1106 	stackshot_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);
1107 
1108 	max_tracebuf_size = MAX(max_tracebuf_size, ((ROUNDUP(max_mem, GIGABYTES) / GIGABYTES) * TRACEBUF_SIZE_PER_GB));
1109 
1110 	PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
1111 }
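/*
 * Worked example (illustrative, not from the original source): with
 * SANE_TRACEBUF_SIZE == 8MB and TRACEBUF_SIZE_PER_GB == 1MB, a machine with
 * 16GB of max_mem ends up with max_tracebuf_size = MAX(8MB, 16 * 1MB) = 16MB,
 * unless the stackshot_maxsz boot-arg overrides it.
 */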
1112 
1113 /*
1114  * Called with interrupts disabled after stackshot context has been
1115  * initialized.
1116  */
1117 static kern_return_t
1118 stackshot_trap(void)
1119 {
1120 	kern_return_t   rv;
1121 
1122 #if defined(__x86_64__)
1123 	/*
1124 	 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
1125 	 * operation, it's essential to apply mutual exclusion to the other when one
1126 	 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
1127 	 * capture CPUs.
1128 	 *
1129 	 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
1130 	 * allowed, so we check to ensure there is no rendezvous in progress before
1131 	 * trying to grab the lock (if there is, a deadlock will occur when we try to
1132 	 * grab the lock).  This is accomplished by setting cpu_rendezvous_in_progress to
1133 	 * TRUE in the mp rendezvous action function.  If stackshot_trap() is called by
1134 	 * a subordinate of the call chain within the mp rendezvous action, this flag will
1135 	 * be set and can be used to detect the inevitable deadlock that would occur
1136 	 * if this thread tried to grab the rendezvous lock.
1137 	 */
1138 
1139 	if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
1140 		panic("Calling stackshot from a rendezvous is not allowed!");
1141 	}
1142 
1143 	mp_rendezvous_lock();
1144 #endif
1145 
1146 	stackshot_stats.last_abs_start = mach_absolute_time();
1147 	stackshot_stats.last_abs_end = 0;
1148 
1149 	rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0, NULL);
1150 
1151 	stackshot_stats.last_abs_end = mach_absolute_time();
1152 	stackshot_stats.stackshots_taken++;
1153 	stackshot_stats.stackshots_duration += (stackshot_stats.last_abs_end - stackshot_stats.last_abs_start);
1154 
1155 #if defined(__x86_64__)
1156 	mp_rendezvous_unlock();
1157 #endif
1158 	return rv;
1159 }
1160 
1161 extern void stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration);
1162 void
1163 stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration)
1164 {
1165 	STACKSHOT_SUBSYS_LOCK();
1166 	*last_abs_start = stackshot_stats.last_abs_start;
1167 	*last_abs_end = stackshot_stats.last_abs_end;
1168 	*count = stackshot_stats.stackshots_taken;
1169 	*total_duration = stackshot_stats.stackshots_duration;
1170 	STACKSHOT_SUBSYS_UNLOCK();
1171 }
1172 
1173 kern_return_t
1174 stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint64_t flags, uint64_t delta_since_timestamp, uint32_t pagetable_mask, unsigned *bytes_traced)
1175 {
1176 	kern_return_t error = KERN_SUCCESS;
1177 	boolean_t istate;
1178 	struct kdp_snapshot_args args;
1179 
1180 	args = (struct kdp_snapshot_args) {
1181 		.pid =               pid,
1182 		.buffer =            buf,
1183 		.buffer_size =       size,
1184 		.flags =             flags,
1185 		.since_timestamp =   delta_since_timestamp,
1186 		.pagetable_mask =    pagetable_mask
1187 	};
1188 
1189 #if DEVELOPMENT || DEBUG
1190 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
1191 		return KERN_NOT_SUPPORTED;
1192 	}
1193 #endif
1194 	if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
1195 		return KERN_INVALID_ARGUMENT;
1196 	}
1197 
1198 	/* zero caller's buffer to match KMA_ZERO in other path */
1199 	bzero(buf, size);
1200 
1201 	/* cap an individual stackshot to max_tracebuf_size */
1202 	if (size > max_tracebuf_size) {
1203 		size = max_tracebuf_size;
1204 	}
1205 
1206 	/* Serialize tracing */
1207 	if (flags & STACKSHOT_TRYLOCK) {
1208 		if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
1209 			return KERN_LOCK_OWNED;
1210 		}
1211 	} else {
1212 		STACKSHOT_SUBSYS_LOCK();
1213 	}
1214 
1215 #if CONFIG_EXCLAVES
1216 	assert(!stackshot_exclave_inspect_ctids);
1217 #endif
1218 
1219 	stackshot_initial_estimate = 0;
1220 	stackshot_duration_prior_abs = 0;
1221 	stackshot_duration_outer = NULL;
1222 
1223 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_START,
1224 	    flags, size, pid, delta_since_timestamp);
1225 
1226 	/* Prepare the compressor for a stackshot */
1227 	error = vm_compressor_kdp_init();
1228 	if (error != KERN_SUCCESS) {
1229 		return error;
1230 	}
1231 
1232 #if STACKSHOT_COLLECTS_RDAR_126582377_DATA
1233 	// Opportunistically collect reports of the rdar://126582377 failure.
1234 	// If the allocation doesn't succeed, or if another CPU "steals" the
1235 	// allocated event first, that is acceptable.
1236 	ca_event_t new_event = CA_EVENT_ALLOCATE_FLAGS(bad_stackshot_upper16, Z_NOWAIT);
1237 	if (new_event) {
1238 		if (os_atomic_cmpxchg(&rdar_126582377_event, NULL, new_event, relaxed) == 0) {
1239 			// Already set up, so free it
1240 			CA_EVENT_DEALLOCATE(new_event);
1241 		}
1242 	}
1243 #endif
1244 
1245 	istate = ml_set_interrupts_enabled(FALSE);
1246 	uint64_t time_start      = mach_absolute_time();
1247 
1248 	/* Emit a SOCD tracepoint that we are initiating a stackshot */
1249 	SOCD_TRACE_XNU_START(STACKSHOT);
1250 
1251 	/* Preload trace parameters */
1252 	error = kdp_snapshot_preflight_internal(args);
1253 
1254 	/*
1255 	 * Trap to the debugger to obtain a coherent stack snapshot; this populates
1256 	 * the trace buffer
1257 	 */
1258 	if (error == KERN_SUCCESS) {
1259 		error = stackshot_trap();
1260 	}
1261 
1262 	uint64_t time_end = mach_absolute_time();
1263 
1264 	/* Emit a SOCD tracepoint that we have completed the stackshot */
1265 	SOCD_TRACE_XNU_END(STACKSHOT);
1266 
1267 	ml_set_interrupts_enabled(istate);
1268 
1269 #if CONFIG_EXCLAVES
1270 	/* stackshot trap should only finish successfully or with no pending Exclave threads */
1271 	assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
1272 #endif
1273 
1274 	/*
1275 	 * Stackshot is no longer active.
1276 	 * (We have to do this here for the special interrupt disable timeout case to work)
1277 	 */
1278 	os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
1279 
1280 	/* Release kdp compressor buffers */
1281 	vm_compressor_kdp_teardown();
1282 
1283 	/* Collect multithreaded kcdata into one finalized buffer */
1284 	if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
1285 		error = stackshot_collect_kcdata();
1286 	}
1287 
1288 #if CONFIG_EXCLAVES
1289 	if (stackshot_exclave_inspect_ctids) {
1290 		error = collect_exclave_threads(flags);
1291 	}
1292 #endif /* CONFIG_EXCLAVES */
1293 
1294 	if (error == KERN_SUCCESS) {
1295 		if (!stackshot_ctx.sc_is_singlethreaded) {
1296 			error = stackshot_finalize_kcdata();
1297 		} else {
1298 			error = stackshot_finalize_singlethreaded_kcdata();
1299 		}
1300 	}
1301 
1302 	if (stackshot_duration_outer) {
1303 		*stackshot_duration_outer = time_end - time_start;
1304 	}
1305 	*bytes_traced = kdp_stack_snapshot_bytes_traced();
1306 
1307 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_END,
1308 	    error, (time_end - time_start), size, *bytes_traced);
1309 
1310 	STACKSHOT_SUBSYS_UNLOCK();
1311 	return error;
1312 }
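/*
 * Usage sketch (illustrative, not from the original source): a minimal
 * in-kernel caller of stack_snapshot_from_kernel().  The buffer handling and
 * flag selection here are hypothetical; STACKSHOT_KCDATA_FORMAT and
 * STACKSHOT_TRYLOCK are existing flags from sys/stackshot.h.
 */
#if 0
static kern_return_t
example_take_stackshot(void *buf, uint32_t bufsize, unsigned *bytes_out)
{
	return stack_snapshot_from_kernel(-1 /* all tasks */, buf, bufsize,
	    STACKSHOT_KCDATA_FORMAT | STACKSHOT_TRYLOCK,
	    0 /* no delta timestamp */, 0 /* no pagetable mask */, bytes_out);
}
#endif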
1313 
1314 #if CONFIG_TELEMETRY
1315 kern_return_t
1316 stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
1317 {
1318 	int error = KERN_SUCCESS;
1319 	uint32_t bytes_traced = 0;
1320 
1321 	*retval = -1;
1322 
1323 	/*
1324 	 * Control related operations
1325 	 */
1326 	if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
1327 		*retval = ENOTSUP;
1328 		goto exit;
1329 	} else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
1330 		*retval = ENOTSUP;
1331 		goto exit;
1332 	}
1333 
1334 	/*
1335 	 * Data related operations
1336 	 */
1337 	*retval = -1;
1338 
1339 	if ((((void*)tracebuf) == NULL) || (tracebuf_size == 0)) {
1340 		error = KERN_INVALID_ARGUMENT;
1341 		goto exit;
1342 	}
1343 
1344 	STACKSHOT_SUBSYS_LOCK();
1345 
1346 	if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
1347 		if (tracebuf_size > max_tracebuf_size) {
1348 			error = KERN_INVALID_ARGUMENT;
1349 			goto unlock_exit;
1350 		}
1351 
1352 		bytes_traced = tracebuf_size;
1353 		error = telemetry_gather(tracebuf, &bytes_traced,
1354 		    (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? true : false);
1355 		*retval = (int)bytes_traced;
1356 		goto unlock_exit;
1357 	}
1358 
1359 unlock_exit:
1360 	STACKSHOT_SUBSYS_UNLOCK();
1361 exit:
1362 	return error;
1363 }
1364 #endif /* CONFIG_TELEMETRY */
1365 
1366 /**
1367  * Grabs the next work item from the stackshot work queue.
1368  */
1369 static struct stackshot_workitem *
1370 stackshot_get_workitem(struct stackshot_workqueue *queue)
1371 {
1372 	uint32_t old_count, new_count;
1373 
1374 	/* note: this relies on give_up not performing the write, just bailing out immediately */
1375 	os_atomic_rmw_loop(&queue->sswq_cur_item, old_count, new_count, acq_rel, {
1376 		if (old_count >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1377 		        os_atomic_rmw_loop_give_up(return NULL);
1378 		}
1379 		new_count = old_count + 1;
1380 	});
1381 
1382 	return &queue->sswq_items[old_count];
1383 };
1384 
1385 /**
1386  * Puts an item on the appropriate stackshot work queue.
1387  * We don't need the lock for this, but only because it's
1388  * only called by one writer.
1389  *
1390  * @returns
1391  * KERN_SUCCESS if the item fit in the queue, KERN_INSUFFICIENT_BUFFER_SIZE if not.
1392  */
1393 static kern_return_t
1394 stackshot_put_workitem(struct stackshot_workitem item)
1395 {
1396 	struct stackshot_workqueue *queue;
1397 
1398 	/* Put in higher queue if task has more threads, with highest queue having >= STACKSHOT_HARDEST_THREADCOUNT threads */
1399 	size_t queue_idx = ((item.sswi_task->thread_count * (STACKSHOT_NUM_WORKQUEUES - 1)) / STACKSHOT_HARDEST_THREADCOUNT);
1400 	queue_idx = MIN(queue_idx, STACKSHOT_NUM_WORKQUEUES - 1);
1401 
1402 	queue = &stackshot_ctx.sc_workqueues[queue_idx];
1403 
1404 	size_t num_items = os_atomic_load(&queue->sswq_num_items, relaxed);
1405 
1406 	if (num_items >= queue->sswq_capacity) {
1407 		return KERN_INSUFFICIENT_BUFFER_SIZE;
1408 	}
1409 
1410 	queue->sswq_items[num_items] = item;
1411 	os_atomic_inc(&queue->sswq_num_items, release);
1412 
1413 	return KERN_SUCCESS;
1414 }
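/*
 * Worked example (illustrative, not from the original source): in the __AMP__
 * configuration, with STACKSHOT_NUM_WORKQUEUES == 2 and
 * STACKSHOT_HARDEST_THREADCOUNT == 10, a task with 4 threads maps to
 * queue_idx = (4 * 1) / 10 = 0, while a task with 12 threads maps to
 * (12 * 1) / 10 = 1; larger thread counts clamp to the last queue.
 */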
1415 
1416 #define calc_num_linked_kcdata_frames(size, kcdata_size) (1 + ((size) - 1) / (kcdata_size))
1417 #define calc_linked_kcdata_size(size, kcdata_size) (calc_num_linked_kcdata_frames((size), (kcdata_size)) * ((kcdata_size) + sizeof(struct linked_kcdata_descriptor)))
1418 
1419 #define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */
1420 #define TASK_SHARED_CACHE_AVG_SIZE (128) /* Average space consumed by task shared cache info */
1421 #define sizeof_if_traceflag(a, flag) (((trace_flags & (flag)) != 0) ? sizeof(a) : 0)
1422 
1423 #define FUDGED_SIZE(size, adj) (((size) * ((adj) + 100)) / 100)
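/*
 * Worked example (illustrative, not from the original source): with a
 * 4096-byte kcdata frame, calc_num_linked_kcdata_frames(10000, 4096) ==
 * 1 + (9999 / 4096) == 3 frames, so calc_linked_kcdata_size(10000, 4096)
 * reserves 3 * (4096 + sizeof(struct linked_kcdata_descriptor)) bytes.
 * FUDGED_SIZE(1000, 25) == (1000 * 125) / 100 == 1250.
 */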
1424 
1425 /*
1426  * Return the estimated size of a single task (including threads)
1427  * in a stackshot with the given flags.
1428  */
1429 static uint32_t
1430 get_stackshot_est_tasksize(uint64_t trace_flags)
1431 {
1432 	size_t total_size;
1433 	size_t threads_per_task = (((threads_count + terminated_threads_count) - 1) / (tasks_count + terminated_tasks_count)) + 1;
1434 	size_t est_thread_size = sizeof(struct thread_snapshot_v4) + 42 * sizeof(uintptr_t);
1435 	size_t est_task_size = sizeof(struct task_snapshot_v2) +
1436 	    TASK_UUID_AVG_SIZE +
1437 	    TASK_SHARED_CACHE_AVG_SIZE +
1438 	    sizeof_if_traceflag(struct io_stats_snapshot, STACKSHOT_INSTRS_CYCLES) +
1439 	    sizeof_if_traceflag(uint32_t, STACKSHOT_ASID) +
1440 	    sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ, STACKSHOT_PAGE_TABLES) +
1441 	    sizeof_if_traceflag(struct instrs_cycles_snapshot_v2, STACKSHOT_INSTRS_CYCLES) +
1442 	    sizeof(struct stackshot_cpu_architecture) +
1443 	    sizeof(struct stackshot_task_codesigning_info);
1444 
1445 #if STACKSHOT_COLLECTS_LATENCY_INFO
1446 	if (collect_latency_info) {
1447 		est_thread_size += sizeof(struct stackshot_latency_thread);
1448 		est_task_size += sizeof(struct stackshot_latency_task);
1449 	}
1450 #endif
1451 
1452 	total_size = est_task_size + threads_per_task * est_thread_size;
1453 
1454 	return total_size;
1455 }
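/*
 * Example with illustrative numbers: 800 live + terminated threads over 200
 * live + terminated tasks gives threads_per_task = ((800 - 1) / 200) + 1 = 4
 * (effectively a ceiling division), so the returned estimate is
 * est_task_size + 4 * est_thread_size.
 */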
1456 
1457 /*
1458  * Return the estimated size of a stackshot based on the
1459  * number of currently running threads and tasks.
1460  *
1461  * adj is an adjustment in units of percentage
1462  */
1463 static uint32_t
1464 get_stackshot_estsize(
1465 	uint32_t prev_size_hint,
1466 	uint32_t adj,
1467 	uint64_t trace_flags,
1468 	pid_t target_pid)
1469 {
1470 	vm_size_t thread_and_task_total;
1471 	uint64_t  size;
1472 	uint32_t  estimated_size;
1473 	bool      process_scoped = ((target_pid != -1) && ((trace_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0));
1474 
1475 	/*
1476 	 * We use the estimated task size (with a fudge factor) as the default
1477 	 * linked kcdata buffer size in an effort to reduce overhead (ideally, we want
1478 	 * each task to only need a single kcdata buffer.)
1479 	 */
1480 	uint32_t est_task_size = get_stackshot_est_tasksize(trace_flags);
1481 	uint32_t est_kcdata_size = FUDGED_SIZE(est_task_size, adj);
1482 	uint64_t est_preamble_size = calc_linked_kcdata_size(8192 * 4, est_kcdata_size);
1483 	uint64_t est_postamble_size = calc_linked_kcdata_size(8192 * 2, est_kcdata_size);
1484 	uint64_t est_extra_size = 0;
1485 
1486 	adj = MIN(adj, 100u);   /* no more than double our estimate */
1487 
1488 #if STACKSHOT_COLLECTS_LATENCY_INFO
1489 	est_extra_size += real_ncpus * sizeof(struct stackshot_latency_cpu);
1490 	est_extra_size += sizeof(struct stackshot_latency_collection_v2);
1491 #endif
1492 
1493 	est_extra_size += real_ncpus * MAX_FRAMES * sizeof(uintptr_t); /* Stacktrace buffers */
1494 	est_extra_size += FUDGED_SIZE(tasks_count, 10) * sizeof(uintptr_t) * STACKSHOT_NUM_WORKQUEUES; /* Work queues */
1495 	est_extra_size += sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ * real_ncpus, STACKSHOT_PAGE_TABLES);
1496 
1497 	thread_and_task_total = calc_linked_kcdata_size(est_task_size, est_kcdata_size);
1498 	if (!process_scoped) {
1499 		thread_and_task_total *= tasks_count;
1500 	}
1501 	size = thread_and_task_total + est_preamble_size + est_postamble_size + est_extra_size; /* estimate */
1502 	size = FUDGED_SIZE(size, adj); /* add adj */
1503 	size = MAX(size, prev_size_hint); /* allow hint to increase */
1504 	size += stackshot_plh_est_size(); /* add space for the port label hash */
1505 	size = MIN(size, VM_MAP_TRUNC_PAGE(UINT32_MAX, PAGE_MASK)); /* avoid overflow */
1506 	estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(size, PAGE_MASK); /* round to pagesize */
1507 
1508 	return estimated_size;
1509 }
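/*
 * Rough shape of the estimate above, as pseudocode (illustrative only):
 *
 *	size = linked_task_size * (process_scoped ? 1 : tasks_count)
 *	    + preamble + postamble + extra;              // cpu buffers, queues, ...
 *	size = FUDGED_SIZE(size, adj);                   // adj% safety margin
 *	size = MAX(size, prev_size_hint);                // honor a larger hint
 *	size += plh_estimate;                            // port label hash
 *	estimate = round_to_page(MIN(size, UINT32_MAX)); // clamp, round up
 */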
1510 
1511 /**
1512  * Copies a linked list of kcdata structures into a final kcdata structure.
1513  * Only used from stackshot_collect_kcdata.
1514  */
1515 __result_use_check
1516 static kern_return_t
1517 stackshot_copy_linked_kcdata(kcdata_descriptor_t final_kcdata, linked_kcdata_descriptor_t linked_kcdata)
1518 {
1519 	kern_return_t error = KERN_SUCCESS;
1520 
1521 	while (linked_kcdata) {
1522 		/* Walk linked kcdata list */
1523 		kcdata_descriptor_t cur_kcdata = &linked_kcdata->kcdata;
1524 		if ((cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin) == 0) {
1525 			linked_kcdata = linked_kcdata->next;
1526 			continue;
1527 		}
1528 
1529 		/* Every item in the linked kcdata should have a header tag of type KCDATA_BUFFER_BEGIN_STACKSHOT. */
1530 		assert(((struct kcdata_item*) cur_kcdata->kcd_addr_begin)->type == KCDATA_BUFFER_BEGIN_STACKSHOT);
1531 		assert((final_kcdata->kcd_addr_begin + final_kcdata->kcd_length) > final_kcdata->kcd_addr_end);
1532 		size_t header_size = sizeof(kcdata_item_t) + kcdata_calc_padding(sizeof(kcdata_item_t));
1533 		size_t size = cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin - header_size;
1534 		size_t free = (final_kcdata->kcd_length + final_kcdata->kcd_addr_begin) - final_kcdata->kcd_addr_end;
1535 		if (free < size) {
1536 			error = KERN_INSUFFICIENT_BUFFER_SIZE;
1537 			goto error_exit;
1538 		}
1539 
1540 		/* Just memcpy the data over (and compress if we need to.) */
1541 		kcdata_compression_window_open(final_kcdata);
1542 		error = kcdata_memcpy(final_kcdata, final_kcdata->kcd_addr_end, (void*) (cur_kcdata->kcd_addr_begin + header_size), size);
1543 		if (error != KERN_SUCCESS) {
1544 			goto error_exit;
1545 		}
1546 		final_kcdata->kcd_addr_end += size;
1547 		kcdata_compression_window_close(final_kcdata);
1548 
1549 		linked_kcdata = linked_kcdata->next;
1550 	}
1551 
1552 error_exit:
1553 	return error;
1554 }
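/*
 * Note on the copy above (illustrative): every linked frame begins with its
 * own KCDATA_BUFFER_BEGIN_STACKSHOT header item, which is skipped so that
 * only payload bytes are appended, roughly:
 *
 *	payload = frame_end - frame_begin - header_size;
 *	free    = (final_begin + final_length) - final_end;
 *	if (free >= payload)
 *		copy payload bytes to final_end and advance it;
 *
 * so the concatenated payloads read as one stream under the final buffer's
 * single header.
 */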
1555 
1556 /**
1557  * Copies the duration, latency, and diagnostic info into a final kcdata buffer.
1558  * Only used by stackshot_finalize_kcdata and stackshot_finalize_singlethreaded_kcdata.
1559  */
1560 __result_use_check
1561 static kern_return_t
1562 stackshot_push_duration_and_latency(kcdata_descriptor_t kcdata)
1563 {
1564 	kern_return_t error;
1565 	mach_vm_address_t out_addr;
1566 	bool use_fault_path = ((stackshot_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
1567 #if STACKSHOT_COLLECTS_LATENCY_INFO
1568 	size_t            buffer_used = 0;
1569 	size_t            buffer_overhead = 0;
1570 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1571 
1572 	if (use_fault_path) {
1573 		struct stackshot_fault_stats stats = (struct stackshot_fault_stats) {
1574 			.sfs_pages_faulted_in = 0,
1575 			.sfs_time_spent_faulting = 0,
1576 			.sfs_system_max_fault_time = stackshot_max_fault_time,
1577 			.sfs_stopped_faulting = false
1578 		};
1579 		percpu_foreach_base(base) {
1580 			struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
1581 			if (!cpu_ctx->scc_did_work) {
1582 				continue;
1583 			}
1584 			stats.sfs_pages_faulted_in += cpu_ctx->scc_fault_stats.sfs_pages_faulted_in;
1585 			stats.sfs_time_spent_faulting += cpu_ctx->scc_fault_stats.sfs_time_spent_faulting;
1586 			stats.sfs_stopped_faulting = stats.sfs_stopped_faulting || cpu_ctx->scc_fault_stats.sfs_stopped_faulting;
1587 		}
1588 		kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
1589 		    sizeof(struct stackshot_fault_stats), &stats);
1590 	}
1591 
1592 #if STACKSHOT_COLLECTS_LATENCY_INFO
1593 	int num_working_cpus = 0;
1594 	if (collect_latency_info) {
1595 		/* Add per-CPU latency info */
1596 		percpu_foreach(cpu_ctx, stackshot_cpu_ctx_percpu) {
1597 			if (cpu_ctx->scc_did_work) {
1598 				num_working_cpus++;
1599 			}
1600 		}
1601 		kcdata_compression_window_open(kcdata);
1602 		kcd_exit_on_error(kcdata_get_memory_addr_for_array(
1603 			    kcdata, STACKSHOT_KCTYPE_LATENCY_INFO_CPU, sizeof(struct stackshot_latency_cpu), num_working_cpus, &out_addr));
1604 		percpu_foreach_base(base) {
1605 			if (PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu)->scc_did_work) {
1606 				kcdata_memcpy(kcdata, out_addr, PERCPU_GET_WITH_BASE(base, stackshot_cpu_latency_percpu),
1607 				    sizeof(struct stackshot_latency_cpu));
1608 				out_addr += sizeof(struct stackshot_latency_cpu);
1609 			}
1610 		}
1611 		kcd_exit_on_error(kcdata_compression_window_close(kcdata));
1612 
1613 		/* Add up buffer info */
1614 		for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
1615 			struct stackshot_buffer *buf = &stackshot_ctx.sc_buffers[buf_idx];
1616 			buffer_used += os_atomic_load(&buf->ssb_used, relaxed);
1617 			buffer_overhead += os_atomic_load(&buf->ssb_overhead, relaxed);
1618 		}
1619 		stackshot_ctx.sc_latency.buffer_size = stackshot_ctx.sc_args.buffer_size;
1620 		stackshot_ctx.sc_latency.buffer_overhead = buffer_overhead;
1621 		stackshot_ctx.sc_latency.buffer_used = buffer_used;
1622 		stackshot_ctx.sc_latency.buffer_count = stackshot_ctx.sc_num_buffers;
1623 
1624 		/* Add overall latency info */
1625 		kcd_exit_on_error(kcdata_push_data(
1626 			    kcdata, STACKSHOT_KCTYPE_LATENCY_INFO,
1627 			    sizeof(stackshot_ctx.sc_latency), &stackshot_ctx.sc_latency));
1628 	}
1629 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1630 
1631 	if ((stackshot_flags & STACKSHOT_DO_COMPRESS) == 0) {
1632 		assert(!stackshot_ctx.sc_panic_stackshot);
1633 		kcd_exit_on_error(kcdata_get_memory_addr(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
1634 		    sizeof(struct stackshot_duration_v2), &out_addr));
1635 		struct stackshot_duration_v2 *duration_p = (void *) out_addr;
1636 		memcpy(duration_p, &stackshot_ctx.sc_duration, sizeof(*duration_p));
1637 		stackshot_duration_outer = (unaligned_u64 *) &duration_p->stackshot_duration_outer;
1638 		kcd_exit_on_error(kcdata_add_uint64_with_description(kcdata, stackshot_tries, "stackshot_tries"));
1639 	} else {
1640 		kcd_exit_on_error(kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_ctx.sc_duration), &stackshot_ctx.sc_duration));
1641 		stackshot_duration_outer = NULL;
1642 	}
1643 
1644 error_exit:
1645 	return error;
1646 }
1647 
1648 /**
1649  * Allocates the final kcdata buffer for a multithreaded stackshot,
1650  * where all of the per-task kcdata (and exclave kcdata) will end up.
1651  */
1652 __result_use_check
1653 static kern_return_t
1654 stackshot_alloc_final_kcdata(void)
1655 {
1656 	vm_offset_t   final_kcdata_buffer = 0;
1657 	kern_return_t error = KERN_SUCCESS;
1658 	uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1659 	    : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
1660 	    : KCDATA_BUFFER_BEGIN_STACKSHOT;
1661 
1662 	if (stackshot_ctx.sc_is_singlethreaded) {
1663 		return KERN_SUCCESS;
1664 	}
1665 
1666 	if ((error = kmem_alloc(kernel_map, &final_kcdata_buffer, stackshot_args.buffer_size,
1667 	    KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
1668 		os_log_error(OS_LOG_DEFAULT, "stackshot: final allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, stackshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
1669 		return KERN_RESOURCE_SHORTAGE;
1670 	}
1671 
1672 	stackshot_ctx.sc_finalized_kcdata = kcdata_memory_alloc_init(final_kcdata_buffer, hdr_tag,
1673 	    stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
1674 
1675 	if (stackshot_ctx.sc_finalized_kcdata == NULL) {
1676 		kmem_free(kernel_map, final_kcdata_buffer, stackshot_args.buffer_size);
1677 		return KERN_FAILURE;
1678 	}
1679 
1680 	return KERN_SUCCESS;
1681 }
1682 
1683 /**
1684  * Frees the final kcdata buffer.
1685  */
1686 static void
1687 stackshot_free_final_kcdata(void)
1688 {
1689 	if (stackshot_ctx.sc_is_singlethreaded || (stackshot_ctx.sc_finalized_kcdata == NULL)) {
1690 		return;
1691 	}
1692 
1693 	kmem_free(kernel_map, stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1694 	kcdata_memory_destroy(stackshot_ctx.sc_finalized_kcdata);
1695 	stackshot_ctx.sc_finalized_kcdata = NULL;
1696 }
1697 
1698 /**
1699  * Called once we exit the debugger trap to collate all of the separate linked
1700  * kcdata lists into one kcdata buffer. The calling thread will run this, and
1701  * it is guaranteed that nobody else is touching any stackshot state at this
1702  * point. In the case of a panic stackshot, this is never called since we only
1703  * use one thread.
1704  *
1705  * Called with interrupts enabled, stackshot subsys lock held.
1706  */
1707 __result_use_check
1708 static kern_return_t
1709 stackshot_collect_kcdata(void)
1710 {
1711 	kern_return_t error = 0;
1712 	uint32_t      hdr_tag;
1713 
1714 	assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1715 	LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1716 
1717 	/* Allocate our final kcdata buffer. */
1718 	kcd_exit_on_error(stackshot_alloc_final_kcdata());
1719 	assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1720 
1721 	/* Setup compression if we need it. */
1722 	if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
1723 		hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1724 		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
1725 		kcd_exit_on_error(kcdata_init_compress(stackshot_ctx.sc_finalized_kcdata, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
1726 	}
1727 
1728 	/* Copy over all of the pre task-iteration kcdata (to preserve order as if it were single-threaded) */
1729 	kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_pretask_kcdata));
1730 
1731 	/* Set each queue's cur_item to 0. */
1732 	for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1733 		os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_cur_item, 0, relaxed);
1734 	}
1735 
1736 	/*
1737 	 * Iterate over work queue(s) and copy the kcdata in.
1738 	 */
1739 	while (true) {
1740 		struct stackshot_workitem  *next_item = NULL;
1741 		struct stackshot_workqueue *next_queue = NULL;
1742 		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1743 			struct stackshot_workqueue *queue = &stackshot_ctx.sc_workqueues[i];
1744 			size_t cur_item = os_atomic_load(&queue->sswq_cur_item, relaxed);
1745 
1746 			/* Check if we're done with this queue */
1747 			if (cur_item >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1748 				continue;
1749 			}
1750 
1751 			/* Check if this workitem should come next */
1752 			struct stackshot_workitem *item = &queue->sswq_items[cur_item];
1753 			if ((next_item == NULL) || (next_item->sswi_idx > item->sswi_idx)) {
1754 				next_item = item;
1755 				next_queue = queue;
1756 			}
1757 		}
1758 
1759 		/* Queues are empty. */
1760 		if (next_item == NULL) {
1761 			break;
1762 		}
1763 
1764 		assert(next_queue);
1765 		assert(next_item->sswi_data != NULL);
1766 
1767 		os_atomic_inc(&next_queue->sswq_cur_item, relaxed);
1768 		kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, next_item->sswi_data));
1769 	}
1770 
1771 	/* Write post-task kcdata */
1772 	kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_posttask_kcdata));
1773 error_exit:
1774 	if (error != KERN_SUCCESS) {
1775 		stackshot_free_final_kcdata();
1776 	}
1777 	return error;
1778 }
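/*
 * Illustrative example of the merge loop above, assuming sswi_idx records the
 * original task enumeration order: if queue 0 holds items with indices {0, 3}
 * and queue 1 holds {1, 2}, the loop emits 0, 1, 2, 3, so the finalized
 * kcdata is ordered as if a single thread had walked the task list,
 * regardless of which per-threadcount queue each task landed in.
 */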
1779 
1780 
1781 /**
1782  * Called at the very end of stackshot data generation, to write final timing
1783  * data to the kcdata structure and close compression. Only called for
1784  * multi-threaded stackshots; see stackshot_finalize_singlethreaded_kcdata for the
1785  * single-threaded variant.
1786  *
1787  * Called with interrupts enabled, stackshot subsys lock held.
1788  */
1789 __result_use_check
1790 static kern_return_t
1791 stackshot_finalize_kcdata(void)
1792 {
1793 	kern_return_t error = 0;
1794 
1795 	assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1796 	LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1797 
1798 	assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1799 
1800 	/* Write stackshot timing info */
1801 	kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1802 
1803 	/* Note: at most one call that pushes additional data may be made after kcd_finalize_compression */
1804 	kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1805 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1806 	kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1807 
1808 	stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1809 	stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1810 
1811 	if (os_atomic_load(&stackshot_ctx.sc_retval, relaxed) == KERN_SUCCESS) {
1812 		/* releases and zeros done */
1813 		kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1814 	}
1815 
1816 	memcpy(stackshot_args.buffer, (void*) stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1817 
1818 	/* Fix duration_outer offset */
1819 	if (stackshot_duration_outer != NULL) {
1820 		stackshot_duration_outer = (unaligned_u64*) ((mach_vm_address_t) stackshot_args.buffer + ((mach_vm_address_t) stackshot_duration_outer - stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin));
1821 	}
1822 
1823 error_exit:
1824 	stackshot_free_final_kcdata();
1825 	return error;
1826 }
1827 
1828 /**
1829  * Finalizes the kcdata for a singlethreaded stackshot.
1830  *
1831  * May be called from interrupt/panic context.
1832  */
1833 __result_use_check
1834 static kern_return_t
1835 stackshot_finalize_singlethreaded_kcdata(void)
1836 {
1837 	kern_return_t error;
1838 
1839 	assert(stackshot_ctx.sc_is_singlethreaded);
1840 
1841 	kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1842 	/* Note: at most one call that pushes additional data may be made after kcd_finalize_compression */
1843 	kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1844 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1845 	kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1846 
1847 	stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1848 	stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1849 
1850 	kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1851 
1852 	if (stackshot_ctx.sc_panic_stackshot) {
1853 		*stackshot_args.descriptor = *stackshot_ctx.sc_finalized_kcdata;
1854 	}
1855 
1856 error_exit:
1857 	return error;
1858 }
1859 
1860 /*
1861  * stackshot_remap_buffer:	Utility function to remap bytes_traced bytes starting at stackshotbuf
1862  *				into the current task's user space and subsequently copy out the address
1863  *				at which the buffer has been mapped in user space to out_buffer_addr.
1864  *
1865  * Inputs:			stackshotbuf - pointer to the original buffer in the kernel's address space
1866  *				bytes_traced - length of the buffer to remap starting from stackshotbuf
1867  *				out_buffer_addr - user-space address to which the address of the newly mapped buffer is copied out.
1868  *				out_size_addr - pointer to be filled in with the size of the buffer
1869  *
1870  * Outputs:			ENOSPC if there is not enough free space in the task's address space to remap the buffer
1871  *				EINVAL for all other errors returned by task_remap_buffer/mach_vm_remap
1872  *				an error from copyout
1873  */
1874 static kern_return_t
1875 stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
1876 {
1877 	int                     error = 0;
1878 	mach_vm_offset_t        stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
1879 	vm_prot_t               cur_prot = VM_PROT_NONE, max_prot = VM_PROT_NONE;
1880 
1881 	error = mach_vm_remap(current_map(), &stackshotbuf_user_addr, bytes_traced, 0,
1882 	    VM_FLAGS_ANYWHERE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE,
1883 	    &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
1884 	/*
1885 	 * If the call to mach_vm_remap fails, we return the appropriate converted error
1886 	 */
1887 	if (error == KERN_SUCCESS) {
1888 		/* If the user addr somehow didn't get set, we should make sure that we fail, and (eventually)
1889 		 * panic on development kernels to find out why
1890 		 */
1891 		if (stackshotbuf_user_addr == (mach_vm_offset_t)NULL) {
1892 #if DEVELOPMENT || DEBUG
1893 			os_log_error(OS_LOG_DEFAULT, "stackshot: mach_vm_remap succeeded with NULL\n");
1894 #endif // DEVELOPMENT || DEBUG
1895 			return KERN_FAILURE;
1896 		}
1897 
1898 		/*
1899 		 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
1900 		 * we just made in the task's user space.
1901 		 */
1902 		error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
1903 		if (error != KERN_SUCCESS) {
1904 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1905 			return error;
1906 		}
1907 		error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
1908 		if (error != KERN_SUCCESS) {
1909 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1910 			return error;
1911 		}
1912 	}
1913 	return error;
1914 }
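/*
 * Sketch of the user-space side (illustrative, not part of this file): the
 * caller supplies the addresses of two locals as out_buffer_addr and
 * out_size_addr; on success the snapshot is mapped into its address space and
 * should eventually be unmapped with mach_vm_deallocate():
 *
 *	mach_vm_address_t buf  = 0;
 *	uint32_t          size = 0;
 *	// ... request the stackshot, passing &buf and &size ...
 *	// parse the kcdata at (void *)buf, then:
 *	mach_vm_deallocate(mach_task_self(), buf, size);
 */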
1915 
1916 #if CONFIG_EXCLAVES
1917 
1918 static kern_return_t
1919 stackshot_setup_exclave_waitlist(void)
1920 {
1921 	kern_return_t error = KERN_SUCCESS;
1922 	size_t exclave_threads_max = exclaves_ipc_buffer_count();
1923 	size_t waitlist_size = 0;
1924 
1925 	assert(!stackshot_exclave_inspect_ctids);
1926 
1927 	if (exclaves_inspection_is_initialized() && exclave_threads_max) {
1928 		if (os_mul_overflow(exclave_threads_max, sizeof(ctid_t), &waitlist_size)) {
1929 			error = KERN_INVALID_ARGUMENT;
1930 			goto error;
1931 		}
1932 		stackshot_exclave_inspect_ctids = stackshot_alloc_with_size(waitlist_size, &error);
1933 		if (!stackshot_exclave_inspect_ctids) {
1934 			goto error;
1935 		}
1936 		stackshot_exclave_inspect_ctid_count = 0;
1937 		stackshot_exclave_inspect_ctid_capacity = exclave_threads_max;
1938 	}
1939 
1940 error:
1941 	return error;
1942 }
1943 
1944 static kern_return_t
1945 collect_exclave_threads(uint64_t ss_flags)
1946 {
1947 	size_t i;
1948 	ctid_t ctid;
1949 	thread_t thread;
1950 	kern_return_t kr = KERN_SUCCESS;
1951 	STACKSHOT_SUBSYS_ASSERT_LOCKED();
1952 
1953 	lck_mtx_lock(&exclaves_collect_mtx);
1954 
1955 	if (stackshot_exclave_inspect_ctid_count == 0) {
1956 		/* Nothing to do */
1957 		goto out;
1958 	}
1959 
1960 	// When asking for ASIDs, make sure we get all exclave ASIDs and mappings as well
1961 	exclaves_stackshot_raw_addresses = (ss_flags & STACKSHOT_ASID);
1962 	exclaves_stackshot_all_address_spaces = (ss_flags & (STACKSHOT_ASID | STACKSHOT_EXCLAVES));
1963 
1964 	/* This error is intentionally ignored: we are now committed to collecting
1965 	 * these threads, or at least properly waking them. If this fails, the first
1966 	 * collected thread should also fail to append to the kcdata, and will abort
1967 	 * further collection, properly clearing the AST and waking these threads.
1968 	 */
1969 	kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_BEGIN,
1970 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
1971 
1972 	for (i = 0; i < stackshot_exclave_inspect_ctid_count; ++i) {
1973 		ctid = stackshot_exclave_inspect_ctids[i];
1974 		thread = ctid_get_thread(ctid);
1975 		assert(thread);
1976 		exclaves_inspection_queue_add(&exclaves_inspection_queue_stackshot, &thread->th_exclaves_inspection_queue_stackshot);
1977 	}
1978 	exclaves_inspection_begin_collecting();
1979 	exclaves_inspection_wait_complete(&exclaves_inspection_queue_stackshot);
1980 	kr = stackshot_exclave_kr; /* Read the result of work done on our behalf, by collection thread */
1981 	if (kr != KERN_SUCCESS) {
1982 		goto out;
1983 	}
1984 
1985 	kr = kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_END,
1986 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
1987 	if (kr != KERN_SUCCESS) {
1988 		goto out;
1989 	}
1990 out:
1991 	/* clear Exclave buffer now that it's been used */
1992 	stackshot_exclave_inspect_ctids = NULL;
1993 	stackshot_exclave_inspect_ctid_capacity = 0;
1994 	stackshot_exclave_inspect_ctid_count = 0;
1995 
1996 	lck_mtx_unlock(&exclaves_collect_mtx);
1997 	return kr;
1998 }
1999 
2000 static kern_return_t
2001 stackshot_exclaves_process_stacktrace(const address_v__opt_s *_Nonnull st, void *kcdata_ptr)
2002 {
2003 	kern_return_t error = KERN_SUCCESS;
2004 	exclave_ecstackentry_addr_t * addr = NULL;
2005 	__block size_t count = 0;
2006 
2007 	if (!st->has_value) {
2008 		goto error_exit;
2009 	}
2010 
2011 	address__v_visit(&st->value, ^(size_t __unused i, const stackshottypes_address_s __unused item) {
2012 		count++;
2013 	});
2014 
2015 	kcdata_compression_window_open(kcdata_ptr);
2016 	kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_ECSTACK,
2017 	    sizeof(exclave_ecstackentry_addr_t), count, (mach_vm_address_t*)&addr));
2018 
2019 	address__v_visit(&st->value, ^(size_t i, const stackshottypes_address_s item) {
2020 		addr[i] = (exclave_ecstackentry_addr_t)item;
2021 	});
2022 
2023 	kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2024 
2025 error_exit:
2026 	return error;
2027 }
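/*
 * Note on the pattern above, used by several of these exclave helpers: the
 * generated tightbeam vectors are only reachable through *_v_visit callbacks,
 * so the code visits once just to count elements, reserves a kcdata array of
 * exactly that count inside a compression window, then visits again to fill
 * it in place. Generic sketch (vec_visit, item_t and convert() are
 * placeholders, not real APIs):
 *
 *	__block size_t n = 0;
 *	vec_visit(v, ^(size_t i, item_t it) { n++; });
 *	kcdata_get_memory_addr_for_array(kcd, TYPE, sizeof(elem_t), n, &addr);
 *	vec_visit(v, ^(size_t i, item_t it) { ((elem_t *)addr)[i] = convert(it); });
 */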
2028 
2029 static kern_return_t
2030 stackshot_exclaves_process_ipcstackentry(uint64_t index, const stackshottypes_ipcstackentry_s *_Nonnull ise, void *kcdata_ptr)
2031 {
2032 	kern_return_t error = KERN_SUCCESS;
2033 
2034 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2035 	    STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2036 
2037 	struct exclave_ipcstackentry_info info = { 0 };
2038 	info.eise_asid = ise->asid;
2039 
2040 	info.eise_tnid = ise->tnid;
2041 
2042 	if (ise->invocationid.has_value) {
2043 		info.eise_flags |= kExclaveIpcStackEntryHaveInvocationID;
2044 		info.eise_invocationid = ise->invocationid.value;
2045 	} else {
2046 		info.eise_invocationid = 0;
2047 	}
2048 
2049 	info.eise_flags |= (ise->stacktrace.has_value ? kExclaveIpcStackEntryHaveStack : 0);
2050 
2051 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_INFO, sizeof(struct exclave_ipcstackentry_info), &info));
2052 
2053 	if (ise->stacktrace.has_value) {
2054 		kcd_exit_on_error(stackshot_exclaves_process_stacktrace(&ise->stacktrace, kcdata_ptr));
2055 	}
2056 
2057 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2058 	    STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2059 
2060 error_exit:
2061 	return error;
2062 }
2063 
2064 static kern_return_t
2065 stackshot_exclaves_process_ipcstack(const stackshottypes_ipcstackentry_v__opt_s *_Nonnull ipcstack, void *kcdata_ptr)
2066 {
2067 	__block kern_return_t kr = KERN_SUCCESS;
2068 
2069 	if (!ipcstack->has_value) {
2070 		goto error_exit;
2071 	}
2072 
2073 	stackshottypes_ipcstackentry__v_visit(&ipcstack->value, ^(size_t i, const stackshottypes_ipcstackentry_s *_Nonnull item) {
2074 		if (kr == KERN_SUCCESS) {
2075 		        kr = stackshot_exclaves_process_ipcstackentry(i, item, kcdata_ptr);
2076 		}
2077 	});
2078 
2079 error_exit:
2080 	return kr;
2081 }
2082 
2083 static kern_return_t
2084 stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s *_Nonnull se, void *kcdata_ptr)
2085 {
2086 	kern_return_t error = KERN_SUCCESS;
2087 
2088 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2089 	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2090 
2091 	struct exclave_scresult_info info = { 0 };
2092 	info.esc_id = se->scid;
2093 	info.esc_flags = se->ipcstack.has_value ? kExclaveScresultHaveIPCStack : 0;
2094 
2095 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_SCRESULT_INFO, sizeof(struct exclave_scresult_info), &info));
2096 
2097 	if (se->ipcstack.has_value) {
2098 		kcd_exit_on_error(stackshot_exclaves_process_ipcstack(&se->ipcstack, kcdata_ptr));
2099 	}
2100 
2101 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2102 	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2103 
2104 error_exit:
2105 	return error;
2106 }
2107 
2108 static kern_return_t
2109 stackshot_exclaves_process_textlayout_segments(const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2110 {
2111 	kern_return_t error = KERN_SUCCESS;
2112 	__block struct exclave_textlayout_segment * info = NULL;
2113 
2114 	__block size_t count = 0;
2115 	stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s __unused *_Nonnull item) {
2116 		count++;
2117 	});
2118 
2119 	if (!count) {
2120 		goto error_exit;
2121 	}
2122 
2123 	kcdata_compression_window_open(kcdata_ptr);
2124 	kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_SEGMENTS,
2125 	    sizeof(struct exclave_textlayout_segment), count, (mach_vm_address_t*)&info));
2126 
2127 	stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s *_Nonnull item) {
2128 		memcpy(&info->layoutSegment_uuid, item->uuid, sizeof(uuid_t));
2129 		if (want_raw_addresses) {
2130 		        info->layoutSegment_loadAddress = item->rawloadaddress.has_value ? item->rawloadaddress.value: 0;
2131 		} else {
2132 		        info->layoutSegment_loadAddress = item->loadaddress;
2133 		}
2134 		info++;
2135 	});
2136 
2137 	kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2138 
2139 error_exit:
2140 	return error;
2141 }
2142 
2143 static kern_return_t
2144 stackshot_exclaves_process_textlayout(const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2145 {
2146 	kern_return_t error = KERN_SUCCESS;
2147 	__block struct exclave_textlayout_info info = { 0 };
2148 
2149 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2150 	    STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, tl->textlayoutid));
2151 
2152 	info.layout_id = tl->textlayoutid;
2153 
2154 	info.etl_flags = want_raw_addresses ? 0 : kExclaveTextLayoutLoadAddressesUnslid;
2155 
2156 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_INFO, sizeof(struct exclave_textlayout_info), &info));
2157 	kcd_exit_on_error(stackshot_exclaves_process_textlayout_segments(tl, kcdata_ptr, want_raw_addresses));
2158 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2159 	    STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, tl->textlayoutid));
2160 error_exit:
2161 	return error;
2162 }
2163 
2164 static kern_return_t
2165 stackshot_exclaves_process_addressspace(const stackshottypes_addressspace_s *_Nonnull as, void *kcdata_ptr, bool want_raw_addresses)
2166 {
2167 	kern_return_t error = KERN_SUCCESS;
2168 	struct exclave_addressspace_info info = { 0 };
2169 	__block size_t name_len = 0;
2170 	uint8_t * name = NULL;
2171 
2172 	u8__v_visit(&as->name, ^(size_t __unused i, const uint8_t __unused item) {
2173 		name_len++;
2174 	});
2175 
2176 	info.eas_id = as->asid;
2177 
2178 	if (want_raw_addresses && as->rawaddressslide.has_value) {
2179 		info.eas_flags = kExclaveAddressSpaceHaveSlide;
2180 		info.eas_slide = as->rawaddressslide.value;
2181 	} else {
2182 		info.eas_flags = 0;
2183 		info.eas_slide = UINT64_MAX;
2184 	}
2185 
2186 	info.eas_layoutid = as->textlayoutid; // text layout for this address space
2187 	info.eas_asroot = as->asroot.has_value ? as->asroot.value : 0;
2188 
2189 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2190 	    STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2191 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_INFO, sizeof(struct exclave_addressspace_info), &info));
2192 
2193 	if (name_len > 0) {
2194 		kcdata_compression_window_open(kcdata_ptr);
2195 		kcd_exit_on_error(kcdata_get_memory_addr(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_NAME, name_len + 1, (mach_vm_address_t*)&name));
2196 
2197 		u8__v_visit(&as->name, ^(size_t i, const uint8_t item) {
2198 			name[i] = item;
2199 		});
2200 		name[name_len] = 0;
2201 
2202 		kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2203 	}
2204 
2205 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2206 	    STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2207 error_exit:
2208 	return error;
2209 }
2210 
2211 kern_return_t
2212 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses);
2213 
2214 kern_return_t
2215 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses)
2216 {
2217 	__block kern_return_t kr = KERN_SUCCESS;
2218 
2219 	stackshot_stackshotentry__v_visit(&result->stackshotentries, ^(size_t __unused i, const stackshot_stackshotentry_s *_Nonnull item) {
2220 		if (kr == KERN_SUCCESS) {
2221 		        kr = stackshot_exclaves_process_stackshotentry(item, kcdata_ptr);
2222 		}
2223 	});
2224 
2225 	stackshottypes_addressspace__v_visit(&result->addressspaces, ^(size_t __unused i, const stackshottypes_addressspace_s *_Nonnull item) {
2226 		if (kr == KERN_SUCCESS) {
2227 		        kr = stackshot_exclaves_process_addressspace(item, kcdata_ptr, want_raw_addresses);
2228 		}
2229 	});
2230 
2231 	stackshottypes_textlayout__v_visit(&result->textlayouts, ^(size_t __unused i, const stackshottypes_textlayout_s *_Nonnull item) {
2232 		if (kr == KERN_SUCCESS) {
2233 		        kr = stackshot_exclaves_process_textlayout(item, kcdata_ptr, want_raw_addresses);
2234 		}
2235 	});
2236 
2237 	return kr;
2238 }
2239 
2240 kern_return_t
2241 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses);
2242 
2243 kern_return_t
2244 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses)
2245 {
2246 	kern_return_t kr = KERN_SUCCESS;
2247 	if (result == NULL) {
2248 		return collect_kr;
2249 	}
2250 
2251 	kr = stackshot_exclaves_process_stackshot(result, stackshot_ctx.sc_finalized_kcdata, want_raw_addresses);
2252 
2253 	stackshot_exclave_kr = kr;
2254 
2255 	return kr;
2256 }
2257 
2258 
2259 static void
2260 commit_exclaves_ast(void)
2261 {
2262 	size_t i = 0;
2263 	thread_t thread = NULL;
2264 	size_t count;
2265 
2266 	assert(debug_mode_active());
2267 
2268 	count = os_atomic_load(&stackshot_exclave_inspect_ctid_count, acquire);
2269 
2270 	if (stackshot_exclave_inspect_ctids) {
2271 		for (i = 0; i < count; ++i) {
2272 			thread = ctid_get_thread(stackshot_exclave_inspect_ctids[i]);
2273 			assert(thread);
2274 			thread_reference(thread);
2275 			os_atomic_or(&thread->th_exclaves_inspection_state, TH_EXCLAVES_INSPECTION_STACKSHOT, relaxed);
2276 		}
2277 	}
2278 }
2279 
2280 #endif /* CONFIG_EXCLAVES */
2281 
2282 kern_return_t
2283 kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
2284 {
2285 	int error = 0;
2286 	boolean_t prev_interrupt_state;
2287 	bool did_copyout = false;
2288 	uint32_t bytes_traced = 0;
2289 	uint32_t stackshot_estimate = 0;
2290 	struct kdp_snapshot_args snapshot_args;
2291 
2292 	void * buf_to_free = NULL;
2293 	int size_to_free = 0;
2294 	bool is_traced = false;    /* has FUNC_START tracepoint fired? */
2295 	uint64_t tot_interrupts_off_abs = 0; /* sum(time with interrupts off) */
2296 
2297 	/* Parsed arguments */
2298 	uint64_t                out_buffer_addr;
2299 	uint64_t                out_size_addr;
2300 	uint32_t                size_hint = 0;
2301 
2302 	snapshot_args.pagetable_mask = STACKSHOT_PAGETABLES_MASK_ALL;
2303 
2304 	if (stackshot_config == NULL) {
2305 		return KERN_INVALID_ARGUMENT;
2306 	}
2307 #if DEVELOPMENT || DEBUG
2308 	/* TBD: ask stackshot clients to avoid issuing stackshots in this
2309 	 * configuration in lieu of the kernel feature override.
2310 	 */
2311 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
2312 		return KERN_NOT_SUPPORTED;
2313 	}
2314 #endif
2315 
2316 	switch (stackshot_config_version) {
2317 	case STACKSHOT_CONFIG_TYPE:
2318 		if (stackshot_config_size != sizeof(stackshot_config_t)) {
2319 			return KERN_INVALID_ARGUMENT;
2320 		}
2321 		stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
2322 		out_buffer_addr = config->sc_out_buffer_addr;
2323 		out_size_addr = config->sc_out_size_addr;
2324 		snapshot_args.pid = config->sc_pid;
2325 		snapshot_args.flags = config->sc_flags;
2326 		snapshot_args.since_timestamp = config->sc_delta_timestamp;
2327 		if (config->sc_size <= max_tracebuf_size) {
2328 			size_hint = config->sc_size;
2329 		}
2330 		/*
2331 		 * Retain the pre-sc_pagetable_mask behavior of STACKSHOT_PAGE_TABLES:
2332 		 * dump every level if the pagetable_mask is not set
2333 		 */
2334 		if (snapshot_args.flags & STACKSHOT_PAGE_TABLES && config->sc_pagetable_mask) {
2335 			snapshot_args.pagetable_mask = config->sc_pagetable_mask;
2336 		}
2337 		break;
2338 	default:
2339 		return KERN_NOT_SUPPORTED;
2340 	}
2341 
2342 	/*
2343 	 * Currently saving a kernel buffer and trylock are only supported from the
2344 	 * internal/KEXT API.
2345 	 */
2346 	if (stackshot_from_user) {
2347 		if (snapshot_args.flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
2348 			return KERN_NO_ACCESS;
2349 		}
2350 #if !DEVELOPMENT && !DEBUG
2351 		if (snapshot_args.flags & (STACKSHOT_DO_COMPRESS)) {
2352 			return KERN_NO_ACCESS;
2353 		}
2354 #endif
2355 	} else {
2356 		if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2357 			return KERN_NOT_SUPPORTED;
2358 		}
2359 	}
2360 
2361 	if (!((snapshot_args.flags & STACKSHOT_KCDATA_FORMAT) || (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
2362 		return KERN_NOT_SUPPORTED;
2363 	}
2364 
2365 	/* Compressed delta stackshots or page dumps are not yet supported */
2366 	if (((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) || (snapshot_args.flags & STACKSHOT_PAGE_TABLES))
2367 	    && (snapshot_args.flags & STACKSHOT_DO_COMPRESS)) {
2368 		return KERN_NOT_SUPPORTED;
2369 	}
2370 
2371 	/*
2372 	 * If we're not saving the buffer in the kernel pointer, we need a place to copy into.
2373 	 */
2374 	if ((!out_buffer_addr || !out_size_addr) && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2375 		return KERN_INVALID_ARGUMENT;
2376 	}
2377 
2378 	if (snapshot_args.since_timestamp != 0 && ((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
2379 		return KERN_INVALID_ARGUMENT;
2380 	}
2381 
2382 	/* EXCLAVES and SKIP_EXCLAVES conflict */
2383 	if ((snapshot_args.flags & (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) == (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) {
2384 		return KERN_INVALID_ARGUMENT;
2385 	}
2386 
2387 #if CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS
2388 	if (!mt_core_supported) {
2389 		snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2390 	}
2391 #else /* CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS */
2392 	snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2393 #endif /* !CONFIG_PERVASIVE_CPI || !CONFIG_CPU_COUNTERS */
2394 
2395 	STACKSHOT_TESTPOINT(TP_WAIT_START_STACKSHOT);
2396 	STACKSHOT_SUBSYS_LOCK();
2397 
2398 	stackshot_tries = 0;
2399 
2400 	if (snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
2401 		/*
2402 		 * Don't overwrite an existing stackshot
2403 		 */
2404 		if (kernel_stackshot_buf != NULL) {
2405 			error = KERN_MEMORY_PRESENT;
2406 			goto error_early_exit;
2407 		}
2408 	} else if (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
2409 		if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
2410 			error = KERN_NOT_IN_SET;
2411 			goto error_early_exit;
2412 		}
2413 		error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
2414 		    out_buffer_addr, out_size_addr);
2415 		/*
2416 		 * If we successfully remapped the buffer into the user's address space, we
2417 		 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
2418 		 * and then clear the kernel stackshot pointer and associated size.
2419 		 */
2420 		if (error == KERN_SUCCESS) {
2421 			did_copyout = true;
2422 			buf_to_free = kernel_stackshot_buf;
2423 			size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
2424 			kernel_stackshot_buf = NULL;
2425 			kernel_stackshot_buf_size = 0;
2426 		}
2427 
2428 		goto error_early_exit;
2429 	}
2430 
2431 	if (snapshot_args.flags & STACKSHOT_GET_BOOT_PROFILE) {
2432 		void *bootprofile = NULL;
2433 		uint32_t len = 0;
2434 #if CONFIG_TELEMETRY
2435 		bootprofile_get(&bootprofile, &len);
2436 #endif
2437 		if (!bootprofile || !len) {
2438 			error = KERN_NOT_IN_SET;
2439 			goto error_early_exit;
2440 		}
2441 		error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
2442 		if (error == KERN_SUCCESS) {
2443 			did_copyout = true;
2444 		}
2445 		goto error_early_exit;
2446 	}
2447 
2448 	stackshot_duration_prior_abs = 0;
2449 	stackshot_initial_estimate_adj = os_atomic_load(&stackshot_estimate_adj, relaxed);
2450 	snapshot_args.buffer_size = stackshot_estimate =
2451 	    get_stackshot_estsize(size_hint, stackshot_initial_estimate_adj, snapshot_args.flags, snapshot_args.pid);
2452 	stackshot_initial_estimate = stackshot_estimate;
2453 
2454 	// ensure at least one attempt, even if the initial size from estimate was too big
2455 	snapshot_args.buffer_size = MIN(snapshot_args.buffer_size, max_tracebuf_size);
2456 
2457 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_START,
2458 	    snapshot_args.flags, snapshot_args.buffer_size, snapshot_args.pid, snapshot_args.since_timestamp);
2459 	is_traced = true;
2460 
2461 #if CONFIG_EXCLAVES
2462 	assert(!stackshot_exclave_inspect_ctids);
2463 #endif
2464 
2465 	for (; snapshot_args.buffer_size <= max_tracebuf_size; snapshot_args.buffer_size = MIN(snapshot_args.buffer_size << 1, max_tracebuf_size)) {
2466 		stackshot_tries++;
2467 		if ((error = kmem_alloc(kernel_map, (vm_offset_t *)&snapshot_args.buffer, snapshot_args.buffer_size,
2468 		    KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
2469 			os_log_error(OS_LOG_DEFAULT, "stackshot: initial allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, snapshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
2470 			error = KERN_RESOURCE_SHORTAGE;
2471 			goto error_exit;
2472 		}
2473 
2474 		uint32_t hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2475 		    : (snapshot_args.flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2476 		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2477 		#pragma unused(hdr_tag)
2478 
2479 		stackshot_duration_outer = NULL;
2480 
2481 		/* if compression was requested, allocate the extra zlib scratch area */
2482 		if (snapshot_args.flags & STACKSHOT_DO_COMPRESS) {
2483 			hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2484 			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2485 			if (error != KERN_SUCCESS) {
2486 				os_log_error(OS_LOG_DEFAULT, "failed to initialize compression: %d!\n",
2487 				    (int) error);
2488 				goto error_exit;
2489 			}
2490 		}
2491 
2492 		/* Prepare the compressor for a stackshot */
2493 		error = vm_compressor_kdp_init();
2494 		if (error != KERN_SUCCESS) {
2495 			goto error_exit;
2496 		}
2497 
2498 		/*
2499 		 * Disable interrupts and save the current interrupt state.
2500 		 */
2501 		prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
2502 		uint64_t time_start  = mach_absolute_time();
2503 
2504 		/* Emit a SOCD tracepoint that we are initiating a stackshot */
2505 		SOCD_TRACE_XNU_START(STACKSHOT);
2506 
2507 		/*
2508 		 * Load stackshot parameters.
2509 		 */
2510 		error = kdp_snapshot_preflight_internal(snapshot_args);
2511 
2512 		if (error == KERN_SUCCESS) {
2513 			error = stackshot_trap();
2514 		}
2515 
2516 		/* Emit a SOCD tracepoint that we have completed the stackshot */
2517 		SOCD_TRACE_XNU_END(STACKSHOT);
2518 		ml_set_interrupts_enabled(prev_interrupt_state);
2519 
2520 #if CONFIG_EXCLAVES
2521 		/* stackshot trap should only finish successfully or with no pending Exclave threads */
2522 		assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
2523 #endif
2524 
2525 		/*
2526 		 * Stackshot is no longer active.
2527 		 * (We have to do this here for the special interrupt disable timeout case to work)
2528 		 */
2529 		os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
2530 
2531 		/* Release compressor kdp buffers */
2532 		vm_compressor_kdp_teardown();
2533 
2534 		/* Record duration that interrupts were disabled */
2535 		uint64_t time_end = mach_absolute_time();
2536 		tot_interrupts_off_abs += (time_end - time_start);
2537 
2538 		/* Collect multithreaded kcdata into one finalized buffer */
2539 		if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
2540 			error = stackshot_collect_kcdata();
2541 		}
2542 
2543 #if CONFIG_EXCLAVES
2544 		if (stackshot_exclave_inspect_ctids) {
2545 			if (stackshot_exclave_inspect_ctid_count > 0) {
2546 				STACKSHOT_TESTPOINT(TP_START_COLLECTION);
2547 			}
2548 			error = collect_exclave_threads(snapshot_args.flags);
2549 		}
2550 #endif /* CONFIG_EXCLAVES */
2551 
2552 		if (error == KERN_SUCCESS) {
2553 			if (stackshot_ctx.sc_is_singlethreaded) {
2554 				error = stackshot_finalize_singlethreaded_kcdata();
2555 			} else {
2556 				error = stackshot_finalize_kcdata();
2557 			}
2558 
2559 			if ((error != KERN_SUCCESS) && (error != KERN_INSUFFICIENT_BUFFER_SIZE)) {
2560 				goto error_exit;
2561 			}
2562 			if (error == KERN_INSUFFICIENT_BUFFER_SIZE && snapshot_args.buffer_size == max_tracebuf_size) {
2563 				os_log_error(OS_LOG_DEFAULT, "stackshot: final buffer size was insufficient at maximum size\n");
2564 				error = KERN_RESOURCE_SHORTAGE;
2565 				goto error_exit;
2566 			}
2567 		}
2568 
2569 		/* record the duration that interrupts were disabled + kcdata was being finalized */
2570 		if (stackshot_duration_outer) {
2571 			*stackshot_duration_outer = mach_absolute_time() - time_start;
2572 		}
2573 
2574 		if (error != KERN_SUCCESS) {
2575 			os_log_error(OS_LOG_DEFAULT, "stackshot: debugger call failed: %d, try %llu, buffer %u estimate %u\n", (int)error, stackshot_tries, snapshot_args.buffer_size, stackshot_estimate);
2576 			kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2577 			snapshot_args.buffer = NULL;
2578 			if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
2579 				/*
2580 				 * If we didn't allocate a big enough buffer, deallocate and try again.
2581 				 */
2582 				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD_SHORT) | DBG_FUNC_NONE,
2583 				    time_end - time_start, stackshot_estimate, snapshot_args.buffer_size);
2584 				stackshot_duration_prior_abs += (time_end - time_start);
2585 				if (snapshot_args.buffer_size == max_tracebuf_size) {
2586 					os_log_error(OS_LOG_DEFAULT, "stackshot: initial buffer size was insufficient at maximum size\n");
2587 					error = KERN_RESOURCE_SHORTAGE;
2588 					goto error_exit;
2589 				}
2590 				continue;
2591 			} else {
2592 				goto error_exit;
2593 			}
2594 		}
2595 
2596 		bytes_traced = kdp_stack_snapshot_bytes_traced();
2597 		if (bytes_traced <= 0) {
2598 			error = KERN_ABORTED;
2599 			goto error_exit;
2600 		}
2601 
2602 		if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2603 			error = stackshot_remap_buffer(snapshot_args.buffer, bytes_traced, out_buffer_addr, out_size_addr);
2604 			if (error == KERN_SUCCESS) {
2605 				did_copyout = true;
2606 			}
2607 			goto error_exit;
2608 		}
2609 
2610 		if (!(snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT)) {
2611 			os_log_info(OS_LOG_DEFAULT, "stackshot: succeeded, traced %u bytes to %u buffer (estimate %u) try %llu\n", bytes_traced, snapshot_args.buffer_size, stackshot_estimate, stackshot_tries);
2612 		}
2613 
2614 		/*
2615 		 * Save the stackshot in the kernel buffer.
2616 		 */
2617 		kernel_stackshot_buf = snapshot_args.buffer;
2618 		kernel_stackshot_buf_size =  bytes_traced;
2619 		/*
2620 		 * Figure out if we didn't use all the pages in the buffer. If so, we set buf_to_free to the beginning of
2621 		 * the next page after the end of the stackshot in the buffer so that the kmem_free clips the buffer and
2622 		 * update size_to_free for kmem_free accordingly.
2623 		 */
2624 		size_to_free = snapshot_args.buffer_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
2625 
2626 		assert(size_to_free >= 0);
2627 
2628 		if (size_to_free != 0) {
2629 			buf_to_free = (void *)((uint64_t)snapshot_args.buffer + snapshot_args.buffer_size - size_to_free);
2630 		}
2631 
2632 		snapshot_args.buffer = NULL;
2633 		snapshot_args.buffer_size = 0;
2634 		goto error_exit;
2635 	}
2636 
2637 error_exit:
2638 	if (is_traced) {
2639 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_END,
2640 		    error, tot_interrupts_off_abs, snapshot_args.buffer_size, bytes_traced);
2641 	}
2642 
2643 error_early_exit:
2644 	if (snapshot_args.buffer != NULL) {
2645 		kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2646 	}
2647 	if (buf_to_free != NULL) {
2648 		kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
2649 	}
2650 
2651 	if (error == KERN_SUCCESS && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) && !did_copyout) {
2652 		/* If we return success, we must have done the copyout to userspace. If
2653 		 * we somehow did not, we need to indicate failure instead.
2654 		 */
2655 #if DEVELOPMENT || DEBUG
2656 		os_log_error(OS_LOG_DEFAULT, "stackshot: reached end without doing copyout\n");
2657 #endif // DEVELOPMENT || DEBUG
2658 		error = KERN_FAILURE;
2659 	}
2660 
2661 	STACKSHOT_SUBSYS_UNLOCK();
2662 	STACKSHOT_TESTPOINT(TP_STACKSHOT_DONE);
2663 
2664 	return error;
2665 }
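/*
 * Illustrative in-kernel usage sketch (not part of this file): a kernel
 * caller that wants the snapshot kept in the kernel buffer would fill a
 * stackshot_config_t and pass stackshot_from_user == FALSE, e.g.:
 *
 *	stackshot_config_t cfg = {
 *		.sc_pid   = -1,                           // all tasks
 *		.sc_flags = STACKSHOT_KCDATA_FORMAT |
 *		            STACKSHOT_SAVE_IN_KERNEL_BUFFER,
 *		.sc_size  = 0,                            // let the kernel estimate
 *	};
 *	kern_return_t kr = kern_stack_snapshot_internal(STACKSHOT_CONFIG_TYPE,
 *	    &cfg, sizeof(cfg), FALSE);
 *
 * A later call with STACKSHOT_RETRIEVE_EXISTING_BUFFER hands the saved buffer
 * to user space via stackshot_remap_buffer() above.
 */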
2666 
2667 /*
2668  * Set up state and parameters for a stackshot.
2669  * (This runs on the calling CPU before other CPUs enter the debugger trap.)
2670  * Called when interrupts are disabled, but we're not in the debugger trap yet.
2671  */
2672 __result_use_check
2673 static kern_return_t
2674 kdp_snapshot_preflight_internal(struct kdp_snapshot_args args)
2675 {
2676 	kern_return_t error = KERN_SUCCESS;
2677 	uint64_t microsecs = 0, secs = 0;
2678 	bool is_panic = ((args.flags & STACKSHOT_FROM_PANIC) != 0);
2679 	bool process_scoped = (stackshot_args.pid != -1) &&
2680 	    ((stackshot_args.flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
2681 	bool is_singlethreaded = stackshot_single_thread || (process_scoped || is_panic || ((args.flags & STACKSHOT_PAGE_TABLES) != 0));
2682 	clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)&microsecs);
2683 
2684 	cur_stackshot_ctx_idx = (is_panic ? STACKSHOT_CTX_IDX_PANIC : STACKSHOT_CTX_IDX_NORMAL);
2685 
2686 	/* Setup overall state */
2687 	stackshot_ctx = (struct stackshot_context) {
2688 		.sc_args               = args,
2689 		.sc_state              = SS_SETUP,
2690 		.sc_bytes_traced       = 0,
2691 		.sc_bytes_uncompressed = 0,
2692 		.sc_microsecs          = microsecs + (secs * USEC_PER_SEC),
2693 		.sc_panic_stackshot    = is_panic,
2694 		.sc_is_singlethreaded  = is_singlethreaded,
2695 		.sc_cpus_working       = 0,
2696 		.sc_retval             = 0,
2697 		.sc_calling_cpuid      = cpu_number(),
2698 		.sc_main_cpuid         = is_singlethreaded ? cpu_number() : -1,
2699 		.sc_min_kcdata_size    = get_stackshot_est_tasksize(args.flags),
2700 		.sc_enable_faulting    = false,
2701 	};
2702 
2703 	if (!stackshot_ctx.sc_panic_stackshot) {
2704 #if defined(__AMP__)
2705 		/* On AMP systems, we want to split the buffers up by cluster to avoid cache line effects. */
2706 		stackshot_ctx.sc_num_buffers = is_singlethreaded ? 1 : ml_get_cluster_count();
2707 #else /* __AMP__ */
2708 		stackshot_ctx.sc_num_buffers = 1;
2709 #endif /* !__AMP__ */
2710 		size_t bufsz = args.buffer_size / stackshot_ctx.sc_num_buffers;
2711 		for (int buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
2712 			stackshot_ctx.sc_buffers[buf_idx] = (struct stackshot_buffer) {
2713 				.ssb_ptr = (void*) ((mach_vm_address_t) args.buffer + (bufsz * buf_idx)),
2714 				.ssb_size = bufsz,
2715 				.ssb_used = 0,
2716 				.ssb_freelist = NULL,
2717 				.ssb_freelist_lock = 0,
2718 				.ssb_overhead = 0
2719 			};
2720 		}
2721 
2722 		/* Setup per-cpu state */
2723 		percpu_foreach_base(base) {
2724 			*PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu) = (struct stackshot_cpu_context) { 0 };
2725 		}
2726 
2727 		if (is_singlethreaded) {
2728 			/* If the stackshot is singlethreaded, set up the kcdata - we don't bother with linked-list kcdata in singlethreaded mode. */
2729 			uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2730 			    : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2731 			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2732 			kcdata_memory_static_init(stackshot_kcdata_p, (mach_vm_address_t) stackshot_args.buffer, hdr_tag,
2733 			    stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
2734 			if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
2735 				hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2736 				    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2737 				kcd_exit_on_error(kcdata_init_compress(stackshot_kcdata_p, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
2738 			}
2739 			stackshot_cpu_ctx.scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES);
2740 		}
2741 	} else {
2742 		/*
2743 		 * If this is a panic stackshot, we need to handle things differently.
2744 		 * The panic code hands us a kcdata descriptor to work with instead of
2745 		 * us making one ourselves.
2746 		 */
2747 		*stackshot_kcdata_p = *stackshot_args.descriptor;
2748 		stackshot_cpu_ctx = (struct stackshot_cpu_context) {
2749 			.scc_can_work = true,
2750 			.scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES)
2751 		};
2752 #if STACKSHOT_COLLECTS_LATENCY_INFO
2753 		*(PERCPU_GET(stackshot_trace_buffer)) = (struct stackshot_trace_buffer) {};
2754 #endif
2755 	}
2756 
2757 	/* Set up our cpu state */
2758 	stackshot_cpu_preflight();
2759 
2760 error_exit:
2761 	return error;
2762 }
2763 
2764 /*
2765  * The old function signature for kdp_snapshot_preflight, used in the panic path.
2766  * Called when interrupts are disabled, but we're not in the debugger trap yet.
2767  */
2768 void
2769 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags,
2770     kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask)
2771 {
2772 	__assert_only kern_return_t err;
2773 	err = kdp_snapshot_preflight_internal((struct kdp_snapshot_args) {
2774 		.pid = pid,
2775 		.buffer = tracebuf,
2776 		.buffer_size = tracebuf_size,
2777 		.flags = flags,
2778 		.descriptor = data_p,
2779 		.since_timestamp = since_timestamp,
2780 		.pagetable_mask = pagetable_mask
2781 	});
2782 
2783 
2784 	/* This shouldn't ever return an error in the panic path. */
2785 	assert(err == KERN_SUCCESS);
2786 }
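
/*
 * Illustrative only (not part of the build): the panic path is expected to
 * call this wrapper with its preallocated buffer and kcdata descriptor,
 * roughly along the lines of
 *
 *     kdp_snapshot_preflight(-1, panic_stackshot_buf, panic_stackshot_buf_len,
 *         STACKSHOT_SAVE_KEXT_LOADINFO | STACKSHOT_FROM_PANIC,
 *         panic_kcdata_p, 0, 0);
 *
 * The buffer/descriptor names and the exact flag set above are assumptions
 * for illustration, not taken from this file.
 */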
2787 
2788 static void
2789 stackshot_reset_state(void)
2790 {
2791 	stackshot_ctx = (struct stackshot_context) { 0 };
2792 }
2793 
2794 void
2795 panic_stackshot_reset_state(void)
2796 {
2797 	stackshot_reset_state();
2798 }
2799 
2800 boolean_t
2801 stackshot_active(void)
2802 {
2803 	return os_atomic_load(&stackshot_ctx.sc_state, relaxed) != SS_INACTIVE;
2804 }
2805 
2806 boolean_t
2807 panic_stackshot_active(void)
2808 {
2809 	return os_atomic_load(&stackshot_contexts[STACKSHOT_CTX_IDX_PANIC].sc_state, relaxed) != SS_INACTIVE;
2810 }
2811 
2812 uint32_t
2813 kdp_stack_snapshot_bytes_traced(void)
2814 {
2815 	return stackshot_ctx.sc_bytes_traced;
2816 }
2817 
2818 uint32_t
2819 kdp_stack_snapshot_bytes_uncompressed(void)
2820 {
2821 	return stackshot_ctx.sc_bytes_uncompressed;
2822 }
2823 
2824 static boolean_t
2825 memory_iszero(void *addr, size_t size)
2826 {
2827 	char *data = (char *)addr;
2828 	for (size_t i = 0; i < size; i++) {
2829 		if (data[i] != 0) {
2830 			return FALSE;
2831 		}
2832 	}
2833 	return TRUE;
2834 }
2835 
2836 static void
2837 _stackshot_validation_reset(void)
2838 {
2839 	percpu_foreach_base(base) {
2840 		struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2841 		cpu_ctx->scc_validation_state.last_valid_page_kva = -1;
2842 		cpu_ctx->scc_validation_state.last_valid_size = 0;
2843 	}
2844 }
2845 
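/*
 * Cheap KVA validation with a one-entry, per-CPU cache: the last page and
 * size that passed ml_validate_nofault() are remembered, so repeated probes
 * of the same page (e.g. while walking a string) can skip revalidation.  The
 * cache is cleared via _stackshot_validation_reset() between uses.
 */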
2846 static bool
2847 _stackshot_validate_kva(vm_offset_t addr, size_t size)
2848 {
2849 	vm_offset_t page_addr = atop_kernel(addr);
2850 	if (stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva == page_addr &&
2851 	    stackshot_cpu_ctx.scc_validation_state.last_valid_size <= size) {
2852 		return true;
2853 	}
2854 
2855 	if (ml_validate_nofault(addr, size)) {
2856 		stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva = page_addr;
2857 		stackshot_cpu_ctx.scc_validation_state.last_valid_size = size;
2858 		return true;
2859 	}
2860 	return false;
2861 }
2862 
2863 static long
2864 _stackshot_strlen(const char *s, size_t maxlen)
2865 {
2866 	size_t len = 0;
2867 	for (len = 0; _stackshot_validate_kva((vm_offset_t)s, 1); len++, s++) {
2868 		if (*s == 0) {
2869 			return len;
2870 		}
2871 		if (len >= maxlen) {
2872 			return -1;
2873 		}
2874 	}
2875 	return -1; /* failed before end of string */
2876 }
2877 
2878 
2879 static size_t
2880 stackshot_plh_est_size(void)
2881 {
2882 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
2883 	size_t size = STASKSHOT_PLH_SIZE(stackshot_port_label_size);
2884 
2885 	if (size == 0) {
2886 		return 0;
2887 	}
2888 #define SIZE_EST(x) ROUNDUP((x), sizeof (uintptr_t))
2889 	return SIZE_EST(size * sizeof(*plh->plh_array)) +
2890 	       SIZE_EST(size * sizeof(*plh->plh_chains)) +
2891 	       SIZE_EST(size * sizeof(*stackshot_cpu_ctx.scc_plh_gen.pgs_gen) * real_ncpus) +
2892 	       SIZE_EST((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh->plh_hash));
2893 #undef SIZE_EST
2894 }
2895 
2896 static void
2897 stackshot_plh_reset(void)
2898 {
2899 	stackshot_ctx.sc_plh = (struct port_label_hash){.plh_size = 0};  /* structure assignment */
2900 }
2901 
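/*
 * Allocate the port-label hash out of stackshot memory.  A hard allocation
 * error is propagated to the caller; if an allocation merely comes back NULL,
 * plh_bad is bumped (when PLH statistics are compiled in) and sc_plh is left
 * in its reset, disabled state (plh_size == 0), so the stackshot proceeds
 * without port-label info rather than failing outright.
 */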
2902 static kern_return_t
2903 stackshot_plh_setup(void)
2904 {
2905 	kern_return_t error;
2906 	size_t size;
2907 	bool percpu_alloc_failed = false;
2908 	struct port_label_hash plh = {
2909 		.plh_size = STASKSHOT_PLH_SIZE(stackshot_port_label_size),
2910 		.plh_count = 0,
2911 	};
2912 
2913 	stackshot_plh_reset();
2914 
2915 	percpu_foreach_base(base) {
2916 		struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2917 		cpu_ctx->scc_plh_gen = (struct _stackshot_plh_gen_state){
2918 			.pgs_gen = NULL,
2919 			.pgs_curgen = 1,
2920 			.pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX,
2921 			.pgs_curgen_max = 0,
2922 		};
2923 	}
2924 
2925 	size = plh.plh_size;
2926 	if (size == 0) {
2927 		return KERN_SUCCESS;
2928 	}
2929 	plh.plh_array = stackshot_alloc_with_size(size * sizeof(*plh.plh_array), &error);
2930 	plh.plh_chains = stackshot_alloc_with_size(size * sizeof(*plh.plh_chains), &error);
2931 	percpu_foreach_base(base) {
2932 		struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2933 		cpu_ctx->scc_plh_gen.pgs_gen = stackshot_alloc_with_size(size * sizeof(*cpu_ctx->scc_plh_gen.pgs_gen), &error);
2934 		if (cpu_ctx->scc_plh_gen.pgs_gen == NULL) {
2935 			percpu_alloc_failed = true;
2936 			break;
2937 		}
2938 		for (int x = 0; x < size; x++) {
2939 			cpu_ctx->scc_plh_gen.pgs_gen[x] = 0;
2940 		}
2941 	}
2942 	plh.plh_hash = stackshot_alloc_with_size((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh.plh_hash), &error);
2943 	if (error != KERN_SUCCESS) {
2944 		return error;
2945 	}
2946 	if (plh.plh_array == NULL || plh.plh_chains == NULL || percpu_alloc_failed || plh.plh_hash == NULL) {
2947 		PLH_STAT_OP(os_atomic_inc(&stackshot_ctx.sc_plh.plh_bad, relaxed));
2948 		return KERN_SUCCESS;
2949 	}
2950 	for (int x = 0; x < size; x++) {
2951 		plh.plh_array[x] = NULL;
2952 		plh.plh_chains[x] = -1;
2953 	}
2954 	for (int x = 0; x < (1ul << STACKSHOT_PLH_SHIFT); x++) {
2955 		plh.plh_hash[x] = -1;
2956 	}
2957 	stackshot_ctx.sc_plh = plh;  /* structure assignment */
2958 	return KERN_SUCCESS;
2959 }
2960 
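/*
 * Hash an ipc_service_port_label pointer down to STACKSHOT_PLH_SHIFT bits by
 * xor-folding: the pointer is xored with copies of itself shifted right by
 * 16x, 8x, 4x, 2x and 1x STACKSHOT_PLH_SHIFT bits (steps that would shift
 * past the pointer width contribute 0), then masked to the bucket range.
 * This mixes the high bits of the (typically aligned) pointer into the low
 * bits used for the bucket index.
 */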
2961 static int16_t
2962 stackshot_plh_hash(struct ipc_service_port_label *ispl)
2963 {
2964 	uintptr_t ptr = (uintptr_t)ispl;
2965 	static_assert(STACKSHOT_PLH_SHIFT < 16, "plh_hash must fit in 15 bits");
2966 #define PLH_HASH_STEP(ptr, x) \
2967 	    ((((x) * STACKSHOT_PLH_SHIFT) < (sizeof(ispl) * CHAR_BIT)) ? ((ptr) >> ((x) * STACKSHOT_PLH_SHIFT)) : 0)
2968 	ptr ^= PLH_HASH_STEP(ptr, 16);
2969 	ptr ^= PLH_HASH_STEP(ptr, 8);
2970 	ptr ^= PLH_HASH_STEP(ptr, 4);
2971 	ptr ^= PLH_HASH_STEP(ptr, 2);
2972 	ptr ^= PLH_HASH_STEP(ptr, 1);
2973 #undef PLH_HASH_STEP
2974 	return (int16_t)(ptr & ((1ul << STACKSHOT_PLH_SHIFT) - 1));
2975 }
2976 
2977 enum stackshot_plh_lookup_type {
2978 	STACKSHOT_PLH_LOOKUP_UNKNOWN,
2979 	STACKSHOT_PLH_LOOKUP_SEND,
2980 	STACKSHOT_PLH_LOOKUP_RECEIVE,
2981 };
2982 
2983 static void
2984 stackshot_plh_resetgen(void)
2985 {
2986 	struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
2987 	uint16_t plh_size = stackshot_ctx.sc_plh.plh_size;
2988 
2989 	if (pgs->pgs_curgen_min == STACKSHOT_PLH_SIZE_MAX && pgs->pgs_curgen_max == 0) {
2990 		return;  // no lookups, nothing using the current generation
2991 	}
2992 	pgs->pgs_curgen++;
2993 	pgs->pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX;
2994 	pgs->pgs_curgen_max = 0;
2995 	if (pgs->pgs_curgen == 0) { // wrapped, zero the array and increment the generation
2996 		for (int x = 0; x < plh_size; x++) {
2997 			pgs->pgs_gen[x] = 0;
2998 		}
2999 		pgs->pgs_curgen = 1;
3000 	}
3001 }
3002 
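/*
 * The port-label hash is an open hash with separate chaining by index:
 * plh_hash[bucket] holds the index of the first entry, plh_chains[] links
 * entries within a bucket, and -1 terminates a chain.  A miss inserts the
 * label (space permitting); either way the entry's per-CPU generation marker
 * is updated, so kdp_stackshot_plh_record_locked() later emits only the
 * labels actually referenced since the last stackshot_plh_resetgen().  The
 * returned id is the array index + 1, since 0 is reserved.
 */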
3003 static int16_t
3004 stackshot_plh_lookup_locked(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
3005 {
3006 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3007 	int depth;
3008 	int16_t cur;
3009 	if (ispl == NULL) {
3010 		return STACKSHOT_PORTLABELID_NONE;
3011 	}
3012 	switch (type) {
3013 	case STACKSHOT_PLH_LOOKUP_SEND:
3014 		PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_send, relaxed));
3015 		break;
3016 	case STACKSHOT_PLH_LOOKUP_RECEIVE:
3017 		PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_receive, relaxed));
3018 		break;
3019 	default:
3020 		break;
3021 	}
3022 	PLH_STAT_OP(os_atomic_inc(&plh->plh_lookups, relaxed));
3023 	if (plh->plh_size == 0) {
3024 		return STACKSHOT_PORTLABELID_MISSING;
3025 	}
3026 	int16_t hash = stackshot_plh_hash(ispl);
3027 	assert(hash >= 0 && hash < (1ul << STACKSHOT_PLH_SHIFT));
3028 	depth = 0;
3029 	for (cur = plh->plh_hash[hash]; cur >= 0; cur = plh->plh_chains[cur]) {
3030 		/* cur must be in-range, and chain depth can never be above our # allocated */
3031 		if (cur >= plh->plh_count || depth > plh->plh_count || depth > plh->plh_size) {
3032 			PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3033 			PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3034 			return STACKSHOT_PORTLABELID_MISSING;
3035 		}
3036 		assert(cur < plh->plh_count);
3037 		if (plh->plh_array[cur] == ispl) {
3038 			PLH_STAT_OP(os_atomic_inc(&plh->plh_found, relaxed));
3039 			PLH_STAT_OP(os_atomic_add(&plh->plh_found_depth, depth, relaxed));
3040 			goto found;
3041 		}
3042 		depth++;
3043 	}
3044 	/* not found in hash table, so alloc and insert it */
3045 	if (cur != -1) {
3046 		PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3047 		PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3048 		return STACKSHOT_PORTLABELID_MISSING; /* bad end of chain */
3049 	}
3050 	PLH_STAT_OP(os_atomic_inc(&plh->plh_insert, relaxed));
3051 	PLH_STAT_OP(os_atomic_add(&plh->plh_insert_depth, depth, relaxed));
3052 	if (plh->plh_count >= plh->plh_size) {
3053 		return STACKSHOT_PORTLABELID_MISSING; /* no space */
3054 	}
3055 	cur = plh->plh_count;
3056 	plh->plh_count++;
3057 	plh->plh_array[cur] = ispl;
3058 	plh->plh_chains[cur] = plh->plh_hash[hash];
3059 	plh->plh_hash[hash] = cur;
3060 found:  ;
3061 	struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3062 	pgs->pgs_gen[cur] = pgs->pgs_curgen;
3063 	if (pgs->pgs_curgen_min > cur) {
3064 		pgs->pgs_curgen_min = cur;
3065 	}
3066 	if (pgs->pgs_curgen_max < cur) {
3067 		pgs->pgs_curgen_max = cur;
3068 	}
3069 	return cur + 1;   /* offset to avoid 0 */
3070 }
3071 
3072 static kern_return_t
3073 kdp_stackshot_plh_record_locked(void)
3074 {
3075 	kern_return_t error = KERN_SUCCESS;
3076 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3077 	struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3078 	uint16_t count = plh->plh_count;
3079 	uint8_t curgen = pgs->pgs_curgen;
3080 	int16_t curgen_min = pgs->pgs_curgen_min;
3081 	int16_t curgen_max = pgs->pgs_curgen_max;
3082 	if (curgen_min <= curgen_max && curgen_max < count &&
3083 	    count <= plh->plh_size && plh->plh_size <= STACKSHOT_PLH_SIZE_MAX) {
3084 		struct ipc_service_port_label **arr = plh->plh_array;
3085 		size_t ispl_size, max_namelen;
3086 		kdp_ipc_splabel_size(&ispl_size, &max_namelen);
3087 		for (int idx = curgen_min; idx <= curgen_max; idx++) {
3088 			struct ipc_service_port_label *ispl = arr[idx];
3089 			struct portlabel_info spl = {
3090 				.portlabel_id = (idx + 1),
3091 			};
3092 			const char *name = NULL;
3093 			long name_sz = 0;
3094 			if (pgs->pgs_gen[idx] != curgen) {
3095 				continue;
3096 			}
3097 			if (_stackshot_validate_kva((vm_offset_t)ispl, ispl_size)) {
3098 				kdp_ipc_fill_splabel(ispl, &spl, &name);
3099 #if STACKSHOT_COLLECTS_RDAR_126582377_DATA
3100 			} else {
3101 				if (ispl != NULL && (vm_offset_t)ispl >> 48 == 0x0000) {
3102 					ca_event_t event_to_send = os_atomic_xchg(&rdar_126582377_event, NULL, relaxed);
3103 					if (event_to_send) {
3104 						CA_EVENT_SEND(event_to_send);
3105 					}
3106 				}
3107 #endif
3108 			}
3109 
3110 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3111 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3112 			if (name != NULL && (name_sz = _stackshot_strlen(name, max_namelen)) > 0) {   /* validates the kva */
3113 				kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL_NAME, name_sz + 1, name));
3114 			} else {
3115 				spl.portlabel_flags |= STACKSHOT_PORTLABEL_READFAILED;
3116 			}
3117 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL, sizeof(spl), &spl));
3118 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
3119 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3120 		}
3121 	}
3122 
3123 error_exit:
3124 	return error;
3125 }
3126 
3127 // record any port-label hash entries referenced since the last stackshot_plh_resetgen() call
3128 static kern_return_t
3129 kdp_stackshot_plh_record(void)
3130 {
3131 	kern_return_t error;
3132 	plh_lock(&stackshot_ctx.sc_plh);
3133 	error = kdp_stackshot_plh_record_locked();
3134 	plh_unlock(&stackshot_ctx.sc_plh);
3135 	return error;
3136 }
3137 
3138 static int16_t
3139 stackshot_plh_lookup(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
3140 {
3141 	int16_t result;
3142 	plh_lock(&stackshot_ctx.sc_plh);
3143 	result = stackshot_plh_lookup_locked(ispl, type);
3144 	plh_unlock(&stackshot_ctx.sc_plh);
3145 	return result;
3146 }
3147 
3148 #if DEVELOPMENT || DEBUG
3149 static kern_return_t
3150 kdp_stackshot_plh_stats(void)
3151 {
3152 	kern_return_t error = KERN_SUCCESS;
3153 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3154 
3155 #define PLH_STAT(x) do { if (os_atomic_load(&plh->x, relaxed) != 0) { \
3156 	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, os_atomic_load(&plh->x, relaxed), "stackshot_" #x)); \
3157 } } while (0)
3158 	PLH_STAT(plh_size);
3159 	PLH_STAT(plh_lookups);
3160 	PLH_STAT(plh_found);
3161 	PLH_STAT(plh_found_depth);
3162 	PLH_STAT(plh_insert);
3163 	PLH_STAT(plh_insert_depth);
3164 	PLH_STAT(plh_bad);
3165 	PLH_STAT(plh_bad_depth);
3166 	PLH_STAT(plh_lookup_send);
3167 	PLH_STAT(plh_lookup_receive);
3168 #undef PLH_STAT
3169 
3170 error_exit:
3171 	return error;
3172 }
3173 #endif /* DEVELOPMENT || DEBUG */
3174 
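/*
 * Collapse assorted task state (bitness, termination, suspension, freeze,
 * Darwin BG / role / boost / suppression, memorystatus dirty-tracking,
 * workqueue state and importance donation) into the ss_flags bitmask
 * reported in task snapshots.
 */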
3175 static uint64_t
3176 kcdata_get_task_ss_flags(task_t task)
3177 {
3178 	uint64_t ss_flags = 0;
3179 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
3180 	void *bsd_info = get_bsdtask_info(task);
3181 
3182 	if (task_64bit_addr) {
3183 		ss_flags |= kUser64_p;
3184 	}
3185 	if (!task->active || task_is_a_corpse(task) || proc_exiting(bsd_info)) {
3186 		ss_flags |= kTerminatedSnapshot;
3187 	}
3188 	if (task->pidsuspended) {
3189 		ss_flags |= kPidSuspended;
3190 	}
3191 	if (task->frozen) {
3192 		ss_flags |= kFrozen;
3193 	}
3194 	if (task->effective_policy.tep_darwinbg == 1) {
3195 		ss_flags |= kTaskDarwinBG;
3196 	}
3197 	if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
3198 		ss_flags |= kTaskIsForeground;
3199 	}
3200 	if (task->requested_policy.trp_boosted == 1) {
3201 		ss_flags |= kTaskIsBoosted;
3202 	}
3203 	if (task->effective_policy.tep_sup_active == 1) {
3204 		ss_flags |= kTaskIsSuppressed;
3205 	}
3206 #if CONFIG_MEMORYSTATUS
3207 
3208 	boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
3209 	memorystatus_proc_flags_unsafe(bsd_info, &dirty, &dirty_tracked, &allow_idle_exit);
3210 	if (dirty) {
3211 		ss_flags |= kTaskIsDirty;
3212 	}
3213 	if (dirty_tracked) {
3214 		ss_flags |= kTaskIsDirtyTracked;
3215 	}
3216 	if (allow_idle_exit) {
3217 		ss_flags |= kTaskAllowIdleExit;
3218 	}
3219 
3220 #endif
3221 	if (task->effective_policy.tep_tal_engaged) {
3222 		ss_flags |= kTaskTALEngaged;
3223 	}
3224 
3225 	ss_flags |= workqueue_get_task_ss_flags_from_pwq_state_kdp(bsd_info);
3226 
3227 #if IMPORTANCE_INHERITANCE
3228 	if (task->task_imp_base) {
3229 		if (task->task_imp_base->iit_donor) {
3230 			ss_flags |= kTaskIsImpDonor;
3231 		}
3232 		if (task->task_imp_base->iit_live_donor) {
3233 			ss_flags |= kTaskIsLiveImpDonor;
3234 		}
3235 	}
3236 #endif
3237 	return ss_flags;
3238 }
3239 
3240 static kern_return_t
3241 kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
3242 {
3243 	kern_return_t error = KERN_SUCCESS;
3244 
3245 	uint64_t shared_cache_slide = 0;
3246 	uint64_t shared_cache_first_mapping = 0;
3247 	uint32_t kdp_fault_results = 0;
3248 	uint32_t shared_cache_id = 0;
3249 	struct dyld_shared_cache_loadinfo shared_cache_data = {0};
3250 
3251 
3252 	assert(task_snap_ss_flags != NULL);
3253 
3254 	/* Get basic info about the shared region pointer, regardless of any failures */
3255 	if (task->shared_region == NULL) {
3256 		*task_snap_ss_flags |= kTaskSharedRegionNone;
3257 	} else if (task->shared_region == primary_system_shared_region) {
3258 		*task_snap_ss_flags |= kTaskSharedRegionSystem;
3259 	} else {
3260 		*task_snap_ss_flags |= kTaskSharedRegionOther;
3261 	}
3262 
3263 	if (task->shared_region && _stackshot_validate_kva((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
3264 		struct vm_shared_region *sr = task->shared_region;
3265 		shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;
3266 
3267 		shared_cache_id = sr->sr_id;
3268 	} else {
3269 		*task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
3270 		goto error_exit;
3271 	}
3272 
3273 	/* We haven't copied in the shared region UUID yet as part of setup */
3274 	if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
3275 		goto error_exit;
3276 	}
3277 
3278 
3279 	/*
3280 	 * No refcounting here, but we are in debugger context, so that should be safe.
3281 	 */
3282 	shared_cache_slide = task->shared_region->sr_slide;
3283 
3284 	if (task->shared_region == primary_system_shared_region) {
3285 		/* skip adding shared cache info -- it's the same as the system level one */
3286 		goto error_exit;
3287 	}
3288 	/*
3289 	 * New-style shared cache reference: for non-primary shared regions,
3290 	 * just include the ID of the shared cache we're attached to.  Consumers
3291 	 * should use the following info from the task's ts_ss_flags as well:
3292 	 *
3293 	 * kTaskSharedRegionNone - task is not attached to a shared region
3294 	 * kTaskSharedRegionSystem - task is attached to the shared region
3295 	 *     with kSharedCacheSystemPrimary set in sharedCacheFlags.
3296 	 * kTaskSharedRegionOther - task is attached to the shared region with
3297 	 *     sharedCacheID matching the STACKSHOT_KCTYPE_SHAREDCACHE_ID entry.
3298 	 */
3299 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_ID, sizeof(shared_cache_id), &shared_cache_id));
3300 
3301 	/*
3302 	 * For backwards compatibility; this should eventually be removed.
3303 	 *
3304 	 * Historically, this data was in a dyld_uuid_info_64 structure, but the
3305 	 * naming of both the structure and fields for this use wasn't great.  The
3306 	 * dyld_shared_cache_loadinfo structure has better names, but the same
3307 	 * layout and content as the original.
3308 	 *
3309 	 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
3310 	 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
3311 	 * entries; here, it's the slid first mapping, and we leave it that way
3312 	 * for backwards compatibility.
3313 	 */
3314 	shared_cache_data.sharedCacheSlide = shared_cache_slide;
3315 	kdp_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
3316 	shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
3317 	shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
3318 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));
3319 
3320 error_exit:
3321 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3322 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
3323 	}
3324 
3325 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3326 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
3327 	}
3328 
3329 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3330 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
3331 	}
3332 
3333 	return error;
3334 }
3335 
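/*
 * Record binary image load info (UUID + load address) for the task.  For user
 * tasks this is read from dyld's all_image_infos in the task's address space
 * (optionally faulting pages in); if that copyin fails, we fall back to the
 * main binary UUID kept on the proc.  For the kernel task we record the
 * kernelcache or kernel UUID and, if requested, the loaded kext summaries.
 * Any fault results are reflected back into the task snapshot ss_flags.
 */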
3336 static kern_return_t
3337 kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
3338 {
3339 	bool save_loadinfo_p         = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
3340 	bool save_kextloadinfo_p     = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
3341 	bool save_compactinfo_p      = ((trace_flags & STACKSHOT_SAVE_DYLD_COMPACTINFO) != 0);
3342 	bool should_fault            = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);
3343 
3344 	kern_return_t error        = KERN_SUCCESS;
3345 	mach_vm_address_t out_addr = 0;
3346 
3347 	mach_vm_address_t dyld_compactinfo_addr = 0;
3348 	uint32_t dyld_compactinfo_size = 0;
3349 
3350 	uint32_t uuid_info_count         = 0;
3351 	mach_vm_address_t uuid_info_addr = 0;
3352 	uint64_t uuid_info_timestamp     = 0;
3353 	#pragma unused(uuid_info_timestamp)
3354 	kdp_fault_result_flags_t kdp_fault_results = 0;
3355 
3356 
3357 	assert(task_snap_ss_flags != NULL);
3358 
3359 	int task_pid     = pid_from_task(task);
3360 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
3361 
3362 	if ((save_loadinfo_p || save_compactinfo_p) && have_pmap && task->active && task_pid > 0) {
3363 		/* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
3364 		if (task_64bit_addr) {
3365 			struct user64_dyld_all_image_infos task_image_infos;
3366 			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
3367 			    sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
3368 				uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
3369 				uuid_info_addr = task_image_infos.uuidArray;
3370 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
3371 					uuid_info_timestamp = task_image_infos.timestamp;
3372 				}
3373 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
3374 					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
3375 					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
3376 				}
3377 
3378 			}
3379 		} else {
3380 			struct user32_dyld_all_image_infos task_image_infos;
3381 			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
3382 			    sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
3383 				uuid_info_count = task_image_infos.uuidArrayCount;
3384 				uuid_info_addr = task_image_infos.uuidArray;
3385 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
3386 					uuid_info_timestamp = task_image_infos.timestamp;
3387 				}
3388 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
3389 					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
3390 					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
3391 				}
3392 			}
3393 		}
3394 
3395 		/*
3396 		 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
3397 		 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
3398 		 * for this task.
3399 		 */
3400 		if (!uuid_info_addr) {
3401 			uuid_info_count = 0;
3402 		}
3403 
3404 		if (!dyld_compactinfo_addr) {
3405 			dyld_compactinfo_size = 0;
3406 		}
3407 
3408 	}
3409 
3410 	if (have_pmap && task_pid == 0) {
3411 		if (save_kextloadinfo_p && _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
3412 			uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
3413 		} else {
3414 			uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
3415 		}
3416 	}
3417 
3418 	if (save_compactinfo_p && task_pid > 0) {
3419 		if (dyld_compactinfo_size == 0) {
3420 			*task_snap_ss_flags |= kTaskDyldCompactInfoNone;
3421 		} else if (dyld_compactinfo_size > MAX_DYLD_COMPACTINFO) {
3422 			*task_snap_ss_flags |= kTaskDyldCompactInfoTooBig;
3423 		} else {
3424 			kdp_fault_result_flags_t ci_kdp_fault_results = 0;
3425 
3426 			/* Open a compression window to avoid overflowing the stack */
3427 			kcdata_compression_window_open(kcd);
3428 			kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_DYLD_COMPACTINFO,
3429 			    dyld_compactinfo_size, &out_addr));
3430 
3431 			if (!stackshot_copyin(task->map, dyld_compactinfo_addr, (void *)out_addr,
3432 			    dyld_compactinfo_size, should_fault, &ci_kdp_fault_results)) {
3433 				bzero((void *)out_addr, dyld_compactinfo_size);
3434 			}
3435 			if (ci_kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3436 				*task_snap_ss_flags |= kTaskDyldCompactInfoMissing;
3437 			}
3438 
3439 			if (ci_kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3440 				*task_snap_ss_flags |= kTaskDyldCompactInfoTriedFault;
3441 			}
3442 
3443 			if (ci_kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3444 				*task_snap_ss_flags |= kTaskDyldCompactInfoFaultedIn;
3445 			}
3446 
3447 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
3448 		}
3449 	}
3450 	if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
3451 		uint32_t copied_uuid_count = 0;
3452 		uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
3453 		uint32_t uuid_info_array_size = 0;
3454 
3455 		/* Open a compression window to avoid overflowing the stack */
3456 		kcdata_compression_window_open(kcd);
3457 
3458 		/* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
3459 		if (uuid_info_count > 0) {
3460 			uuid_info_array_size = uuid_info_count * uuid_info_size;
3461 
3462 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
3463 			    uuid_info_size, uuid_info_count, &out_addr));
3464 
3465 			if (!stackshot_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
3466 				bzero((void *)out_addr, uuid_info_array_size);
3467 			} else {
3468 				copied_uuid_count = uuid_info_count;
3469 			}
3470 		}
3471 
3472 		uuid_t binary_uuid;
3473 		if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
3474 			/* We failed to copyin the UUID information; try to store the UUID of the main binary we have in the proc */
3475 			if (uuid_info_array_size == 0) {
3476 				/* We just need to store one UUID */
3477 				uuid_info_array_size = uuid_info_size;
3478 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
3479 				    uuid_info_size, 1, &out_addr));
3480 			}
3481 
3482 			if (task_64bit_addr) {
3483 				struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
3484 				uint64_t image_load_address = task->mach_header_vm_address;
3485 
3486 				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
3487 				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
3488 			} else {
3489 				struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
3490 				uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;
3491 
3492 				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
3493 				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
3494 			}
3495 		}
3496 
3497 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
3498 	} else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
3499 		uintptr_t image_load_address;
3500 
3501 		do {
3502 #if defined(__arm64__)
3503 			if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
3504 				struct dyld_uuid_info_64 kc_uuid = {0};
3505 				kc_uuid.imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
3506 				kdp_memcpy(&kc_uuid.imageUUID, &kernelcache_uuid, sizeof(uuid_t));
3507 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &kc_uuid));
3508 				break;
3509 			}
3510 #endif /* defined(__arm64__) */
3511 
3512 			if (!kernel_uuid || !_stackshot_validate_kva((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
3513 				/* Kernel UUID not found or inaccessible */
3514 				break;
3515 			}
3516 
3517 			uint32_t uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO;
3518 			if ((sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))) {
3519 				uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO64;
3520 #if  defined(__arm64__)
3521 				kc_format_t primary_kc_type = KCFormatUnknown;
3522 				if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type == KCFormatFileset)) {
3523 					/* return TEXT_EXEC based load information on arm devices running with fileset kernelcaches */
3524 					uuid_type = STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC;
3525 				}
3526 #endif
3527 			}
3528 
3529 			/*
3530 			 * The element count of the array can vary - avoid overflowing the
3531 			 * stack by opening a window.
3532 			 */
3533 			kcdata_compression_window_open(kcd);
3534 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, uuid_type,
3535 			    sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
3536 			kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;
3537 
3538 			image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
3539 #if defined(__arm64__)
3540 			if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
3541 				/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
3542 				extern vm_offset_t segTEXTEXECB;
3543 				image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(segTEXTEXECB);
3544 			}
3545 #endif
3546 			uuid_info_array[0].imageLoadAddress = image_load_address;
3547 			kdp_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));
3548 
3549 			if (save_kextloadinfo_p &&
3550 			    _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
3551 			    _stackshot_validate_kva((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
3552 			    gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
3553 				uint32_t kexti;
3554 				for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
3555 					image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
3556 #if defined(__arm64__)
3557 					if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
3558 						/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
3559 						image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].text_exec_address);
3560 					}
3561 #endif
3562 					uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
3563 					kdp_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
3564 				}
3565 			}
3566 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
3567 		} while (0);
3568 	}
3569 
3570 error_exit:
3571 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3572 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
3573 	}
3574 
3575 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3576 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
3577 	}
3578 
3579 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3580 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
3581 	}
3582 
3583 	return error;
3584 }
3585 
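/*
 * The disk-write, non-paging and data counters reported here are derived by
 * subtracting the directly tracked reads, paging and metadata counters from
 * total_io.  The record is skipped entirely when every counter is zero.
 */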
3586 static kern_return_t
3587 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
3588 {
3589 	kern_return_t error = KERN_SUCCESS;
3590 	mach_vm_address_t out_addr = 0;
3591 
3592 	/* I/O Statistics if any counters are non zero */
3593 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
3594 	if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
3595 		/* struct io_stats_snapshot is quite large - avoid overflowing the stack. */
3596 		kcdata_compression_window_open(kcd);
3597 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
3598 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
3599 		_iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
3600 		_iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
3601 		_iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
3602 		_iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
3603 		_iostat->ss_paging_count = task->task_io_stats->paging.count;
3604 		_iostat->ss_paging_size = task->task_io_stats->paging.size;
3605 		_iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
3606 		_iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
3607 		_iostat->ss_metadata_count = task->task_io_stats->metadata.count;
3608 		_iostat->ss_metadata_size = task->task_io_stats->metadata.size;
3609 		_iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
3610 		_iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
3611 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
3612 			_iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
3613 			_iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
3614 		}
3615 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
3616 	}
3617 
3618 
3619 error_exit:
3620 	return error;
3621 }
3622 
3623 #if CONFIG_PERVASIVE_CPI
3624 static kern_return_t
3625 kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
3626 {
3627 	struct instrs_cycles_snapshot_v2 instrs_cycles = { 0 };
3628 	struct recount_usage usage = { 0 };
3629 	struct recount_usage perf_only = { 0 };
3630 	recount_task_terminated_usage_perf_only(task, &usage, &perf_only);
3631 	instrs_cycles.ics_instructions = recount_usage_instructions(&usage);
3632 	instrs_cycles.ics_cycles = recount_usage_cycles(&usage);
3633 	instrs_cycles.ics_p_instructions = recount_usage_instructions(&perf_only);
3634 	instrs_cycles.ics_p_cycles = recount_usage_cycles(&perf_only);
3635 
3636 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(instrs_cycles), &instrs_cycles);
3637 }
3638 #endif /* CONFIG_PERVASIVE_CPI */
3639 
3640 static kern_return_t
3641 kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd, task_t task)
3642 {
3643 	struct stackshot_cpu_architecture cpu_architecture = {0};
3644 	int32_t cputype;
3645 	int32_t cpusubtype;
3646 
3647 	proc_archinfo_kdp(get_bsdtask_info(task), &cputype, &cpusubtype);
3648 	cpu_architecture.cputype = cputype;
3649 	cpu_architecture.cpusubtype = cpusubtype;
3650 
3651 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_CPU_ARCHITECTURE, sizeof(struct stackshot_cpu_architecture), &cpu_architecture);
3652 }
3653 
3654 static kern_return_t
3655 kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd, task_t task)
3656 {
3657 	struct stackshot_task_codesigning_info codesigning_info = {};
3658 	void * bsdtask_info = NULL;
3659 	uint32_t trust = 0;
3660 	kern_return_t ret = 0;
3661 	pmap_t pmap = get_task_pmap(task);
3662 	if (task != kernel_task) {
3663 		bsdtask_info = get_bsdtask_info(task);
3664 		codesigning_info.csflags = proc_getcsflags_kdp(bsdtask_info);
3665 		ret = get_trust_level_kdp(pmap, &trust);
3666 		if (ret != KERN_SUCCESS) {
3667 			trust = KCDATA_INVALID_CS_TRUST_LEVEL;
3668 		}
3669 		codesigning_info.cs_trust_level = trust;
3670 	} else {
3671 		return KERN_SUCCESS;
3672 	}
3673 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_CODESIGNING_INFO, sizeof(struct stackshot_task_codesigning_info), &codesigning_info);
3674 }
3675 
3676 static kern_return_t
3677 kcdata_record_task_jit_address_range(kcdata_descriptor_t kcd, task_t task)
3678 {
3679 	uint64_t jit_start_addr = 0;
3680 	uint64_t jit_end_addr = 0;
3681 	struct crashinfo_jit_address_range range = {};
3682 	kern_return_t ret = 0;
3683 	pmap_t pmap = get_task_pmap(task);
3684 	if (task == kernel_task || NULL == pmap) {
3685 		return KERN_SUCCESS;
3686 	}
3687 	ret = get_jit_address_range_kdp(pmap, (uintptr_t*)&jit_start_addr, (uintptr_t*)&jit_end_addr);
3688 	if (KERN_SUCCESS == ret) {
3689 		range.start_address = jit_start_addr;
3690 		range.end_address = jit_end_addr;
3691 		return kcdata_push_data(kcd, TASK_CRASHINFO_JIT_ADDRESS_RANGE, sizeof(struct crashinfo_jit_address_range), &range);
3692 	} else {
3693 		return KERN_SUCCESS;
3694 	}
3695 }
3696 
3697 #if CONFIG_TASK_SUSPEND_STATS
3698 static kern_return_t
3699 kcdata_record_task_suspension_info(kcdata_descriptor_t kcd, task_t task)
3700 {
3701 	kern_return_t ret = KERN_SUCCESS;
3702 	struct stackshot_suspension_info suspension_info = {};
3703 	task_suspend_stats_data_t suspend_stats;
3704 	task_suspend_source_array_t suspend_sources;
3705 	struct stackshot_suspension_source suspension_sources[TASK_SUSPEND_SOURCES_MAX];
3706 	int i;
3707 
3708 	if (task == kernel_task) {
3709 		return KERN_SUCCESS;
3710 	}
3711 
3712 	ret = task_get_suspend_stats_kdp(task, &suspend_stats);
3713 	if (ret != KERN_SUCCESS) {
3714 		return ret;
3715 	}
3716 
3717 	suspension_info.tss_count = suspend_stats.tss_count;
3718 	suspension_info.tss_duration = suspend_stats.tss_duration;
3719 	suspension_info.tss_last_end = suspend_stats.tss_last_end;
3720 	suspension_info.tss_last_start = suspend_stats.tss_last_start;
3721 	ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_SUSPENSION_INFO, sizeof(suspension_info), &suspension_info);
3722 	if (ret != KERN_SUCCESS) {
3723 		return ret;
3724 	}
3725 
3726 	ret = task_get_suspend_sources_kdp(task, suspend_sources);
3727 	if (ret != KERN_SUCCESS) {
3728 		return ret;
3729 	}
3730 
3731 	for (i = 0; i < TASK_SUSPEND_SOURCES_MAX; ++i) {
3732 		suspension_sources[i].tss_pid = suspend_sources[i].tss_pid;
3733 		strlcpy(suspension_sources[i].tss_procname, suspend_sources[i].tss_procname, sizeof(suspend_sources[i].tss_procname));
3734 		suspension_sources[i].tss_tid = suspend_sources[i].tss_tid;
3735 		suspension_sources[i].tss_time = suspend_sources[i].tss_time;
3736 	}
3737 	return kcdata_push_array(kcd, STACKSHOT_KCTYPE_SUSPENSION_SOURCE, sizeof(suspension_sources[0]), TASK_SUSPEND_SOURCES_MAX, &suspension_sources);
3738 }
3739 #endif /* CONFIG_TASK_SUSPEND_STATS */
3740 
3741 static kern_return_t
3742 kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd, task_t task, unaligned_u64 task_snap_ss_flags, uint64_t transition_type)
3743 {
3744 	kern_return_t error                 = KERN_SUCCESS;
3745 	mach_vm_address_t out_addr          = 0;
3746 	struct transitioning_task_snapshot * cur_tsnap = NULL;
3747 
3748 	int task_pid           = pid_from_task(task);
3749 	/* Is returning -1 ok for a terminating task? */
3750 	uint64_t task_uniqueid = get_task_uniqueid(task);
3751 
3752 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
3753 		/*
3754 		 * If this task is in transition from another task (via exec), show
3755 		 * the pid as negative.
3756 		 */
3757 		task_pid = 0 - task_pid;
3758 	}
3759 
3760 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
3761 	kcdata_compression_window_open(kcd);
3762 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TRANSITIONING_TASK_SNAPSHOT, sizeof(struct transitioning_task_snapshot), &out_addr));
3763 	cur_tsnap = (struct transitioning_task_snapshot *)out_addr;
3764 	bzero(cur_tsnap, sizeof(*cur_tsnap));
3765 
3766 	cur_tsnap->tts_unique_pid = task_uniqueid;
3767 	cur_tsnap->tts_ss_flags = kcdata_get_task_ss_flags(task);
3768 	cur_tsnap->tts_ss_flags |= task_snap_ss_flags;
3769 	cur_tsnap->tts_transition_type = transition_type;
3770 	cur_tsnap->tts_pid = task_pid;
3771 
3772 	/* Add the BSD process identifiers */
3773 	if (task_pid != -1 && get_bsdtask_info(task) != NULL) {
3774 		proc_name_kdp(get_bsdtask_info(task), cur_tsnap->tts_p_comm, sizeof(cur_tsnap->tts_p_comm));
3775 	} else {
3776 		cur_tsnap->tts_p_comm[0] = '\0';
3777 	}
3778 
3779 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
3780 
3781 error_exit:
3782 	return error;
3783 }
3784 
3785 static kern_return_t
3786 #if STACKSHOT_COLLECTS_LATENCY_INFO
3787 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags, struct stackshot_latency_task *latency_info)
3788 #else
3789 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
3790 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3791 {
3792 	bool collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
3793 	bool collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
3794 #if CONFIG_PERVASIVE_CPI
3795 	bool collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
3796 #endif /* CONFIG_PERVASIVE_CPI */
3797 #if __arm64__
3798 	bool collect_asid            = ((trace_flags & STACKSHOT_ASID) != 0);
3799 #endif
3800 	bool collect_pagetables      = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);
3801 
3802 
3803 	kern_return_t error                 = KERN_SUCCESS;
3804 	mach_vm_address_t out_addr          = 0;
3805 	struct task_snapshot_v2 * cur_tsnap = NULL;
3806 #if STACKSHOT_COLLECTS_LATENCY_INFO
3807 	latency_info->cur_tsnap_latency = mach_absolute_time();
3808 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3809 
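	/*
	 * Latency bookkeeping convention: each latency_info field is first loaded
	 * with a mach_absolute_time() start stamp and later overwritten with the
	 * elapsed delta once that phase of the snapshot completes.
	 */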
3810 	int task_pid           = pid_from_task(task);
3811 	uint64_t task_uniqueid = get_task_uniqueid(task);
3812 	void *bsd_info = get_bsdtask_info(task);
3813 	uint64_t proc_starttime_secs = 0;
3814 
3815 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
3816 		/*
3817 		 * If this task is in transition from another task (via exec), show
3818 		 * the pid as negative.
3819 		 */
3820 		task_pid = 0 - task_pid;
3821 	}
3822 
3823 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
3824 	kcdata_compression_window_open(kcd);
3825 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
3826 	cur_tsnap = (struct task_snapshot_v2 *)out_addr;
3827 	bzero(cur_tsnap, sizeof(*cur_tsnap));
3828 
3829 	cur_tsnap->ts_unique_pid = task_uniqueid;
3830 	cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task);
3831 	cur_tsnap->ts_ss_flags |= task_snap_ss_flags;
3832 
3833 	struct recount_usage term_usage = { 0 };
3834 	recount_task_terminated_usage(task, &term_usage);
3835 	struct recount_times_mach term_times = recount_usage_times_mach(&term_usage);
3836 	cur_tsnap->ts_user_time_in_terminated_threads = term_times.rtm_user;
3837 	cur_tsnap->ts_system_time_in_terminated_threads = term_times.rtm_system;
3838 
3839 	proc_starttime_kdp(bsd_info, &proc_starttime_secs, NULL, NULL);
3840 	cur_tsnap->ts_p_start_sec = proc_starttime_secs;
3841 	cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
3842 	cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
3843 	cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
3844 	cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
3845 
3846 	cur_tsnap->ts_suspend_count = task->suspend_count;
3847 	cur_tsnap->ts_faults = counter_load(&task->faults);
3848 	cur_tsnap->ts_pageins = counter_load(&task->pageins);
3849 	cur_tsnap->ts_cow_faults = counter_load(&task->cow_faults);
3850 	cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
3851 	    LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
3852 	cur_tsnap->ts_pid = task_pid;
3853 
3854 	/* Add the BSD process identifiers */
3855 	if (task_pid != -1 && bsd_info != NULL) {
3856 		proc_name_kdp(bsd_info, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
3857 	} else {
3858 		cur_tsnap->ts_p_comm[0] = '\0';
3859 #if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
3860 		if (task->task_imp_base != NULL) {
3861 			kdp_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
3862 			    MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
3863 		}
3864 #endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
3865 	}
3866 
3867 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
3868 
3869 #if CONFIG_COALITIONS
3870 	if (task_pid != -1 && bsd_info != NULL &&
3871 	    (task->coalition[COALITION_TYPE_JETSAM] != NULL)) {
3872 		/*
3873 		 * The jetsam coalition ID is always saved, even if
3874 		 * STACKSHOT_SAVE_JETSAM_COALITIONS is not set.
3875 		 */
3876 		uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
3877 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &jetsam_coal_id));
3878 	}
3879 #endif /* CONFIG_COALITIONS */
3880 
3881 #if __arm64__
3882 	if (collect_asid && have_pmap) {
3883 		uint32_t asid = PMAP_VASID(task->map->pmap);
3884 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_ASID, sizeof(asid), &asid));
3885 	}
3886 #endif
3887 
3888 #if STACKSHOT_COLLECTS_LATENCY_INFO
3889 	latency_info->cur_tsnap_latency = mach_absolute_time() - latency_info->cur_tsnap_latency;
3890 	latency_info->pmap_latency = mach_absolute_time();
3891 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3892 
3893 	if (collect_pagetables && have_pmap) {
3894 #if SCHED_HYGIENE_DEBUG
3895 		// pagetable dumps can be large; reset the interrupt timeout to avoid a panic
3896 		ml_spin_debug_clear_self();
3897 #endif
3898 		assert(stackshot_ctx.sc_is_singlethreaded);
3899 		size_t bytes_dumped = 0;
3900 		error = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd), stackshot_args.pagetable_mask, &bytes_dumped);
3901 		if (error != KERN_SUCCESS) {
3902 			goto error_exit;
3903 		} else {
3904 			/* Variable size array - better not have it on the stack. */
3905 			kcdata_compression_window_open(kcd);
3906 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
3907 			    sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
3908 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
3909 		}
3910 	}
3911 
3912 #if STACKSHOT_COLLECTS_LATENCY_INFO
3913 	latency_info->pmap_latency = mach_absolute_time() - latency_info->pmap_latency;
3914 	latency_info->bsd_proc_ids_latency = mach_absolute_time();
3915 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3916 
3917 #if STACKSHOT_COLLECTS_LATENCY_INFO
3918 	latency_info->bsd_proc_ids_latency = mach_absolute_time() - latency_info->bsd_proc_ids_latency;
3919 	latency_info->end_latency = mach_absolute_time();
3920 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3921 
3922 	if (collect_iostats) {
3923 		kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
3924 	}
3925 
3926 #if CONFIG_PERVASIVE_CPI
3927 	if (collect_instrs_cycles) {
3928 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
3929 	}
3930 #endif /* CONFIG_PERVASIVE_CPI */
3931 
3932 	kcd_exit_on_error(kcdata_record_task_cpu_architecture(kcd, task));
3933 	kcd_exit_on_error(kcdata_record_task_codesigning_info(kcd, task));
3934 	kcd_exit_on_error(kcdata_record_task_jit_address_range(kcd, task));
3935 
3936 #if CONFIG_TASK_SUSPEND_STATS
3937 	kcd_exit_on_error(kcdata_record_task_suspension_info(kcd, task));
3938 #endif /* CONFIG_TASK_SUSPEND_STATS */
3939 
3940 #if STACKSHOT_COLLECTS_LATENCY_INFO
3941 	latency_info->end_latency = mach_absolute_time() - latency_info->end_latency;
3942 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3943 
3944 error_exit:
3945 	return error;
3946 }
3947 
3948 static kern_return_t
3949 kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
3950 {
3951 #if !CONFIG_PERVASIVE_CPI
3952 #pragma unused(trace_flags)
3953 #endif /* !CONFIG_PERVASIVE_CPI */
3954 	kern_return_t error                       = KERN_SUCCESS;
3955 	struct task_delta_snapshot_v2 * cur_tsnap = NULL;
3956 	mach_vm_address_t out_addr                = 0;
3957 	(void) trace_flags;
3958 #if __arm64__
3959 	boolean_t collect_asid                    = ((trace_flags & STACKSHOT_ASID) != 0);
3960 #endif
3961 #if CONFIG_PERVASIVE_CPI
3962 	boolean_t collect_instrs_cycles           = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
3963 #endif /* CONFIG_PERVASIVE_CPI */
3964 
3965 	uint64_t task_uniqueid = get_task_uniqueid(task);
3966 
3967 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));
3968 
3969 	cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;
3970 
3971 	cur_tsnap->tds_unique_pid = task_uniqueid;
3972 	cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task);
3973 	cur_tsnap->tds_ss_flags |= task_snap_ss_flags;
3974 
3975 	struct recount_usage usage = { 0 };
3976 	recount_task_terminated_usage(task, &usage);
3977 	struct recount_times_mach term_times = recount_usage_times_mach(&usage);
3978 
3979 	cur_tsnap->tds_user_time_in_terminated_threads = term_times.rtm_user;
3980 	cur_tsnap->tds_system_time_in_terminated_threads = term_times.rtm_system;
3981 
3982 	cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
3983 
3984 	cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
3985 	cur_tsnap->tds_suspend_count = task->suspend_count;
3986 	cur_tsnap->tds_faults            = counter_load(&task->faults);
3987 	cur_tsnap->tds_pageins           = counter_load(&task->pageins);
3988 	cur_tsnap->tds_cow_faults        = counter_load(&task->cow_faults);
3989 	cur_tsnap->tds_was_throttled     = (uint32_t)proc_was_throttled_from_task(task);
3990 	cur_tsnap->tds_did_throttle      = (uint32_t)proc_did_throttle_from_task(task);
3991 	cur_tsnap->tds_latency_qos       = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
3992 	    ? LATENCY_QOS_TIER_UNSPECIFIED
3993 	    : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
3994 
3995 #if __arm64__
3996 	if (collect_asid && have_pmap) {
3997 		uint32_t asid = PMAP_VASID(task->map->pmap);
3998 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
3999 		kdp_memcpy((void*)out_addr, &asid, sizeof(asid));
4000 	}
4001 #endif
4002 
4003 #if CONFIG_PERVASIVE_CPI
4004 	if (collect_instrs_cycles) {
4005 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
4006 	}
4007 #endif /* CONFIG_PERVASIVE_CPI */
4008 
4009 error_exit:
4010 	return error;
4011 }
4012 
4013 static kern_return_t
4014 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
4015 {
4016 	kern_return_t error = KERN_SUCCESS;
4017 	mach_vm_address_t out_addr = 0;
4018 
4019 	/* I/O Statistics */
4020 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
4021 	if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
4022 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
4023 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
4024 		_iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
4025 		_iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
4026 		_iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
4027 		_iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
4028 		_iostat->ss_paging_count = thread->thread_io_stats->paging.count;
4029 		_iostat->ss_paging_size = thread->thread_io_stats->paging.size;
4030 		_iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
4031 		_iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
4032 		_iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
4033 		_iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
4034 		_iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
4035 		_iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
4036 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
4037 			_iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
4038 			_iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
4039 		}
4040 	}
4041 
4042 error_exit:
4043 	return error;
4044 }
4045 
4046 bool
4047 machine_trace_thread_validate_kva(vm_offset_t addr)
4048 {
4049 	return _stackshot_validate_kva(addr, sizeof(uintptr_t));
4050 }
4051 
4052 struct _stackshot_backtrace_context {
4053 	vm_map_t sbc_map;
4054 	vm_offset_t sbc_prev_page;
4055 	vm_offset_t sbc_prev_kva;
4056 	uint32_t sbc_flags;
4057 	bool sbc_allow_faulting;
4058 };
4059 
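/*
 * Copy callback used when backtracing user stacks: the user VA is translated
 * to a physical page via stackshot_find_phys() (optionally faulting) and read
 * through the physical aperture.  The previous page translation is cached in
 * the context so consecutive frame reads from the same stack page avoid
 * repeated lookups, and any fault results are folded into sbc_flags as
 * kThread*BT snapshot flags.
 */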
4060 static errno_t
4061 _stackshot_backtrace_copy(void *vctx, void *dst, user_addr_t src, size_t size)
4062 {
4063 	struct _stackshot_backtrace_context *ctx = vctx;
4064 	size_t map_page_mask = 0;
4065 	size_t __assert_only map_page_size = kdp_vm_map_get_page_size(ctx->sbc_map,
4066 	    &map_page_mask);
4067 	assert(size < map_page_size);
4068 	if (src & (size - 1)) {
4069 		// The source should be aligned to the size passed in, like a stack
4070 		// frame or word.
4071 		return EINVAL;
4072 	}
4073 
4074 	vm_offset_t src_page = src & ~map_page_mask;
4075 	vm_offset_t src_kva = 0;
4076 
4077 	if (src_page != ctx->sbc_prev_page) {
4078 		uint32_t res = 0;
4079 		uint32_t flags = 0;
4080 		vm_offset_t src_pa = stackshot_find_phys(ctx->sbc_map, src,
4081 		    ctx->sbc_allow_faulting, &res);
4082 
4083 		flags |= (res & KDP_FAULT_RESULT_PAGED_OUT) ? kThreadTruncatedBT : 0;
4084 		flags |= (res & KDP_FAULT_RESULT_TRIED_FAULT) ? kThreadTriedFaultBT : 0;
4085 		flags |= (res & KDP_FAULT_RESULT_FAULTED_IN) ? kThreadFaultedBT : 0;
4086 		ctx->sbc_flags |= flags;
4087 		if (src_pa == 0) {
4088 			return EFAULT;
4089 		}
4090 
4091 		src_kva = phystokv(src_pa);
4092 		ctx->sbc_prev_page = src_page;
4093 		ctx->sbc_prev_kva = (src_kva & ~map_page_mask);
4094 	} else {
4095 		src_kva = ctx->sbc_prev_kva + (src & map_page_mask);
4096 	}
4097 
4098 #if KASAN
4099 	/*
4100 	 * KASan does not monitor accesses to userspace pages. Therefore, it is
4101 	 * pointless to maintain a shadow map for them. Instead, they are all
4102 	 * mapped to a single, always valid shadow map page. This approach saves
4103 	 * a considerable amount of shadow map pages which are limited and
4104 	 * precious.
4105 	 */
4106 	kasan_notify_address_nopoison(src_kva, size);
4107 #endif
4108 	memcpy(dst, (const void *)src_kva, size);
4109 
4110 	return 0;
4111 }
4112 
4113 static kern_return_t
4114 kcdata_record_thread_snapshot(kcdata_descriptor_t kcd, thread_t thread, task_t task, uint64_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
4115 {
4116 	boolean_t dispatch_p              = ((trace_flags & STACKSHOT_GET_DQ) != 0);
4117 	boolean_t active_kthreads_only_p  = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4118 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4119 	boolean_t collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
4120 #if CONFIG_PERVASIVE_CPI
4121 	boolean_t collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
4122 #endif /* CONFIG_PERVASIVE_CPI */
4123 	kern_return_t error        = KERN_SUCCESS;
4124 
4125 #if STACKSHOT_COLLECTS_LATENCY_INFO
4126 	struct stackshot_latency_thread latency_info;
4127 	latency_info.cur_thsnap1_latency = mach_absolute_time();
4128 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4129 
4130 	mach_vm_address_t out_addr = 0;
4131 	int saved_count            = 0;
4132 
4133 	struct thread_snapshot_v4 * cur_thread_snap = NULL;
4134 	char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
4135 
4136 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
4137 	cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;
4138 
4139 	/* Populate the thread snapshot header */
4140 	cur_thread_snap->ths_ss_flags = 0;
4141 	cur_thread_snap->ths_thread_id = thread_tid(thread);
4142 	cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
4143 	cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
4144 	cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
4145 
4146 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4147 		cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4148 	} else {
4149 		cur_thread_snap->ths_voucher_identifier = 0;
4150 	}
4151 
4152 #if STACKSHOT_COLLECTS_LATENCY_INFO
4153 	latency_info.cur_thsnap1_latency = mach_absolute_time() - latency_info.cur_thsnap1_latency;
4154 	latency_info.dispatch_serial_latency = mach_absolute_time();
4155 	latency_info.dispatch_label_latency = 0;
4156 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4157 
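	/*
	 * If requested, recover the dispatch queue the thread is servicing: read
	 * the user address holding the thread's queue pointer (thread_dispatchqaddr),
	 * then follow it to copy in the queue's serial number and, if present, its
	 * label string.
	 */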
4158 	cur_thread_snap->ths_dqserialnum = 0;
4159 	if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
4160 		uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
4161 		if (dqkeyaddr != 0) {
4162 			uint64_t dqaddr = 0;
4163 			boolean_t copyin_ok = stackshot_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
4164 			if (copyin_ok && dqaddr != 0) {
4165 				uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
4166 				uint64_t dqserialnum = 0;
4167 				copyin_ok = stackshot_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
4168 				if (copyin_ok) {
4169 					cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
4170 					cur_thread_snap->ths_dqserialnum = dqserialnum;
4171 				}
4172 
4173 #if STACKSHOT_COLLECTS_LATENCY_INFO
4174 				latency_info.dispatch_serial_latency = mach_absolute_time() - latency_info.dispatch_serial_latency;
4175 				latency_info.dispatch_label_latency = mach_absolute_time();
4176 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4177 
4178 				/* try copying in the queue label */
4179 				uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
4180 				if (label_offs) {
4181 					uint64_t dqlabeladdr = dqaddr + label_offs;
4182 					uint64_t actual_dqlabeladdr = 0;
4183 
4184 					copyin_ok = stackshot_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
4185 					if (copyin_ok && actual_dqlabeladdr != 0) {
4186 						char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
4187 						int len;
4188 
4189 						bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
4190 						len = stackshot_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
4191 						if (len > 0) {
4192 							mach_vm_address_t label_addr = 0;
4193 							kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
4194 							kdp_strlcpy((char*)label_addr, &label_buf[0], len);
4195 						}
4196 					}
4197 				}
4198 #if STACKSHOT_COLLECTS_LATENCY_INFO
4199 				latency_info.dispatch_label_latency = mach_absolute_time() - latency_info.dispatch_label_latency;
4200 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4201 			}
4202 		}
4203 	}
4204 
4205 #if STACKSHOT_COLLECTS_LATENCY_INFO
4206 	if ((cur_thread_snap->ths_ss_flags & kHasDispatchSerial) == 0) {
4207 		latency_info.dispatch_serial_latency = 0;
4208 	}
4209 	latency_info.cur_thsnap2_latency = mach_absolute_time();
4210 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4211 
4212 	struct recount_times_mach times = recount_thread_times(thread);
4213 	cur_thread_snap->ths_user_time = times.rtm_user;
4214 	cur_thread_snap->ths_sys_time = times.rtm_system;
4215 
4216 	if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
4217 		cur_thread_snap->ths_ss_flags |= kThreadMain;
4218 	}
4219 	if (thread->effective_policy.thep_darwinbg) {
4220 		cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
4221 	}
4222 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4223 		cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
4224 	}
4225 	if (thread->suspend_count > 0) {
4226 		cur_thread_snap->ths_ss_flags |= kThreadSuspended;
4227 	}
4228 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4229 		cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
4230 	}
4231 #if CONFIG_EXCLAVES
4232 	/* save exclave thread for later collection */
4233 	if ((thread->th_exclaves_state & TH_EXCLAVES_RPC) && stackshot_exclave_inspect_ctids && !stackshot_ctx.sc_panic_stackshot) {
4234 		/* certain threads, like the collector, must never be inspected */
4235 		if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) == 0) {
4236 			uint32_t ctid_index = os_atomic_inc_orig(&stackshot_exclave_inspect_ctid_count, acq_rel);
4237 			if (ctid_index < stackshot_exclave_inspect_ctid_capacity) {
4238 				stackshot_exclave_inspect_ctids[ctid_index] = thread_get_ctid(thread);
4239 			} else {
4240 				os_atomic_store(&stackshot_exclave_inspect_ctid_count, stackshot_exclave_inspect_ctid_capacity, release);
4241 			}
4242 			if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_STACKSHOT) != 0) {
4243 				panic("stackshot: trying to inspect already-queued thread");
4244 			}
4245 		}
4246 	}
4247 #endif /* CONFIG_EXCLAVES */
4248 	if (thread_on_core) {
4249 		cur_thread_snap->ths_ss_flags |= kThreadOnCore;
4250 	}
4251 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4252 		cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
4253 	}
4254 
4255 	/* make sure state flags defined in kcdata.h still match internal flags */
4256 	static_assert(SS_TH_WAIT == TH_WAIT);
4257 	static_assert(SS_TH_SUSP == TH_SUSP);
4258 	static_assert(SS_TH_RUN == TH_RUN);
4259 	static_assert(SS_TH_UNINT == TH_UNINT);
4260 	static_assert(SS_TH_TERMINATE == TH_TERMINATE);
4261 	static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
4262 	static_assert(SS_TH_IDLE == TH_IDLE);
4263 
4264 	cur_thread_snap->ths_last_run_time           = thread->last_run_time;
4265 	cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
4266 	cur_thread_snap->ths_state                   = thread->state;
4267 	cur_thread_snap->ths_sched_flags             = thread->sched_flags;
4268 	cur_thread_snap->ths_base_priority = thread->base_pri;
4269 	cur_thread_snap->ths_sched_priority = thread->sched_pri;
4270 	cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
4271 	cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
4272 	cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
4273 	    thread->requested_policy.thrp_qos_workq_override);
4274 	cur_thread_snap->ths_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4275 	cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);
4276 
4277 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4278 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4279 	cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4280 	cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4281 
4282 #if STACKSHOT_COLLECTS_LATENCY_INFO
4283 	latency_info.cur_thsnap2_latency = mach_absolute_time()  - latency_info.cur_thsnap2_latency;
4284 	latency_info.thread_name_latency = mach_absolute_time();
4285 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4286 
4287 	/* if there is thread name then add to buffer */
4288 	cur_thread_name[0] = '\0';
4289 	proc_threadname_kdp(get_bsdthread_info(thread), cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
4290 	if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
4291 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
4292 		kdp_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
4293 	}
4294 
4295 #if STACKSHOT_COLLECTS_LATENCY_INFO
4296 	latency_info.thread_name_latency = mach_absolute_time()  - latency_info.thread_name_latency;
4297 	latency_info.sur_times_latency = mach_absolute_time();
4298 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4299 
4300 	/* record system, user, and runnable times */
4301 	time_value_t runnable_time;
4302 	thread_read_times(thread, NULL, NULL, &runnable_time);
4303 	clock_sec_t user_sec = 0, system_sec = 0;
4304 	clock_usec_t user_usec = 0, system_usec = 0;
4305 	absolutetime_to_microtime(times.rtm_user, &user_sec, &user_usec);
4306 	absolutetime_to_microtime(times.rtm_system, &system_sec, &system_usec);
4307 
4308 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
4309 	struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
4310 	*stackshot_cpu_times = (struct stackshot_cpu_times_v2){
4311 		.user_usec = user_sec * USEC_PER_SEC + user_usec,
4312 		.system_usec = system_sec * USEC_PER_SEC + system_usec,
4313 		.runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
4314 	};
4315 
4316 #if STACKSHOT_COLLECTS_LATENCY_INFO
4317 	latency_info.sur_times_latency = mach_absolute_time()  - latency_info.sur_times_latency;
4318 	latency_info.user_stack_latency = mach_absolute_time();
4319 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4320 
4321 	/* Trace user stack, if any */
4322 	if (!active_kthreads_only_p && task->active && task->map != kernel_map) {
4323 		uint32_t user_ths_ss_flags = 0;
4324 
4325 		/*
4326 		 * We don't know how big the stacktrace will be, so read it into our
4327 		 * per-cpu buffer, then copy it to the kcdata.
4328 		 */
4329 		struct _stackshot_backtrace_context ctx = {
4330 			.sbc_map = task->map,
4331 			.sbc_allow_faulting = stackshot_ctx.sc_enable_faulting,
4332 			.sbc_prev_page = -1,
4333 			.sbc_prev_kva = -1,
4334 		};
4335 		struct backtrace_control ctl = {
4336 			.btc_user_thread = thread,
4337 			.btc_user_copy = _stackshot_backtrace_copy,
4338 			.btc_user_copy_context = &ctx,
4339 		};
4340 		struct backtrace_user_info info = BTUINFO_INIT;
4341 
4342 		saved_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4343 		    &info);
4344 		if (saved_count > 0) {
4345 #if __LP64__
4346 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR64
4347 #else // __LP64__
4348 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR
4349 #endif // !__LP64__
4350 			/* Now, copy the stacktrace into kcdata. */
4351 			kcd_exit_on_error(kcdata_push_array(kcd, STACKLR_WORDS, sizeof(uintptr_t),
4352 			    saved_count, stackshot_cpu_ctx.scc_stack_buffer));
4353 			if (info.btui_info & BTI_64_BIT) {
4354 				user_ths_ss_flags |= kUser64_p;
4355 			}
4356 			if ((info.btui_info & BTI_TRUNCATED) ||
4357 			    (ctx.sbc_flags & kThreadTruncatedBT)) {
4358 				user_ths_ss_flags |= kThreadTruncatedBT;
4359 				user_ths_ss_flags |= kThreadTruncUserBT;
4360 			}
4361 			user_ths_ss_flags |= ctx.sbc_flags;
4362 			ctx.sbc_flags = 0;
4363 #if __LP64__
4364 			/* We only support async stacks on 64-bit kernels */
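			/*
			 * If the backtrace found an async frame, record the index where the
			 * async portion begins and take a second backtrace starting from that
			 * frame, emitted as a separate async call stack.
			 */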
4365 			if (info.btui_async_frame_addr != 0) {
4366 				uint32_t async_start_offset = info.btui_async_start_index;
4367 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_USER_ASYNC_START_INDEX,
4368 				    sizeof(async_start_offset), &async_start_offset));
4369 				ctl.btc_frame_addr = info.btui_async_frame_addr;
4370 				ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
4371 				info = BTUINFO_INIT;
4372 				unsigned int async_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4373 				    &info);
4374 				if (async_count > 0) {
4375 					kcd_exit_on_error(kcdata_push_array(kcd, STACKSHOT_KCTYPE_USER_ASYNC_STACKLR64,
4376 					    sizeof(uintptr_t), async_count, stackshot_cpu_ctx.scc_stack_buffer));
4377 					if ((info.btui_info & BTI_TRUNCATED) ||
4378 					    (ctx.sbc_flags & kThreadTruncatedBT)) {
4379 						user_ths_ss_flags |= kThreadTruncatedBT;
4380 						user_ths_ss_flags |= kThreadTruncUserAsyncBT;
4381 					}
4382 					user_ths_ss_flags |= ctx.sbc_flags;
4383 				}
4384 			}
4385 #endif /* __LP64__ */
4386 		}
4387 		if (user_ths_ss_flags != 0) {
4388 			cur_thread_snap->ths_ss_flags |= user_ths_ss_flags;
4389 		}
4390 	}
4391 
4392 #if STACKSHOT_COLLECTS_LATENCY_INFO
4393 	latency_info.user_stack_latency = mach_absolute_time()  - latency_info.user_stack_latency;
4394 	latency_info.kernel_stack_latency = mach_absolute_time();
4395 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4396 
4397 	/* Call through to the machine specific trace routines
4398 	 * Frames are added past the snapshot header.
4399 	 */
4400 	if (thread->kernel_stack != 0) {
4401 		uint32_t kern_ths_ss_flags = 0;
4402 #if defined(__LP64__)
4403 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR64;
4404 		extern int machine_trace_thread64(thread_t thread, char *tracepos,
4405 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
4406 		saved_count = machine_trace_thread64(
4407 #else
4408 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR;
4409 		extern int machine_trace_thread(thread_t thread, char *tracepos,
4410 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
4411 		saved_count = machine_trace_thread(
4412 #endif
4413 			thread, (char*) stackshot_cpu_ctx.scc_stack_buffer,
4414 			(char *) (stackshot_cpu_ctx.scc_stack_buffer + MAX_FRAMES), MAX_FRAMES,
4415 			&kern_ths_ss_flags);
4416 		if (saved_count > 0) {
4417 			int frame_size = sizeof(uintptr_t);
4418 #if defined(__LP64__)
4419 			cur_thread_snap->ths_ss_flags |= kKernel64_p;
4420 #endif
4421 #if CONFIG_EXCLAVES
4422 			if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
4423 				struct thread_exclaves_info info = { 0 };
4424 
4425 				info.tei_flags = kExclaveRPCActive;
4426 				if (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) {
4427 					info.tei_flags |= kExclaveSchedulerRequest;
4428 				}
4429 				if (thread->th_exclaves_state & TH_EXCLAVES_UPCALL) {
4430 					info.tei_flags |= kExclaveUpcallActive;
4431 				}
4432 				info.tei_scid = thread->th_exclaves_ipc_ctx.scid;
4433 				info.tei_thread_offset = exclaves_stack_offset(stackshot_cpu_ctx.scc_stack_buffer, saved_count / frame_size, false);
4434 
4435 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERN_EXCLAVES_THREADINFO, sizeof(struct thread_exclaves_info), &info));
4436 			}
4437 #endif /* CONFIG_EXCLAVES */
4438 			kcd_exit_on_error(kcdata_push_array(kcd, stack_kcdata_type,
4439 			    frame_size, saved_count / frame_size, stackshot_cpu_ctx.scc_stack_buffer));
4440 		}
4441 		if (kern_ths_ss_flags & kThreadTruncatedBT) {
4442 			kern_ths_ss_flags |= kThreadTruncKernBT;
4443 		}
4444 		if (kern_ths_ss_flags != 0) {
4445 			cur_thread_snap->ths_ss_flags |= kern_ths_ss_flags;
4446 		}
4447 	}
4448 
4449 #if STACKSHOT_COLLECTS_LATENCY_INFO
4450 	latency_info.kernel_stack_latency = mach_absolute_time()  - latency_info.kernel_stack_latency;
4451 	latency_info.misc_latency = mach_absolute_time();
4452 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4453 
4454 #if CONFIG_THREAD_GROUPS
4455 	if (trace_flags & STACKSHOT_THREAD_GROUP) {
4456 		uint64_t thread_group_id = thread->thread_group ? thread_group_get_id(thread->thread_group) : 0;
4457 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_GROUP, sizeof(thread_group_id), &out_addr));
4458 		kdp_memcpy((void*)out_addr, &thread_group_id, sizeof(uint64_t));
4459 	}
4460 #endif /* CONFIG_THREAD_GROUPS */
4461 
4462 	if (collect_iostats) {
4463 		kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
4464 	}
4465 
4466 #if CONFIG_PERVASIVE_CPI
4467 	if (collect_instrs_cycles) {
4468 		struct recount_usage usage = { 0 };
4469 		recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
4470 		    &usage);
4471 
4472 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
4473 		struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
4474 		instrs_cycles->ics_instructions = recount_usage_instructions(&usage);
4475 		instrs_cycles->ics_cycles = recount_usage_cycles(&usage);
4476 	}
4477 #endif /* CONFIG_PERVASIVE_CPI */
4478 
4479 #if STACKSHOT_COLLECTS_LATENCY_INFO
4480 	latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
4481 	if (collect_latency_info) {
4482 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_LATENCY_INFO_THREAD, sizeof(latency_info), &latency_info));
4483 	}
4484 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4485 
4486 error_exit:
4487 	return error;
4488 }
4489 
4490 static int
4491 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
4492 {
4493 	cur_thread_snap->tds_thread_id = thread_tid(thread);
4494 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4495 		cur_thread_snap->tds_voucher_identifier  = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4496 	} else {
4497 		cur_thread_snap->tds_voucher_identifier = 0;
4498 	}
4499 
4500 	cur_thread_snap->tds_ss_flags = 0;
4501 	if (thread->effective_policy.thep_darwinbg) {
4502 		cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
4503 	}
4504 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4505 		cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
4506 	}
4507 	if (thread->suspend_count > 0) {
4508 		cur_thread_snap->tds_ss_flags |= kThreadSuspended;
4509 	}
4510 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4511 		cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
4512 	}
4513 	if (thread_on_core) {
4514 		cur_thread_snap->tds_ss_flags |= kThreadOnCore;
4515 	}
4516 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4517 		cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
4518 	}
4519 
4520 	cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
4521 	cur_thread_snap->tds_state                   = thread->state;
4522 	cur_thread_snap->tds_sched_flags             = thread->sched_flags;
4523 	cur_thread_snap->tds_base_priority           = thread->base_pri;
4524 	cur_thread_snap->tds_sched_priority          = thread->sched_pri;
4525 	cur_thread_snap->tds_eqos                    = thread->effective_policy.thep_qos;
4526 	cur_thread_snap->tds_rqos                    = thread->requested_policy.thrp_qos;
4527 	cur_thread_snap->tds_rqos_override           = MAX(thread->requested_policy.thrp_qos_override,
4528 	    thread->requested_policy.thrp_qos_workq_override);
4529 	cur_thread_snap->tds_io_tier                 = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4530 
4531 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4532 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4533 	cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4534 	cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4535 
4536 	return 0;
4537 }
4538 
4539 /*
4540  * Why 12?  12 strikes a decent balance between allocating a large array on
4541  * the stack and having large kcdata item overheads for recording nonrunnable
4542  * tasks.
4543  */
4544 #define UNIQUEIDSPERFLUSH 12
4545 
4546 struct saved_uniqueids {
4547 	uint64_t ids[UNIQUEIDSPERFLUSH];
4548 	unsigned count;
4549 };
4550 
4551 enum thread_classification {
4552 	tc_full_snapshot,  /* take a full snapshot */
4553 	tc_delta_snapshot, /* take a delta snapshot */
4554 };
4555 
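/*
 * Decide whether a thread needs a full snapshot or only a delta entry, and
 * report whether it is currently on a CPU. Threads that are on-core or have
 * run since the previous stackshot's timestamp always get a full snapshot.
 */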
4556 static enum thread_classification
4557 classify_thread(thread_t thread, boolean_t * thread_on_core_p, boolean_t collect_delta_stackshot)
4558 {
4559 	processor_t last_processor = thread->last_processor;
4560 
4561 	boolean_t thread_on_core = FALSE;
4562 	if (last_processor != PROCESSOR_NULL) {
4563 		/* Idle threads are always treated as on-core, since the processor state can change while they are running. */
4564 		thread_on_core = (thread == last_processor->idle_thread) ||
4565 		    (last_processor->state == PROCESSOR_RUNNING &&
4566 		    last_processor->active_thread == thread);
4567 	}
4568 
4569 	*thread_on_core_p = thread_on_core;
4570 
4571 	/* Capture the full thread snapshot if this is not a delta stackshot or if the thread has run subsequent to the
4572 	 * previous full stackshot */
4573 	if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stackshot_args.since_timestamp)) {
4574 		return tc_full_snapshot;
4575 	} else {
4576 		return tc_delta_snapshot;
4577 	}
4578 }
4579 
4580 
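/*
 * Record everything stackshot captures for a single task: the task snapshot
 * (full, transitioning, or delta), a full or delta snapshot of each of its
 * threads, wait and turnstile owner info, and, if requested, the list of pids
 * donating importance to the task.
 */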
4581 static kern_return_t
4582 kdp_stackshot_record_task(task_t task)
4583 {
4584 	boolean_t active_kthreads_only_p  = ((stackshot_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4585 	boolean_t save_donating_pids_p    = ((stackshot_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
4586 	boolean_t collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4587 	boolean_t save_owner_info         = ((stackshot_flags & STACKSHOT_THREAD_WAITINFO) != 0);
4588 	boolean_t include_drivers         = ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) != 0);
4589 
4590 	kern_return_t error = KERN_SUCCESS;
4591 	mach_vm_address_t out_addr = 0;
4592 	int saved_count = 0;
4593 
4594 	int task_pid                   = 0;
4595 	uint64_t task_uniqueid         = 0;
4596 	int num_delta_thread_snapshots = 0;
4597 	int num_waitinfo_threads       = 0;
4598 	int num_turnstileinfo_threads  = 0;
4599 
4600 	uint64_t task_start_abstime    = 0;
4601 	boolean_t have_map = FALSE, have_pmap = FALSE;
4602 	boolean_t some_thread_ran = FALSE;
4603 	unaligned_u64 task_snap_ss_flags = 0;
4604 #if STACKSHOT_COLLECTS_LATENCY_INFO
4605 	struct stackshot_latency_task latency_info;
4606 	latency_info.setup_latency = mach_absolute_time();
4607 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4608 
4609 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
4610 	uint64_t task_begin_cpu_cycle_count = 0;
4611 	if (!stackshot_ctx.sc_panic_stackshot) {
4612 		task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
4613 	}
4614 #endif
4615 
4616 	if ((task == NULL) || !_stackshot_validate_kva((vm_offset_t)task, sizeof(struct task))) {
4617 		error = KERN_FAILURE;
4618 		goto error_exit;
4619 	}
4620 
4621 	void *bsd_info = get_bsdtask_info(task);
4622 	boolean_t task_in_teardown        = (bsd_info == NULL) || proc_in_teardown(bsd_info);// has P_LPEXIT set during proc_exit()
4623 	boolean_t task_in_transition      = task_in_teardown;         // here we can add other types of transition.
4624 	uint32_t  container_type          = (task_in_transition) ? STACKSHOT_KCCONTAINER_TRANSITIONING_TASK : STACKSHOT_KCCONTAINER_TASK;
4625 	uint32_t  transition_type         = (task_in_teardown) ? kTaskIsTerminated : 0;
4626 
4627 	if (task_in_transition) {
4628 		collect_delta_stackshot = FALSE;
4629 	}
4630 
4631 	have_map = (task->map != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map), sizeof(struct _vm_map)));
4632 	have_pmap = have_map && (task->map->pmap != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
4633 
4634 	task_pid = pid_from_task(task);
4635 	/* Is returning -1 ok for a terminating task? */
4636 	task_uniqueid = get_task_uniqueid(task);
4637 
4638 	if (!task->active || task_is_a_corpse(task) || task_is_a_corpse_fork(task)) {
4639 		/*
4640 		 * Not interested in terminated tasks without threads.
4641 		 */
4642 		if (queue_empty(&task->threads) || task_pid == -1) {
4643 			return KERN_SUCCESS;
4644 		}
4645 	}
4646 
4647 	/* All PIDs should have the MSB unset */
4648 	assert((task_pid & (1ULL << 31)) == 0);
4649 
4650 #if STACKSHOT_COLLECTS_LATENCY_INFO
4651 	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
4652 	latency_info.task_uniqueid = task_uniqueid;
4653 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4654 
4655 	/* Trace everything, unless a process was specified. Add in driver tasks if requested. */
4656 	if ((stackshot_args.pid == -1) || (stackshot_args.pid == task_pid) || (include_drivers && task_is_driver(task))) {
4657 #if STACKSHOT_COLLECTS_LATENCY_INFO
4658 		stackshot_cpu_latency.tasks_processed++;
4659 #endif
4660 
4661 		/* add task snapshot marker */
4662 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4663 		    container_type, task_uniqueid));
4664 
4665 		if (collect_delta_stackshot) {
4666 			/*
4667 			 * For delta stackshots we need to know if a thread from this task has run since the
4668 			 * previous timestamp to decide whether we're going to record a full snapshot and UUID info.
4669 			 */
4670 			thread_t thread = THREAD_NULL;
4671 			queue_iterate(&task->threads, thread, thread_t, task_threads)
4672 			{
4673 				if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4674 					error = KERN_FAILURE;
4675 					goto error_exit;
4676 				}
4677 
4678 				if (active_kthreads_only_p && thread->kernel_stack == 0) {
4679 					continue;
4680 				}
4681 
4682 				boolean_t thread_on_core;
4683 				enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4684 
4685 				switch (thread_classification) {
4686 				case tc_full_snapshot:
4687 					some_thread_ran = TRUE;
4688 					break;
4689 				case tc_delta_snapshot:
4690 					num_delta_thread_snapshots++;
4691 					break;
4692 				}
4693 			}
4694 		}
4695 
4696 		if (collect_delta_stackshot) {
4697 			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &task_start_abstime);
4698 		}
4699 
4700 		/* Next record any relevant UUID info and store the task snapshot */
4701 		if (task_in_transition ||
4702 		    !collect_delta_stackshot ||
4703 		    (task_start_abstime == 0) ||
4704 		    (task_start_abstime > stackshot_args.since_timestamp) ||
4705 		    some_thread_ran) {
4706 			/*
4707 			 * Collect full task information in these scenarios:
4708 			 *
4709 			 * 1) a full stackshot or the task is in transition
4710 			 * 2) a delta stackshot where the task started after the previous full stackshot
4711 			 * 3) a delta stackshot where any thread from the task has run since the previous full stackshot
4712 			 *
4713 			 * because the task may have exec'ed, changing its name, architecture, load info, etc
4714 			 */
4715 
4716 			kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, &task_snap_ss_flags));
4717 			kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, stackshot_flags, have_pmap, &task_snap_ss_flags));
4718 #if STACKSHOT_COLLECTS_LATENCY_INFO
4719 			if (!task_in_transition) {
4720 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags, &latency_info));
4721 			} else {
4722 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4723 			}
4724 #else
4725 			if (!task_in_transition) {
4726 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4727 			} else {
4728 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4729 			}
4730 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4731 		} else {
4732 			kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4733 		}
4734 
4735 #if STACKSHOT_COLLECTS_LATENCY_INFO
4736 		latency_info.misc_latency = mach_absolute_time();
4737 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4738 
4739 		struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
4740 		int current_delta_snapshot_index                  = 0;
4741 		if (num_delta_thread_snapshots > 0) {
4742 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
4743 			    sizeof(struct thread_delta_snapshot_v3),
4744 			    num_delta_thread_snapshots, &out_addr));
4745 			delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
4746 		}
4747 
4748 
4749 #if STACKSHOT_COLLECTS_LATENCY_INFO
4750 		latency_info.task_thread_count_loop_latency = mach_absolute_time();
4751 #endif
4752 		/*
4753 		 * Iterate over the task threads to save thread snapshots and determine
4754 		 * how much space we need for waitinfo and turnstile info
4755 		 */
4756 		thread_t thread = THREAD_NULL;
4757 		queue_iterate(&task->threads, thread, thread_t, task_threads)
4758 		{
4759 			if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4760 				error = KERN_FAILURE;
4761 				goto error_exit;
4762 			}
4763 
4764 			uint64_t thread_uniqueid;
4765 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
4766 				continue;
4767 			}
4768 			thread_uniqueid = thread_tid(thread);
4769 
4770 			boolean_t thread_on_core;
4771 			enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4772 
4773 #if STACKSHOT_COLLECTS_LATENCY_INFO
4774 			stackshot_cpu_latency.threads_processed++;
4775 #endif
4776 
4777 			switch (thread_classification) {
4778 			case tc_full_snapshot:
4779 				/* add thread marker */
4780 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4781 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
4782 
4783 				/* thread snapshot can be large, including strings, avoid overflowing the stack. */
4784 				kcdata_compression_window_open(stackshot_kcdata_p);
4785 
4786 				kcd_exit_on_error(kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, stackshot_flags, have_pmap, thread_on_core));
4787 
4788 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4789 
4790 				/* mark end of thread snapshot data */
4791 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
4792 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
4793 				break;
4794 			case tc_delta_snapshot:
4795 				kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++], thread, thread_on_core));
4796 				break;
4797 			}
4798 
4799 			/*
4800 			 * We want to report owner information regardless of whether a thread
4801 			 * has changed since the last delta, whether it's a normal stackshot,
4802 			 * or whether it's nonrunnable
4803 			 */
4804 			if (save_owner_info) {
4805 				if (stackshot_thread_has_valid_waitinfo(thread)) {
4806 					num_waitinfo_threads++;
4807 				}
4808 
4809 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
4810 					num_turnstileinfo_threads++;
4811 				}
4812 			}
4813 		}
4814 #if STACKSHOT_COLLECTS_LATENCY_INFO
4815 		latency_info.task_thread_count_loop_latency = mach_absolute_time() - latency_info.task_thread_count_loop_latency;
4816 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4817 
4818 		thread_waitinfo_v2_t *thread_waitinfo           = NULL;
4819 		thread_turnstileinfo_v2_t *thread_turnstileinfo = NULL;
4820 		int current_waitinfo_index              = 0;
4821 		int current_turnstileinfo_index         = 0;
4822 		/* allocate space for the wait and turnstile info */
4823 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
4824 			/* thread waitinfo and turnstileinfo can be quite large, avoid overflowing the stack */
4825 			kcdata_compression_window_open(stackshot_kcdata_p);
4826 
4827 			if (num_waitinfo_threads > 0) {
4828 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
4829 				    sizeof(thread_waitinfo_v2_t), num_waitinfo_threads, &out_addr));
4830 				thread_waitinfo = (thread_waitinfo_v2_t *)out_addr;
4831 			}
4832 
4833 			if (num_turnstileinfo_threads > 0) {
4834 				/* get space for the turnstile info */
4835 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
4836 				    sizeof(thread_turnstileinfo_v2_t), num_turnstileinfo_threads, &out_addr));
4837 				thread_turnstileinfo = (thread_turnstileinfo_v2_t *)out_addr;
4838 			}
4839 
4840 			stackshot_plh_resetgen();  // so we know which portlabel_ids are referenced
4841 		}
4842 
4843 #if STACKSHOT_COLLECTS_LATENCY_INFO
4844 		latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
4845 		latency_info.task_thread_data_loop_latency = mach_absolute_time();
4846 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4847 
4848 		/* Iterate over the task's threads to save the wait and turnstile info */
4849 		queue_iterate(&task->threads, thread, thread_t, task_threads)
4850 		{
4851 			uint64_t thread_uniqueid;
4852 			#pragma unused(thread_uniqueid)
4853 
4854 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
4855 				continue;
4856 			}
4857 
4858 			thread_uniqueid = thread_tid(thread);
4859 
4860 			/* If we want owner info, we should capture it regardless of its classification */
4861 			if (save_owner_info) {
4862 				if (stackshot_thread_has_valid_waitinfo(thread)) {
4863 					stackshot_thread_wait_owner_info(
4864 						thread,
4865 						&thread_waitinfo[current_waitinfo_index++]);
4866 				}
4867 
4868 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
4869 					stackshot_thread_turnstileinfo(
4870 						thread,
4871 						&thread_turnstileinfo[current_turnstileinfo_index++]);
4872 				}
4873 			}
4874 		}
4875 
4876 #if STACKSHOT_COLLECTS_LATENCY_INFO
4877 		latency_info.task_thread_data_loop_latency = mach_absolute_time() - latency_info.task_thread_data_loop_latency;
4878 		latency_info.misc2_latency = mach_absolute_time();
4879 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4880 
4881 #if DEBUG || DEVELOPMENT
4882 		if (current_delta_snapshot_index != num_delta_thread_snapshots) {
4883 			panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
4884 			    num_delta_thread_snapshots, current_delta_snapshot_index);
4885 		}
4886 		if (current_waitinfo_index != num_waitinfo_threads) {
4887 			panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
4888 			    num_waitinfo_threads, current_waitinfo_index);
4889 		}
4890 #endif
4891 
4892 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
4893 			kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4894 			// now, record the portlabel hashes.
4895 			kcd_exit_on_error(kdp_stackshot_plh_record());
4896 		}
4897 
4898 #if IMPORTANCE_INHERITANCE
4899 		if (save_donating_pids_p) {
4900 			/* Ensure the buffer is big enough, since we're using the stack buffer for this. */
4901 			static_assert(TASK_IMP_WALK_LIMIT * sizeof(int32_t) <= MAX_FRAMES * sizeof(uintptr_t));
4902 			saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
4903 			    (char*) stackshot_cpu_ctx.scc_stack_buffer, TASK_IMP_WALK_LIMIT);
4904 			if (saved_count > 0) {
4905 				/* Variable size array - better not have it on the stack. */
4906 				kcdata_compression_window_open(stackshot_kcdata_p);
4907 				kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
4908 				    sizeof(int32_t), saved_count, stackshot_cpu_ctx.scc_stack_buffer));
4909 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4910 			}
4911 		}
4912 #endif
4913 
4914 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
4915 		if (!stackshot_ctx.sc_panic_stackshot) {
4916 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
4917 			    "task_cpu_cycle_count"));
4918 		}
4919 #endif
4920 
4921 #if STACKSHOT_COLLECTS_LATENCY_INFO
4922 		latency_info.misc2_latency = mach_absolute_time() - latency_info.misc2_latency;
4923 		if (collect_latency_info) {
4924 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO_TASK, sizeof(latency_info), &latency_info));
4925 		}
4926 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4927 
4928 		/* mark end of task snapshot data */
4929 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, container_type,
4930 		    task_uniqueid));
4931 	}
4932 
4933 
4934 error_exit:
4935 	return error;
4936 }
4937 
4938 /* Record global shared regions */
4939 static kern_return_t
4940 kdp_stackshot_shared_regions(uint64_t trace_flags)
4941 {
4942 	kern_return_t error        = KERN_SUCCESS;
4943 
4944 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4945 	extern queue_head_t vm_shared_region_queue;
4946 	vm_shared_region_t sr;
4947 
4949 	queue_iterate(&vm_shared_region_queue,
4950 	    sr,
4951 	    vm_shared_region_t,
4952 	    sr_q) {
4953 		struct dyld_shared_cache_loadinfo_v2 scinfo = {0};
4954 		if (!_stackshot_validate_kva((vm_offset_t)sr, sizeof(*sr))) {
4955 			break;
4956 		}
4957 		if (collect_delta_stackshot && sr->sr_install_time < stackshot_args.since_timestamp) {
4958 			continue; // only include new shared caches in delta stackshots
4959 		}
4960 		uint32_t sharedCacheFlags = ((sr == primary_system_shared_region) ? kSharedCacheSystemPrimary : 0) |
4961 		    (sr->sr_driverkit ? kSharedCacheDriverkit : 0);
4962 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4963 		    STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
4964 		kdp_memcpy(scinfo.sharedCacheUUID, sr->sr_uuid, sizeof(sr->sr_uuid));
4965 		scinfo.sharedCacheSlide = sr->sr_slide;
4966 		scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_base_address + sr->sr_first_mapping;
4967 		scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
4968 		scinfo.sharedCacheID = sr->sr_id;
4969 		scinfo.sharedCacheFlags = sharedCacheFlags;
4970 
4971 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_INFO,
4972 		    sizeof(scinfo), &scinfo));
4973 
4974 		if ((trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) && sr->sr_images != NULL &&
4975 		    _stackshot_validate_kva((vm_offset_t)sr->sr_images, sr->sr_images_count * sizeof(struct dyld_uuid_info_64))) {
4976 			assert(sr->sr_images_count != 0);
4977 			kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
4978 		}
4979 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
4980 		    STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
4981 	}
4982 
4983 	/*
4984 	 * For backwards compatibility; this will eventually be removed.
4985 	 * Another copy of the Primary System Shared Region, for older readers.
4986 	 */
4987 	sr = primary_system_shared_region;
4988 	/* record system level shared cache load info (if available) */
4989 	if (!collect_delta_stackshot && sr &&
4990 	    _stackshot_validate_kva((vm_offset_t)sr, sizeof(struct vm_shared_region))) {
4991 		struct dyld_shared_cache_loadinfo scinfo = {0};
4992 
4993 		/*
4994 		 * Historically, this data was in a dyld_uuid_info_64 structure, but the
4995 		 * naming of both the structure and fields for this use isn't great.  The
4996 		 * dyld_shared_cache_loadinfo structure has better names, but the same
4997 		 * layout and content as the original.
4998 		 *
4999 		 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
5000 		 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
5001 		 * entries; here, it's the slid base address, and we leave it that way
5002 		 * for backwards compatibility.
5003 		 */
5004 		kdp_memcpy(scinfo.sharedCacheUUID, &sr->sr_uuid, sizeof(sr->sr_uuid));
5005 		scinfo.sharedCacheSlide = sr->sr_slide;
5006 		scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_slide + sr->sr_base_address;
5007 		scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
5008 
5009 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
5010 		    sizeof(scinfo), &scinfo));
5011 
5012 		if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
5013 			/*
5014 			 * Include a map of the system shared cache layout if it has been populated
5015 			 * (which is only when the system is using a custom shared cache).
5016 			 */
5017 			if (sr->sr_images && _stackshot_validate_kva((vm_offset_t)sr->sr_images,
5018 			    (sr->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
5019 				assert(sr->sr_images_count != 0);
5020 				kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
5021 			}
5022 		}
5023 	}
5024 
5025 error_exit:
5026 	return error;
5027 }
5028 
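/*
 * Top-level formatter for the stackshot kcdata buffer. Emits the global header
 * items (input flags, timebase, version strings, shared-region and global
 * memory info, thread groups), then walks the task list, either recording each
 * task inline (singlethreaded/panic case) or queueing it as a work item for
 * the participating CPUs, records jetsam coalition snapshots, and finally
 * walks the terminated-task list the same way.
 */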
5029 static kern_return_t
5030 kdp_stackshot_kcdata_format(void)
5031 {
5032 	kern_return_t error        = KERN_SUCCESS;
5033 	mach_vm_address_t out_addr = 0;
5034 	uint64_t abs_time = 0;
5035 	uint64_t system_state_flags = 0;
5036 	task_t task = TASK_NULL;
5037 	mach_timebase_info_data_t timebase = {0, 0};
5038 	uint32_t length_to_copy = 0, tmp32 = 0;
5039 	abs_time = mach_absolute_time();
5040 	uint64_t last_task_start_time = 0;
5041 	int cur_workitem_index = 0;
5042 	uint64_t tasks_in_stackshot = 0;
5043 	uint64_t threads_in_stackshot = 0;
5044 
5045 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5046 	uint64_t stackshot_begin_cpu_cycle_count = 0;
5047 
5048 	if (!stackshot_ctx.sc_panic_stackshot) {
5049 		stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5050 	}
5051 #endif
5052 
5053 	/* the CPU entering here is participating in the stackshot */
5054 	stackshot_cpu_ctx.scc_did_work = true;
5055 
5056 #if STACKSHOT_COLLECTS_LATENCY_INFO
5057 	collect_latency_info = stackshot_flags & STACKSHOT_DISABLE_LATENCY_INFO ? false : true;
5058 #endif
5059 	/* process the flags */
5060 	bool collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
5061 	bool collect_exclaves        = !disable_exclave_stackshot && ((stackshot_flags & STACKSHOT_SKIP_EXCLAVES) == 0);
5062 	stackshot_ctx.sc_enable_faulting = (stackshot_flags & (STACKSHOT_ENABLE_BT_FAULTING));
5063 
5064 	/* Currently we only support returning explicit KEXT load info on fileset kernels */
5065 	kc_format_t primary_kc_type = KCFormatUnknown;
5066 	if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type != KCFormatFileset)) {
5067 		stackshot_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
5068 	}
5069 
5070 	if (sizeof(void *) == 8) {
5071 		system_state_flags |= kKernel64_p;
5072 	}
5073 
5074 #if CONFIG_EXCLAVES
5075 	if (!stackshot_ctx.sc_panic_stackshot && collect_exclaves) {
5076 		kcd_exit_on_error(stackshot_setup_exclave_waitlist()); /* Allocate list of exclave threads */
5077 	}
5078 #else
5079 #pragma unused(collect_exclaves)
5080 #endif /* CONFIG_EXCLAVES */
5081 
5082 	/* setup mach_absolute_time and timebase info -- copy out in some cases and needed to convert since_timestamp to seconds for proc start time */
5083 	clock_timebase_info(&timebase);
5084 
5085 	/* begin saving data into the buffer */
5086 	if (stackshot_ctx.sc_bytes_uncompressed) {
5087 		stackshot_ctx.sc_bytes_uncompressed = 0;
5088 	}
5089 
5090 	/*
5091 	 * Setup pre-task linked kcdata buffer.
5092 	 * The idea here is that we want the kcdata to be in (roughly) the same order as it was
5093 	 * before we made this multithreaded, so we have separate buffers for pre and post task-iteration,
5094 	 * since that's the parallelized part.
5095 	 */
5096 	if (!stackshot_ctx.sc_is_singlethreaded) {
5097 		kcd_exit_on_error(stackshot_new_linked_kcdata());
5098 		stackshot_ctx.sc_pretask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5099 	}
5100 
5101 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, stackshot_flags, "stackshot_in_flags"));
5102 	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)stackshot_args.pid, "stackshot_in_pid"));
5103 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
5104 	if (stackshot_flags & STACKSHOT_PAGE_TABLES) {
5105 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_args.pagetable_mask, "stackshot_pagetable_mask"));
5106 	}
5107 	if (stackshot_initial_estimate != 0) {
5108 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
5109 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate_adj, "stackshot_size_estimate_adj"));
5110 	}
5111 
5112 #if STACKSHOT_COLLECTS_LATENCY_INFO
5113 	stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time();
5114 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5115 
5116 #if CONFIG_JETSAM
5117 	tmp32 = memorystatus_get_pressure_status_kdp();
5118 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &tmp32));
5119 #endif
5120 
5121 	if (!collect_delta_stackshot) {
5122 		tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
5123 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &tmp32));
5124 
5125 		tmp32 = PAGE_SIZE;
5126 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &tmp32));
5127 
5128 		/* save boot-args and osversion string */
5129 		length_to_copy =  MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
5130 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, (const void *)version));
5131 		length_to_copy = MIN((uint32_t)(strlen(osversion) + 1), OSVERSIZE);
5132 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OS_BUILD_VERSION, length_to_copy, (void *)osversion));
5133 
5134 
5135 		length_to_copy =  MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
5136 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, PE_boot_args()));
5137 
5138 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &timebase));
5139 	} else {
5140 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &stackshot_args.since_timestamp));
5141 	}
5142 
5143 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &abs_time));
5144 
5145 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_ctx.sc_microsecs));
5146 
5147 	kcd_exit_on_error(kdp_stackshot_shared_regions(stackshot_flags));
5148 
5149 	/* Add requested information first */
5150 	if (stackshot_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
5151 		struct mem_and_io_snapshot mais = {0};
5152 		kdp_mem_and_io_snapshot(&mais);
5153 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(mais), &mais));
5154 	}
5155 
5156 #if CONFIG_THREAD_GROUPS
5157 	struct thread_group_snapshot_v3 *thread_groups = NULL;
5158 	int num_thread_groups = 0;
5159 
5160 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5161 	uint64_t thread_group_begin_cpu_cycle_count = 0;
5162 
5163 	if (!stackshot_ctx.sc_is_singlethreaded && (stackshot_flags & STACKSHOT_THREAD_GROUP)) {
5164 		thread_group_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5165 	}
5166 #endif
5167 
5168 	/* Iterate over thread group names */
5169 	if (stackshot_flags & STACKSHOT_THREAD_GROUP) {
5170 		/* Variable size array - better not have it on the stack. */
5171 		kcdata_compression_window_open(stackshot_kcdata_p);
5172 
5173 		if (thread_group_iterate_stackshot(stackshot_thread_group_count, &num_thread_groups) != KERN_SUCCESS) {
5174 			stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5175 		}
5176 
5177 		if (num_thread_groups > 0) {
5178 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_GROUP_SNAPSHOT, sizeof(struct thread_group_snapshot_v3), num_thread_groups, &out_addr));
5179 			thread_groups = (struct thread_group_snapshot_v3 *)out_addr;
5180 		}
5181 
5182 		if (thread_group_iterate_stackshot(stackshot_thread_group_snapshot, thread_groups) != KERN_SUCCESS) {
5183 			error = KERN_FAILURE;
5184 			goto error_exit;
5185 		}
5186 
5187 		kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5188 	}
5189 
5190 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5191 	if (!stackshot_ctx.sc_panic_stackshot && (thread_group_begin_cpu_cycle_count != 0)) {
5192 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - thread_group_begin_cpu_cycle_count),
5193 		    "thread_groups_cpu_cycle_count"));
5194 	}
5195 #endif
5196 #else
5197 	stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5198 #endif /* CONFIG_THREAD_GROUPS */
5199 
5200 
5201 #if STACKSHOT_COLLECTS_LATENCY_INFO
5202 	stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.setup_latency_mt;
5203 	if (stackshot_ctx.sc_is_singlethreaded) {
5204 		stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time();
5205 	} else {
5206 		stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time();
5207 	}
5208 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5209 
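	/*
	 * A stackshot is process-scoped only when a specific pid was requested and
	 * driver threads were not; otherwise every task must be visited so that
	 * kdp_stackshot_record_task() can also pick up driver tasks.
	 */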
5210 	bool const process_scoped = (stackshot_args.pid != -1) &&
5211 	    ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
5212 
5213 	/* Iterate over tasks */
5214 	queue_iterate(&tasks, task, task_t, tasks)
5215 	{
5216 		stackshot_panic_guard();
5217 
5218 		if (collect_delta_stackshot) {
5219 			uint64_t abstime;
5220 			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &abstime);
5221 
5222 			if (abstime > last_task_start_time) {
5223 				last_task_start_time = abstime;
5224 			}
5225 		}
5226 
5227 		pid_t task_pid = pid_from_task(task);
5228 
5229 		if (process_scoped && (task_pid != stackshot_args.pid)) {
5230 			continue;
5231 		}
5232 
5233 		if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5234 		    (!queue_empty(&task->threads) && task_pid != -1)) {
5235 			tasks_in_stackshot++;
5236 			threads_in_stackshot += task->thread_count;
5237 		}
5238 
5239 		/* If this is a singlethreaded stackshot, don't use the work queues. */
5240 		if (stackshot_ctx.sc_is_singlethreaded) {
5241 			kcd_exit_on_error(kdp_stackshot_record_task(task));
5242 		} else {
5243 			kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5244 				.sswi_task = task,
5245 				.sswi_data = NULL,
5246 				.sswi_idx = cur_workitem_index++
5247 			}));
5248 		}
5249 
5250 		if (process_scoped) {
5251 			/* Only targeting one process, we're done now. */
5252 			break;
5253 		}
5254 	}
5255 
5256 #if STACKSHOT_COLLECTS_LATENCY_INFO
5257 	if (stackshot_ctx.sc_is_singlethreaded) {
5258 		stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_task_iteration_latency_mt;
5259 	} else {
5260 		stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.task_queue_building_latency_mt;
5261 	}
5262 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5263 
5264 	/* Setup post-task kcdata buffer */
5265 	if (!stackshot_ctx.sc_is_singlethreaded) {
5266 		stackshot_finalize_linked_kcdata();
5267 		kcd_exit_on_error(stackshot_new_linked_kcdata());
5268 		stackshot_ctx.sc_posttask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5269 	}
5270 
5271 #if CONFIG_COALITIONS
5272 	/* Don't collect jetsam coalition snapshots in delta stackshots - these don't change */
5273 	if (!collect_delta_stackshot || (last_task_start_time > stackshot_args.since_timestamp)) {
5274 		int num_coalitions = 0;
5275 		struct jetsam_coalition_snapshot *coalitions = NULL;
5276 
5277 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5278 		uint64_t coalition_begin_cpu_cycle_count = 0;
5279 
5280 		if (!stackshot_ctx.sc_panic_stackshot && (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
5281 			coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5282 		}
5283 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5284 
5285 		/* Iterate over coalitions */
5286 		if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5287 			if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5288 				stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5289 			}
5290 		}
5291 		if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5292 			if (num_coalitions > 0) {
5293 				/* Variable size array - better not have it on the stack. */
5294 				kcdata_compression_window_open(stackshot_kcdata_p);
5295 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
5296 				coalitions = (struct jetsam_coalition_snapshot*)out_addr;
5297 
5298 				if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5299 					error = KERN_FAILURE;
5300 					goto error_exit;
5301 				}
5302 
5303 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5304 			}
5305 		}
5306 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5307 		if (!stackshot_ctx.sc_panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
5308 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
5309 			    "coalitions_cpu_cycle_count"));
5310 		}
5311 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5312 	}
5313 #else
5314 	stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5315 #endif /* CONFIG_COALITIONS */
5316 
5317 	stackshot_panic_guard();
5318 
5319 #if STACKSHOT_COLLECTS_LATENCY_INFO
5320 	if (stackshot_ctx.sc_is_singlethreaded) {
5321 		stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time();
5322 	} else {
5323 		stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time();
5324 	}
5325 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5326 
5327 	/*
5328 	 * Iterate over the tasks in the terminated tasks list. We only inspect
5329 	 * tasks that have a valid bsd_info pointer. Checks for task transitions
5330 	 * (e.g. past P_LPEXIT during proc_exit()) are now performed inside
5331 	 * kdp_stackshot_record_task(), which collects a safer, minimal
5332 	 * transitioning_task_snapshot struct for such tasks via
5333 	 * kcdata_record_transitioning_task_snapshot().
5334 	 */
5335 	queue_iterate(&terminated_tasks, task, task_t, tasks)
5336 	{
5337 		stackshot_panic_guard();
5338 
5339 		if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5340 		    (!queue_empty(&task->threads) && pid_from_task(task) != -1)) {
5341 			tasks_in_stackshot++;
5342 			threads_in_stackshot += task->thread_count;
5343 		}
5344 
5345 		/* Only use workqueues on non-panic and non-scoped stackshots. */
5346 		if (stackshot_ctx.sc_is_singlethreaded) {
5347 			kcd_exit_on_error(kdp_stackshot_record_task(task));
5348 		} else {
5349 			kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5350 				.sswi_task = task,
5351 				.sswi_data = NULL,
5352 				.sswi_idx = cur_workitem_index++
5353 			}));
5354 		}
5355 	}
5356 
5357 	/* Mark the queue(s) as populated. */
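	/* This release store pairs with the acquire load of sswq_populated in stackshot_cpu_work_on_queue(). */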
5358 	for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5359 		os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_populated, true, release);
5360 	}
5361 
5362 #if DEVELOPMENT || DEBUG
5363 	kcd_exit_on_error(kdp_stackshot_plh_stats());
5364 #endif /* DEVELOPMENT || DEBUG */
5365 
5366 #if STACKSHOT_COLLECTS_LATENCY_INFO
5367 	if (stackshot_ctx.sc_is_singlethreaded) {
5368 		stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt;
5369 	} else {
5370 		stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt;
5371 	}
5372 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5373 
5374 #if STACKSHOT_COLLECTS_LATENCY_INFO
5375 	if (collect_latency_info) {
5376 		stackshot_ctx.sc_latency.latency_version = 2;
5377 		stackshot_ctx.sc_latency.main_cpu_number = stackshot_ctx.sc_main_cpuid;
5378 		stackshot_ctx.sc_latency.calling_cpu_number = stackshot_ctx.sc_calling_cpuid;
5379 	}
5380 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5381 
5382 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5383 	if (!stackshot_ctx.sc_panic_stackshot) {
5384 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
5385 		    "stackshot_total_cpu_cycle_cnt"));
5386 	}
5387 #endif
5388 
5389 	kcdata_add_uint64_with_description(stackshot_kcdata_p, tasks_in_stackshot, "stackshot_tasks_count");
5390 	kcdata_add_uint64_with_description(stackshot_kcdata_p, threads_in_stackshot, "stackshot_threads_count");
5391 
5392 	stackshot_panic_guard();
5393 
5394 	if (!stackshot_ctx.sc_is_singlethreaded) {
5395 		/* Chip away at the queue. */
5396 		stackshot_finalize_linked_kcdata();
5397 		stackshot_cpu_do_work();
5398 		*stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_tail->kcdata;
5399 	}
5400 
5401 #if CONFIG_EXCLAVES
5402 	/* If this is the panic stackshot, check if Exclaves panic left its stackshot in the shared region */
5403 	if (stackshot_ctx.sc_panic_stackshot) {
5404 		struct exclaves_panic_stackshot excl_ss;
5405 		kdp_read_panic_exclaves_stackshot(&excl_ss);
5406 
5407 		if (excl_ss.stackshot_buffer != NULL && excl_ss.stackshot_buffer_size != 0) {
5408 			tb_error_t tberr = TB_ERROR_SUCCESS;
5409 			exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_FOUND;
5410 
5411 			/* this block does not escape, so this is okay... */
5412 			kern_return_t *error_in_block = &error;
5413 			kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
5414 			    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5415 			tberr = stackshot_stackshotresult__unmarshal(excl_ss.stackshot_buffer, excl_ss.stackshot_buffer_size, ^(stackshot_stackshotresult_s result){
5416 				*error_in_block = stackshot_exclaves_process_stackshot(&result, stackshot_kcdata_p, false);
5417 			});
5418 			kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
5419 			    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5420 			if (tberr != TB_ERROR_SUCCESS) {
5421 				exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_DECODE_FAILED;
5422 			}
5423 		} else {
5424 			exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_NOT_FOUND;
5425 		}
5426 
5427 		/* check error from the block */
5428 		kcd_exit_on_error(error);
5429 	}
5430 #endif
5431 
5432 	/*  === END of populating stackshot data === */
5433 error_exit:;
5434 	if (error != KERN_SUCCESS) {
5435 		stackshot_set_error(error);
5436 	}
5437 
5438 	stackshot_panic_guard();
5439 
5440 	return error;
5441 }
5442 
5443 static uint64_t
5444 proc_was_throttled_from_task(task_t task)
5445 {
5446 	uint64_t was_throttled = 0;
5447 	void *bsd_info = get_bsdtask_info(task);
5448 
5449 	if (bsd_info) {
5450 		was_throttled = proc_was_throttled(bsd_info);
5451 	}
5452 
5453 	return was_throttled;
5454 }
5455 
5456 static uint64_t
5457 proc_did_throttle_from_task(task_t task)
5458 {
5459 	uint64_t did_throttle = 0;
5460 	void *bsd_info = get_bsdtask_info(task);
5461 
5462 	if (bsd_info) {
5463 		did_throttle = proc_did_throttle(bsd_info);
5464 	}
5465 
5466 	return did_throttle;
5467 }
5468 
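/*
 * Fill in a mem_and_io_snapshot with system-wide VM page counts, compressor
 * statistics, and (when mach_vm_pressure_monitor() succeeds) memory-pressure
 * figures for pages wanted and reclaimed.
 */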
5469 static void
5470 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
5471 {
5472 	unsigned int pages_reclaimed;
5473 	unsigned int pages_wanted;
5474 	kern_return_t kErr;
5475 
5476 	uint64_t compressions = 0;
5477 	uint64_t decompressions = 0;
5478 
5479 	compressions = counter_load(&vm_statistics_compressions);
5480 	decompressions = counter_load(&vm_statistics_decompressions);
5481 
5482 	memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
5483 	memio_snap->free_pages = vm_page_free_count;
5484 	memio_snap->active_pages = vm_page_active_count;
5485 	memio_snap->inactive_pages = vm_page_inactive_count;
5486 	memio_snap->purgeable_pages = vm_page_purgeable_count;
5487 	memio_snap->wired_pages = vm_page_wire_count;
5488 	memio_snap->speculative_pages = vm_page_speculative_count;
5489 	memio_snap->throttled_pages = vm_page_throttled_count;
5490 	memio_snap->busy_buffer_count = count_busy_buffers();
5491 	memio_snap->filebacked_pages = vm_page_pageable_external_count;
5492 	memio_snap->compressions = (uint32_t)compressions;
5493 	memio_snap->decompressions = (uint32_t)decompressions;
5494 	memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
5495 	kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
5496 
5497 	if (!kErr) {
5498 		memio_snap->pages_wanted = (uint32_t)pages_wanted;
5499 		memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
5500 		memio_snap->pages_wanted_reclaimed_valid = 1;
5501 	} else {
5502 		memio_snap->pages_wanted = 0;
5503 		memio_snap->pages_reclaimed = 0;
5504 		memio_snap->pages_wanted_reclaimed_valid = 0;
5505 	}
5506 }
5507 
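/*
 * Wrapper around kdp_find_phys() that tracks the time this CPU has spent
 * faulting and the number of pages it has faulted in.  Once the accumulated
 * fault time reaches stackshot_max_fault_time, further faulting is disabled
 * for this CPU; panic stackshots are exempt and also skip the multi-CPU
 * fault path.
 */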
5508 static vm_offset_t
5509 stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags)
5510 {
5511 	vm_offset_t result;
5512 	struct kdp_fault_result fault_results = {0};
5513 	if (stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting) {
5514 		fault_flags &= ~KDP_FAULT_FLAGS_ENABLE_FAULTING;
5515 	}
5516 	if (!stackshot_ctx.sc_panic_stackshot) {
5517 		fault_flags |= KDP_FAULT_FLAGS_MULTICPU;
5518 	}
5519 
5520 	result = kdp_find_phys(map, target_addr, fault_flags, &fault_results);
5521 
5522 	if ((fault_results.flags & KDP_FAULT_RESULT_TRIED_FAULT) || (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN)) {
5523 		stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting += fault_results.time_spent_faulting;
5524 
5525 #if STACKSHOT_COLLECTS_LATENCY_INFO
5526 		stackshot_cpu_latency.faulting_time_mt += fault_results.time_spent_faulting;
5527 #endif
5528 
5529 		if ((stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting >= stackshot_max_fault_time) && !stackshot_ctx.sc_panic_stackshot) {
5530 			stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
5531 		}
5532 	}
5533 
5534 	if (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN) {
5535 		stackshot_cpu_ctx.scc_fault_stats.sfs_pages_faulted_in++;
5536 	}
5537 
5538 	if (kdp_fault_result_flags) {
5539 		*kdp_fault_result_flags = fault_results.flags;
5540 	}
5541 
5542 	return result;
5543 }
5544 
5545 /*
5546  * Wrappers around kdp_generic_copyin, kdp_generic_copyin_word, kdp_generic_copyin_string that use stackshot_find_phys
5547  * in order to:
5548  *   1. collect statistics on the number of pages faulted in
5549  *   2. stop faulting if the time spent faulting has exceeded the limit.
5550  */
5551 static boolean_t
5552 stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5553 {
5554 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5555 	if (try_fault) {
5556 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5557 	}
5558 	return kdp_generic_copyin(map, uaddr, dest, size, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5559 }
5560 static boolean_t
5561 stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5562 {
5563 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5564 	if (try_fault) {
5565 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5566 	}
5567 	return kdp_generic_copyin_word(task, addr, result, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5568 }
5569 static int
5570 stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5571 {
5572 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5573 	if (try_fault) {
5574 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5575 	}
5576 	return kdp_generic_copyin_string(task, addr, buf, buf_sz, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags);
5577 }
5578 
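/*
 * Main stackshot driver.  Sets up the workqueues (multithreaded case), elects
 * a main CPU if none volunteered during preflight, lets the auxiliary CPUs
 * start working, waits for every participating CPU to finish, and records the
 * overall duration and return value.
 */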
5579 kern_return_t
5580 do_stackshot(void *context)
5581 {
5582 #pragma unused(context)
5583 	kern_return_t error;
5584 	size_t queue_size;
5585 	uint64_t abs_time = mach_absolute_time(), abs_time_end = 0;
5586 	kdp_snapshot++;
5587 
5588 	_stackshot_validation_reset();
5589 	error = stackshot_plh_setup(); /* set up port label hash */
5590 
5591 	if (!stackshot_ctx.sc_is_singlethreaded) {
5592 		/* Set up queues. These numbers shouldn't change, but slightly fudge queue size just in case. */
5593 		queue_size = FUDGED_SIZE(tasks_count + terminated_tasks_count, 10);
5594 		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5595 			stackshot_ctx.sc_workqueues[i] = (struct stackshot_workqueue) {
5596 				.sswq_items     = stackshot_alloc_arr(struct stackshot_workitem, queue_size, &error),
5597 				.sswq_capacity  = queue_size,
5598 				.sswq_num_items = 0,
5599 				.sswq_cur_item  = 0,
5600 				.sswq_populated = false
5601 			};
5602 			if (error != KERN_SUCCESS) {
5603 				break;
5604 			}
5605 		}
5606 	}
5607 
5608 	if (error != KERN_SUCCESS) {
5609 		stackshot_set_error(error);
5610 		return error;
5611 	}
5612 
5613 	/*
5614 	 * If no main CPU has been selected at this point (every CPU has called
5615 	 * stackshot_cpu_preflight by now), then there was no CLPC-recommended
5616 	 * P-core available. In that case, volunteer ourselves to be the main CPU,
5617 	 * because someone has to do it.
5618 	 */
5619 	if (stackshot_ctx.sc_main_cpuid == -1) {
5620 		os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, cpu_number(), acquire);
5621 		stackshot_cpu_ctx.scc_can_work = true;
5622 	}
5623 
5624 	/* After this, auxiliary CPUs can begin work. */
5625 	os_atomic_store(&stackshot_ctx.sc_state, SS_RUNNING, release);
5626 
5627 	/* If we are the main CPU, populate the queues / do other main CPU work. */
5628 	if (stackshot_ctx.sc_panic_stackshot || (stackshot_ctx.sc_main_cpuid == cpu_number())) {
5629 		stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
5630 	} else if (stackshot_cpu_ctx.scc_can_work) {
5631 		stackshot_cpu_do_work();
5632 	}
5633 
5634 	/* Wait for every CPU to finish. */
5635 #if STACKSHOT_COLLECTS_LATENCY_INFO
5636 	stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time();
5637 #endif
5638 	if (stackshot_cpu_ctx.scc_can_work) {
5639 		os_atomic_dec(&stackshot_ctx.sc_cpus_working, seq_cst);
5640 		stackshot_cpu_ctx.scc_can_work = false;
5641 	}
5642 	while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
5643 		loop_wait();
5644 	}
5645 	stackshot_panic_guard();
5646 #if STACKSHOT_COLLECTS_LATENCY_INFO
5647 	stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.cpu_wait_latency_mt;
5648 #endif
5649 
5650 	/* update timestamp of the stackshot */
5651 	abs_time_end = mach_absolute_time();
5652 	stackshot_ctx.sc_duration = (struct stackshot_duration_v2) {
5653 		.stackshot_duration       = (abs_time_end - abs_time),
5654 		.stackshot_duration_outer = 0,
5655 		.stackshot_duration_prior = stackshot_duration_prior_abs,
5656 	};
5657 
5658 	stackshot_plh_reset();
5659 
5660 	/* Check interrupts disabled time. */
5661 #if SCHED_HYGIENE_DEBUG
5662 	bool disable_interrupts_masked_check = kern_feature_override(
5663 		KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
5664 	    (stackshot_flags & STACKSHOT_DO_COMPRESS) != 0;
5665 
5666 #if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
5667 	disable_interrupts_masked_check = true;
5668 #endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */
5669 
5670 	if (disable_interrupts_masked_check) {
5671 		ml_spin_debug_clear_self();
5672 	}
5673 
5674 	if (!stackshot_ctx.sc_panic_stackshot && interrupt_masked_debug_mode) {
5675 		/*
5676 		 * Try to catch instances where stackshot takes too long BEFORE returning from
5677 		 * the debugger
5678 		 */
5679 		ml_handle_stackshot_interrupt_disabled_duration(current_thread());
5680 	}
5681 #endif /* SCHED_HYGIENE_DEBUG */
5682 
5683 	kdp_snapshot--;
5684 
5685 	/* If any other CPU had an error, make sure we return it */
5686 	if (stackshot_ctx.sc_retval == KERN_SUCCESS) {
5687 		stackshot_ctx.sc_retval = stackshot_status_check();
5688 	}
5689 
5690 #if CONFIG_EXCLAVES
5691 	/* Avoid setting AST until as late as possible, in case the stackshot fails */
5692 	if (!stackshot_ctx.sc_panic_stackshot && stackshot_ctx.sc_retval == KERN_SUCCESS) {
5693 		commit_exclaves_ast();
5694 	}
5695 	if (stackshot_ctx.sc_retval != KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
5696 		/* Clear inspection CTID list: no need to wait for these threads */
5697 		stackshot_exclave_inspect_ctid_count = 0;
5698 		stackshot_exclave_inspect_ctid_capacity = 0;
5699 		stackshot_exclave_inspect_ctids = NULL;
5700 	}
5701 #endif
5702 
5703 	/* If this is a singlethreaded stackshot, the "final" kcdata buffer is just our CPU's kcdata buffer */
5704 	if (stackshot_ctx.sc_is_singlethreaded) {
5705 		stackshot_ctx.sc_finalized_kcdata = stackshot_kcdata_p;
5706 	}
5707 
5708 	return stackshot_ctx.sc_retval;
5709 }
5710 
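/*
 * Panic-path wrapper: take the stackshot and, if it succeeds, finalize the
 * singlethreaded kcdata buffer.
 */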
5711 kern_return_t
5712 do_panic_stackshot(void *context)
5713 {
5714 	kern_return_t ret = do_stackshot(context);
5715 	if (ret != KERN_SUCCESS) {
5716 		goto out;
5717 	}
5718 
5719 	ret = stackshot_finalize_singlethreaded_kcdata();
5720 
5721 out:
5722 	return ret;
5723 }
5724 
5725 /*
5726  * Set up needed state for this CPU before participating in a stackshot.
5727  * Namely, we want to signal that we're available to do work.
5728  * Called while interrupts are disabled & in the debugger trap.
5729  */
5730 void
5731 stackshot_cpu_preflight(void)
5732 {
5733 	bool is_recommended, is_calling_cpu;
5734 	int my_cpu_no = cpu_number();
5735 
5736 #if STACKSHOT_COLLECTS_LATENCY_INFO
5737 	stackshot_cpu_latency = (typeof(stackshot_cpu_latency)) {
5738 		.cpu_number            =  cpu_number(),
5739 #if defined(__AMP__)
5740 		.cluster_type          =  current_cpu_datap()->cpu_cluster_type,
5741 #else /* __AMP__ */
5742 		.cluster_type = CLUSTER_TYPE_SMP,
5743 #endif /* __AMP__ */
5744 		.faulting_time_mt      = 0,
5745 		.total_buf             = 0,
5746 		.intercluster_buf_used = 0
5747 	};
5748 #if CONFIG_PERVASIVE_CPI
5749 	mt_cur_cpu_cycles_instrs_speculative(&stackshot_cpu_latency.total_cycles, &stackshot_cpu_latency.total_instrs);
5750 #endif /* CONFIG_PERVASIVE_CPI */
5751 	stackshot_cpu_latency.init_latency_mt = stackshot_cpu_latency.total_latency_mt = mach_absolute_time();
5752 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5753 
5754 	is_recommended = current_processor()->is_recommended;
5755 
5756 	/* If this is a recommended P-core (or SMP), try making it the main CPU */
5757 	if (is_recommended
5758 #if defined(__AMP__)
5759 	    && current_cpu_datap()->cpu_cluster_type == CLUSTER_TYPE_P
5760 #endif /* __AMP__ */
5761 	    ) {
5762 		os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, my_cpu_no, acquire);
5763 	}
5764 
5765 	is_calling_cpu = stackshot_ctx.sc_calling_cpuid == my_cpu_no;
5766 
5767 	stackshot_cpu_ctx.scc_did_work = false;
5768 	stackshot_cpu_ctx.scc_can_work = is_calling_cpu || (is_recommended && !stackshot_ctx.sc_is_singlethreaded);
5769 
5770 	if (stackshot_cpu_ctx.scc_can_work) {
5771 		os_atomic_inc(&stackshot_ctx.sc_cpus_working, relaxed);
5772 	}
5773 }
5774 
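/*
 * Drain work items from a single workqueue.  Each dequeued task is recorded
 * into its own linked kcdata buffer; if the queue is momentarily empty but not
 * yet marked populated, wait for more items.  Bails out early if another CPU
 * has already reported an error or panicked.
 */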
5775 __result_use_check
5776 static kern_return_t
5777 stackshot_cpu_work_on_queue(struct stackshot_workqueue *queue)
5778 {
5779 	struct stackshot_workitem     *cur_workitemp;
5780 	kern_return_t                  error = KERN_SUCCESS;
5781 
5782 	while (((cur_workitemp = stackshot_get_workitem(queue)) != NULL || !os_atomic_load(&queue->sswq_populated, acquire))) {
5783 		/* Check to make sure someone hasn't errored out or panicked. */
5784 		if (__improbable(stackshot_status_check() != KERN_SUCCESS)) {
5785 			return KERN_ABORTED;
5786 		}
5787 
5788 		if (cur_workitemp) {
5789 			kcd_exit_on_error(stackshot_new_linked_kcdata());
5790 			cur_workitemp->sswi_data = stackshot_cpu_ctx.scc_kcdata_head;
5791 			kcd_exit_on_error(kdp_stackshot_record_task(cur_workitemp->sswi_task));
5792 			stackshot_finalize_linked_kcdata();
5793 		} else {
5794 #if STACKSHOT_COLLECTS_LATENCY_INFO
5795 			uint64_t time_begin = mach_absolute_time();
5796 #endif
5797 			loop_wait();
5798 #if STACKSHOT_COLLECTS_LATENCY_INFO
5799 			stackshot_cpu_latency.workqueue_latency_mt += mach_absolute_time() - time_begin;
5800 #endif
5801 		}
5802 	}
5803 
5804 error_exit:
5805 	return error;
5806 }
5807 
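/*
 * Per-CPU worker: allocate this CPU's stack-frame scratch buffer, then drain
 * the workqueues.  Non-E cores walk the queues from most difficult to least
 * difficult, E-cores in the opposite order (see below).
 */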
5808 static void
5809 stackshot_cpu_do_work(void)
5810 {
5811 	kern_return_t                  error;
5812 
5813 	stackshot_cpu_ctx.scc_stack_buffer = stackshot_alloc_arr(uintptr_t, MAX_FRAMES, &error);
5814 	if (error != KERN_SUCCESS) {
5815 		goto error_exit;
5816 	}
5817 
5818 #if STACKSHOT_COLLECTS_LATENCY_INFO
5819 	stackshot_cpu_latency.init_latency_mt = mach_absolute_time() - stackshot_cpu_latency.init_latency_mt;
5820 #endif
5821 
5822 	bool high_perf = true;
5823 
5824 #if defined(__AMP__)
5825 	if (current_cpu_datap()->cpu_cluster_type == CLUSTER_TYPE_E) {
5826 		high_perf = false;
5827 	}
5828 #endif /* __AMP__ */
5829 
5830 	if (high_perf) {
5831 		/* Non-E cores: Work from most difficult to least difficult */
5832 		for (size_t i = STACKSHOT_NUM_WORKQUEUES; i > 0; i--) {
5833 			kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i - 1]));
5834 		}
5835 	} else {
5836 		/* E: Work from least difficult to most difficult */
5837 		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5838 			kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i]));
5839 		}
5840 	}
5841 #if STACKSHOT_COLLECTS_LATENCY_INFO
5842 	stackshot_cpu_latency.total_latency_mt = mach_absolute_time() - stackshot_cpu_latency.total_latency_mt;
5843 #if CONFIG_PERVASIVE_CPI
5844 	uint64_t cycles, instrs;
5845 	mt_cur_cpu_cycles_instrs_speculative(&cycles, &instrs);
5846 	stackshot_cpu_latency.total_cycles = cycles - stackshot_cpu_latency.total_cycles;
5847 	stackshot_cpu_latency.total_instrs = instrs - stackshot_cpu_latency.total_instrs;
5848 #endif /* CONFIG_PERVASIVE_CPI */
5849 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5850 
5851 error_exit:
5852 	if (error != KERN_SUCCESS) {
5853 		stackshot_set_error(error);
5854 	}
5855 	stackshot_panic_guard();
5856 }
5857 
5858 /*
5859  * This is where the other CPUs will end up when we take a stackshot.
5860  * If they're available to do work, they'll do so here.
5861  * Called with interrupts disabled & from the debugger trap.
5862  */
5863 void
5864 stackshot_aux_cpu_entry(void)
5865 {
5866 	/*
5867 	 * This is where the other CPUs will end up when we take a stackshot.
5868 	 * Also, the main CPU will call this in the middle of its work to chip
5869 	 * away at the queue.
5870 	 */
5871 
5872 	/* Don't do work if we said we couldn't... */
5873 	if (!stackshot_cpu_ctx.scc_can_work) {
5874 		return;
5875 	}
5876 
5877 	/* Spin until we're ready to run. */
5878 	while (os_atomic_load(&stackshot_ctx.sc_state, acquire) == SS_SETUP) {
5879 		loop_wait();
5880 	}
5881 
5882 	/* Check to make sure the setup didn't error out or panic. */
5883 	if (stackshot_status_check() != KERN_SUCCESS) {
5884 		goto exit;
5885 	}
5886 
5887 	/* the CPU entering here is participating in the stackshot */
5888 	stackshot_cpu_ctx.scc_did_work = true;
5889 
5890 	if (stackshot_ctx.sc_main_cpuid == cpu_number()) {
5891 		stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
5892 	} else {
5893 		stackshot_cpu_do_work();
5894 	}
5895 
5896 exit:
5897 	os_atomic_dec(&stackshot_ctx.sc_cpus_working, release);
5898 }
5899 
5900 boolean_t
5901 stackshot_thread_is_idle_worker_unsafe(thread_t thread)
5902 {
5903 	/* When the pthread kext puts a worker thread to sleep, it will
5904 	 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
5905 	 * struct. See parkit() in kern/kern_support.c in libpthread.
5906 	 */
5907 	return (thread->state & TH_WAIT) &&
5908 	       (thread->block_hint == kThreadWaitParkedWorkQueue);
5909 }
5910 
5911 #if CONFIG_COALITIONS
5912 static void
5913 stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
5914 {
5915 #pragma unused(i, coal)
5916 	unsigned int *coalition_count = (unsigned int*)arg;
5917 	(*coalition_count)++;
5918 }
5919 
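/*
 * Fill in one jetsam_coalition_snapshot: coalition id, lifecycle flags,
 * associated thread group id (when thread groups are configured), and the
 * leader task's unique id (0 if there is no leader).
 */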
5920 static void
5921 stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
5922 {
5923 	if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
5924 		return;
5925 	}
5926 
5927 	struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
5928 	struct jetsam_coalition_snapshot *jcs = &coalitions[i];
5929 	task_t leader = TASK_NULL;
5930 	jcs->jcs_id = coalition_id(coal);
5931 	jcs->jcs_flags = 0;
5932 	jcs->jcs_thread_group = 0;
5933 
5934 	if (coalition_term_requested(coal)) {
5935 		jcs->jcs_flags |= kCoalitionTermRequested;
5936 	}
5937 	if (coalition_is_terminated(coal)) {
5938 		jcs->jcs_flags |= kCoalitionTerminated;
5939 	}
5940 	if (coalition_is_reaped(coal)) {
5941 		jcs->jcs_flags |= kCoalitionReaped;
5942 	}
5943 	if (coalition_is_privileged(coal)) {
5944 		jcs->jcs_flags |= kCoalitionPrivileged;
5945 	}
5946 
5947 #if CONFIG_THREAD_GROUPS
5948 	struct thread_group *thread_group = kdp_coalition_get_thread_group(coal);
5949 	if (thread_group) {
5950 		jcs->jcs_thread_group = thread_group_get_id(thread_group);
5951 	}
5952 #endif /* CONFIG_THREAD_GROUPS */
5953 
5954 	leader = kdp_coalition_get_leader(coal);
5955 	if (leader) {
5956 		jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
5957 	} else {
5958 		jcs->jcs_leader_task_uniqueid = 0;
5959 	}
5960 }
5961 #endif /* CONFIG_COALITIONS */
5962 
5963 #if CONFIG_THREAD_GROUPS
5964 static void
5965 stackshot_thread_group_count(void *arg, int i, struct thread_group *tg)
5966 {
5967 #pragma unused(i, tg)
5968 	unsigned int *n = (unsigned int*)arg;
5969 	(*n)++;
5970 }
5971 
5972 static void
5973 stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg)
5974 {
5975 	struct thread_group_snapshot_v3 *thread_groups = arg;
5976 	struct thread_group_snapshot_v3 *tgs = &thread_groups[i];
5977 	const char *name = thread_group_get_name(tg);
5978 	uint32_t flags = thread_group_get_flags(tg);
5979 	tgs->tgs_id = thread_group_get_id(tg);
5980 	static_assert(THREAD_GROUP_MAXNAME > sizeof(tgs->tgs_name));
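	/* Names may exceed tgs_name; the remainder spills into tgs_name_cont. */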
5981 	kdp_memcpy(tgs->tgs_name, name, sizeof(tgs->tgs_name));
5982 	kdp_memcpy(tgs->tgs_name_cont, name + sizeof(tgs->tgs_name),
5983 	    sizeof(tgs->tgs_name_cont));
5984 	tgs->tgs_flags =
5985 	    ((flags & THREAD_GROUP_FLAGS_EFFICIENT)     ? kThreadGroupEfficient     : 0) |
5986 	    ((flags & THREAD_GROUP_FLAGS_APPLICATION)   ? kThreadGroupApplication   : 0) |
5987 	    ((flags & THREAD_GROUP_FLAGS_CRITICAL)      ? kThreadGroupCritical      : 0) |
5988 	    ((flags & THREAD_GROUP_FLAGS_BEST_EFFORT)   ? kThreadGroupBestEffort    : 0) |
5989 	    ((flags & THREAD_GROUP_FLAGS_UI_APP)        ? kThreadGroupUIApplication : 0) |
5990 	    ((flags & THREAD_GROUP_FLAGS_MANAGED)       ? kThreadGroupManaged       : 0) |
5991 	    ((flags & THREAD_GROUP_FLAGS_STRICT_TIMERS) ? kThreadGroupStrictTimers  : 0) |
5992 	    0;
5993 }
5994 #endif /* CONFIG_THREAD_GROUPS */
5995 
5996 /* Determine if a thread has waitinfo that stackshot can provide */
5997 static int
5998 stackshot_thread_has_valid_waitinfo(thread_t thread)
5999 {
6000 	if (!(thread->state & TH_WAIT)) {
6001 		return 0;
6002 	}
6003 
6004 	switch (thread->block_hint) {
6005 	// If set to None or is a parked work queue, ignore it
6006 	case kThreadWaitParkedWorkQueue:
6007 	case kThreadWaitNone:
6008 		return 0;
6009 	// There is a short window where the pthread kext removes a thread
6010 	// from its ksyn wait queue before waking the thread up
6011 	case kThreadWaitPThreadMutex:
6012 	case kThreadWaitPThreadRWLockRead:
6013 	case kThreadWaitPThreadRWLockWrite:
6014 	case kThreadWaitPThreadCondVar:
6015 		return kdp_pthread_get_thread_kwq(thread) != NULL;
6016 	// All other cases are valid block hints if in a wait state
6017 	default:
6018 		return 1;
6019 	}
6020 }
6021 
6022 /* Determine if a thread has turnstileinfo that stackshot can provide */
6023 static int
6024 stackshot_thread_has_valid_turnstileinfo(thread_t thread)
6025 {
6026 	struct turnstile *ts = thread_get_waiting_turnstile(thread);
6027 
6028 	return stackshot_thread_has_valid_waitinfo(thread) &&
6029 	       ts != TURNSTILE_NULL;
6030 }
6031 
6032 static void
6033 stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo)
6034 {
6035 	struct turnstile *ts;
6036 	struct ipc_service_port_label *ispl = NULL;
6037 
6038 	/* acquire turnstile information and store it in the stackshot */
6039 	ts = thread_get_waiting_turnstile(thread);
6040 	tsinfo->waiter = thread_tid(thread);
6041 	kdp_turnstile_fill_tsinfo(ts, tsinfo, &ispl);
6042 	tsinfo->portlabel_id = stackshot_plh_lookup(ispl,
6043 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_SENDPORT) ? STACKSHOT_PLH_LOOKUP_SEND :
6044 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_RECEIVEPORT) ? STACKSHOT_PLH_LOOKUP_RECEIVE :
6045 	    STACKSHOT_PLH_LOOKUP_UNKNOWN);
6046 }
6047 
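/*
 * Populate a thread_waitinfo record for a waiting thread by dispatching on its
 * block_hint to the matching kdp_*_find_owner() helper; hints without a helper
 * fall through to the default case and report no owner.
 */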
6048 static void
6049 stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t *waitinfo)
6050 {
6051 	thread_waitinfo_t *waitinfo_v1 = (thread_waitinfo_t *)waitinfo;
6052 	struct ipc_service_port_label *ispl = NULL;
6053 
6054 	waitinfo->waiter        = thread_tid(thread);
6055 	waitinfo->wait_type     = thread->block_hint;
6056 	waitinfo->wait_flags    = 0;
6057 
6058 	switch (waitinfo->wait_type) {
6059 	case kThreadWaitKernelMutex:
6060 		kdp_lck_mtx_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6061 		break;
6062 	case kThreadWaitPortReceive:
6063 		kdp_mqueue_recv_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
6064 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_RECEIVE);
6065 		break;
6066 	case kThreadWaitPortSend:
6067 		kdp_mqueue_send_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
6068 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_SEND);
6069 		break;
6070 	case kThreadWaitSemaphore:
6071 		kdp_sema_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6072 		break;
6073 	case kThreadWaitUserLock:
6074 		kdp_ulock_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6075 		break;
6076 	case kThreadWaitKernelRWLockRead:
6077 	case kThreadWaitKernelRWLockWrite:
6078 	case kThreadWaitKernelRWLockUpgrade:
6079 		kdp_rwlck_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6080 		break;
6081 	case kThreadWaitPThreadMutex:
6082 	case kThreadWaitPThreadRWLockRead:
6083 	case kThreadWaitPThreadRWLockWrite:
6084 	case kThreadWaitPThreadCondVar:
6085 		kdp_pthread_find_owner(thread, waitinfo_v1);
6086 		break;
6087 	case kThreadWaitWorkloopSyncWait:
6088 		kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo_v1);
6089 		break;
6090 	case kThreadWaitOnProcess:
6091 		kdp_wait4_find_process(thread, thread->wait_event, waitinfo_v1);
6092 		break;
6093 	case kThreadWaitSleepWithInheritor:
6094 		kdp_sleep_with_inheritor_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6095 		break;
6096 	case kThreadWaitEventlink:
6097 		kdp_eventlink_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6098 		break;
6099 	case kThreadWaitCompressor:
6100 		kdp_compressor_busy_find_owner(thread->wait_event, waitinfo_v1);
6101 		break;
6102 #ifdef CONFIG_EXCLAVES
6103 	case kThreadWaitExclaveCore:
6104 	case kThreadWaitExclaveKit:
6105 		kdp_esync_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6106 		break;
6107 #endif /* CONFIG_EXCLAVES */
6108 	case kThreadWaitPageBusy:
6109 		kdp_vm_page_sleep_find_owner(thread->wait_event, waitinfo_v1);
6110 		break;
6111 	case kThreadWaitPagingInProgress:
6112 	case kThreadWaitPagingActivity:
6113 	case kThreadWaitPagerInit:
6114 	case kThreadWaitPagerReady:
6115 	case kThreadWaitMemoryBlocked:
6116 	case kThreadWaitPageInThrottle:
6117 		kdp_vm_object_sleep_find_owner(thread->wait_event, waitinfo->wait_type, waitinfo_v1);
6118 		break;
6119 	default:
6120 		waitinfo->owner = 0;
6121 		waitinfo->context = 0;
6122 		break;
6123 	}
6124 }
6125