/*
 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
 *
 * @Apple_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/kdebug_common.h>
#include <vm/vm_kern_xnu.h>

LCK_GRP_DECLARE(kdebug_lck_grp, "kdebug");
int kdbg_debug = 0;

extern struct kd_control kd_control_trace, kd_control_triage;

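/*
 * Acquire the per-mode storage spin lock with interrupts disabled.  The
 * return value is the previous interrupt state and must be handed back to
 * kdebug_storage_unlock().
 */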
int
kdebug_storage_lock(struct kd_control *kd_ctrl_page)
{
	int intrs_en = ml_set_interrupts_enabled(false);
	lck_spin_lock_grp(&kd_ctrl_page->kdc_storage_lock, &kdebug_lck_grp);
	return intrs_en;
}

void
kdebug_storage_unlock(struct kd_control *kd_ctrl_page, int intrs_en)
{
	lck_spin_unlock(&kd_ctrl_page->kdc_storage_lock);
	ml_set_interrupts_enabled(intrs_en);
}

// Turn on boot tracing and set the number of events.
static TUNABLE(unsigned int, new_nkdbufs, "trace", 0);
// Enable wrapping during boot tracing.
TUNABLE(unsigned int, trace_wrap, "trace_wrap", 0);
// The filter description to apply to boot tracing.
static TUNABLE_STR(trace_typefilter, 256, "trace_typefilter", "");

// Turn on wake tracing and set the number of events.
TUNABLE(unsigned int, wake_nkdbufs, "trace_wake", 0);
// Write trace events to a file in the event of a panic.
TUNABLE(unsigned int, write_trace_on_panic, "trace_panic", 0);

// Obsolete leak logging system.
TUNABLE(int, log_leaks, "-l", 0);

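/*
 * Called at boot to initialize the storage locks, set up boot tracing
 * according to the "trace" boot-args, and create the triage buffers.
 */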
void
kdebug_startup(void)
{
	lck_spin_init(&kd_control_trace.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	lck_spin_init(&kd_control_triage.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	kdebug_init(new_nkdbufs, trace_typefilter,
	    (trace_wrap ? KDOPT_WRAPPING : 0) | KDOPT_ATBOOT);
	create_buffers_triage();
}

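/*
 * Return the number of CPUs that can emit events.  On x86_64 this may run
 * before all CPUs are brought up, so use the early maximum CPU number
 * rather than the live CPU count.
 */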
uint32_t
kdbg_cpu_count(void)
{
#if defined(__x86_64__)
	return ml_early_cpu_max_number() + 1;
#else // defined(__x86_64__)
	return ml_get_cpu_count();
#endif // !defined(__x86_64__)
}

/*
 * Both kdebug_timestamp() and kdebug_using_continuous_time() are known to
 * kexts, and going forward we always want to use mach_continuous_time().
 * Keep these two routines as-is so that TRACE mode use outside the kernel
 * stays intact.  TRIAGE mode explicitly uses only mach_continuous_time()
 * for its timestamps.
 */
bool
kdebug_using_continuous_time(void)
{
	return kd_control_trace.kdc_flags & KDBG_CONTINUOUS_TIME;
}

uint64_t
kdebug_timestamp(void)
{
	if (kdebug_using_continuous_time()) {
		return mach_continuous_time();
	} else {
		return mach_absolute_time();
	}
}

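/*
 * Allocate the per-CPU bookkeeping, the copy buffer, and the storage regions
 * backing a trace or triage session, then thread every storage unit onto the
 * control page's free list.  Returns 0 on success or ENOSPC if an allocation
 * fails, in which case anything partially allocated is torn down via
 * delete_buffers().
 */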
int
create_buffers(
	struct kd_control *kd_ctrl_page,
	struct kd_buffer *kd_data_page,
	vm_tag_t tag)
{
	unsigned int i;
	unsigned int p_buffer_size;
	unsigned int f_buffer_size;
	unsigned int f_buffers;
	int error = 0;
	int ncpus, count_storage_units = 0;

	struct kd_bufinfo *kdbip = NULL;
	struct kd_region *kd_bufs = NULL;
	int kdb_storage_count = kd_data_page->kdb_storage_count;

	ncpus = kd_ctrl_page->alloc_cpus;

	kdbip = kalloc_type_tag(struct kd_bufinfo, ncpus, Z_WAITOK | Z_ZERO, tag);
	if (kdbip == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kdb_info = kdbip;

	f_buffers = kdb_storage_count / N_STORAGE_UNITS_PER_BUFFER;
	kd_data_page->kdb_region_count = f_buffers;

	f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
	p_buffer_size = (kdb_storage_count % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);

	if (p_buffer_size) {
		kd_data_page->kdb_region_count++;
	}

	if (kd_data_page->kdcopybuf == 0) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kdcopybuf,
		    (vm_size_t) kd_ctrl_page->kdebug_kdcopybuf_size,
		    KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
	}

	kd_bufs = kalloc_type_tag(struct kd_region, kd_data_page->kdb_region_count,
	    Z_WAITOK | Z_ZERO, tag);
	if (kd_bufs == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kd_bufs = kd_bufs;

	for (i = 0; i < f_buffers; i++) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)f_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdr_size = f_buffer_size;
	}
	if (p_buffer_size) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)p_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdr_size = p_buffer_size;
	}

	count_storage_units = 0;
	for (i = 0; i < kd_data_page->kdb_region_count; i++) {
		struct kd_storage *kds;
		uint16_t n_elements;
		static_assert(N_STORAGE_UNITS_PER_BUFFER <= UINT16_MAX);
		assert(kd_bufs[i].kdr_size <= N_STORAGE_UNITS_PER_BUFFER *
		    sizeof(struct kd_storage));

		n_elements = kd_bufs[i].kdr_size / sizeof(struct kd_storage);
		kds = kd_bufs[i].kdr_addr;

		for (uint16_t n = 0; n < n_elements; n++) {
			kds[n].kds_next.buffer_index = kd_ctrl_page->kds_free_list.buffer_index;
			kds[n].kds_next.offset = kd_ctrl_page->kds_free_list.offset;

			kd_ctrl_page->kds_free_list.buffer_index = i;
			kd_ctrl_page->kds_free_list.offset = n;
		}
		count_storage_units += n_elements;
	}

	kd_data_page->kdb_storage_count = count_storage_units;

	for (i = 0; i < ncpus; i++) {
		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
		kdbip[i].kd_lostevents = false;
		kdbip[i].num_bufs = 0;
	}

	kd_ctrl_page->kdc_flags |= KDBG_BUFINIT;

	kd_ctrl_page->kdc_storage_used = 0;
out:
	if (error) {
		delete_buffers(kd_ctrl_page, kd_data_page);
	}

	return error;
}

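/*
 * Free everything allocated by create_buffers() and clear the buffer-related
 * state in the control page.
 */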
void
delete_buffers(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page)
{
	unsigned int i;
	int kdb_region_count = kd_data_page->kdb_region_count;

	struct kd_bufinfo *kdbip = kd_data_page->kdb_info;
	struct kd_region *kd_bufs = kd_data_page->kd_bufs;

	if (kd_bufs) {
		for (i = 0; i < kdb_region_count; i++) {
			if (kd_bufs[i].kdr_addr) {
				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdr_addr, (vm_size_t)kd_bufs[i].kdr_size);
			}
		}
		kfree_type(struct kd_region, kdb_region_count, kd_bufs);

		kd_data_page->kd_bufs = NULL;
		kd_data_page->kdb_region_count = 0;
	}
	if (kd_data_page->kdcopybuf) {
		kmem_free(kernel_map, (vm_offset_t)kd_data_page->kdcopybuf, kd_ctrl_page->kdebug_kdcopybuf_size);

		kd_data_page->kdcopybuf = NULL;
	}
	kd_ctrl_page->kds_free_list.raw = KDS_PTR_NULL;

	if (kdbip) {
		kfree_type(struct kd_bufinfo, kd_ctrl_page->alloc_cpus, kdbip);
		kd_data_page->kdb_info = NULL;
	}
	kd_ctrl_page->kdc_coprocs = NULL;
	kd_ctrl_page->kdebug_cpus = 0;
	kd_ctrl_page->alloc_cpus = 0;
	kd_ctrl_page->kdc_flags &= ~KDBG_BUFINIT;
}

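/*
 * Make sure the tail storage unit for this CPU has room for another event,
 * either by taking a unit from the free list or, when wrapping is allowed,
 * by stealing the oldest full unit from some CPU.  Returns false if no unit
 * could be obtained and tracing had to be disabled instead.
 */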
static bool
allocate_storage_unit(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page, int cpu)
{
	union kds_ptr kdsp;
	struct kd_storage *kdsp_actual, *kdsp_next_actual;
	struct kd_bufinfo *kdbip, *kdbp, *kdbp_vict, *kdbp_try;
	uint64_t oldest_ts, ts;
	bool retval = true;
	struct kd_region *kd_bufs;

	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	kdbp = &kd_data_page->kdb_info[cpu];
	kd_bufs = kd_data_page->kd_bufs;
	kdbip = kd_data_page->kdb_info;

	/* If someone beat us to the allocation, return success. */
	if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail);

		if (kdsp_actual->kds_bufindx < kd_ctrl_page->kdebug_events_per_storage_unit) {
			goto out;
		}
	}

	if ((kdsp = kd_ctrl_page->kds_free_list).raw != KDS_PTR_NULL) {
		/*
		 * If there's a free page, grab it from the free list.
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
		kd_ctrl_page->kds_free_list = kdsp_actual->kds_next;

		kd_ctrl_page->kdc_storage_used++;
	} else {
		/*
		 * Otherwise, we're going to lose events and repurpose the oldest
		 * storage unit we can find.
		 */
		if (kd_ctrl_page->kdc_live_flags & KDBG_NOWRAP) {
			kd_ctrl_page->kdc_emit = KDEMIT_DISABLE;
			kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED;
			kdebug_enable = 0;
			kd_ctrl_page->enabled = 0;
			commpage_update_kdebug_state();
			kdbp->kd_lostevents = true;
			retval = false;
			goto out;
		}
		kdbp_vict = NULL;
		oldest_ts = UINT64_MAX;

		for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page->kdebug_cpus]; kdbp_try++) {
			if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
				/*
				 * No storage unit to steal.
				 */
				continue;
			}

			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_try->kd_list_head);

			if (kdsp_actual->kds_bufcnt < kd_ctrl_page->kdebug_events_per_storage_unit) {
				/*
				 * Don't steal the storage unit that's actively being
				 * recorded to -- move on, because taking it would let an
				 * out-of-order set of events show up later.
				 */
				continue;
			}

			/*
			 * When wrapping, steal the storage unit with the
			 * earliest timestamp on its last event, instead of the
			 * earliest timestamp on the first event. This allows a
			 * storage unit with more recent events to be preserved,
			 * even if the storage unit contains events that are
			 * older than those found in other CPUs.
			 */
			ts = kdbg_get_timestamp(&kdsp_actual->kds_records[kd_ctrl_page->kdebug_events_per_storage_unit - 1]);
			if (ts < oldest_ts) {
				oldest_ts = ts;
				kdbp_vict = kdbp_try;
			}
		}
		if (kdbp_vict == NULL && kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			kd_ctrl_page->kdc_emit = KDEMIT_DISABLE;
			kdebug_enable = 0;
			kd_ctrl_page->enabled = 0;
			commpage_update_kdebug_state();
			retval = false;
			goto out;
		}
		kdsp = kdbp_vict->kd_list_head;
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
		kdbp_vict->kd_list_head = kdsp_actual->kds_next;

		if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
			kdsp_next_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_vict->kd_list_head);
			kdsp_next_actual->kds_lostevents = true;
		} else {
			kdbp_vict->kd_lostevents = true;
		}

		if (kd_ctrl_page->kdc_oldest_time < oldest_ts) {
			kd_ctrl_page->kdc_oldest_time = oldest_ts;
		}
		kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED;
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
		kdsp_actual->kds_timestamp = kdebug_timestamp();
	} else {
		kdsp_actual->kds_timestamp = mach_continuous_time();
	}

	kdsp_actual->kds_next.raw = KDS_PTR_NULL;
	kdsp_actual->kds_bufcnt = 0;
	kdsp_actual->kds_readlast = 0;

	kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
	kdbp->kd_lostevents = false;
	kdsp_actual->kds_bufindx = 0;

	if (kdbp->kd_list_head.raw == KDS_PTR_NULL) {
		kdbp->kd_list_head = kdsp;
	} else {
		POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail)->kds_next = kdsp;
	}
	kdbp->kd_list_tail = kdsp;
out:
	kdebug_storage_unlock(kd_ctrl_page, intrs_en);

	return retval;
}

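/*
 * Return the storage unit at the head of this CPU's list to the free list,
 * unless a wrapping writer has already stolen it out from under us.
 */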
static void
release_storage_unit(struct kd_control *kd_ctrl_page, struct kd_buffer *kd_data_page, int cpu, uint32_t kdsp_raw)
{
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	union kds_ptr kdsp;

	kdbp = &kd_data_page->kdb_info[cpu];

	kdsp.raw = kdsp_raw;

	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	if (kdsp.raw == kdbp->kd_list_head.raw) {
		/*
		 * It's possible for the storage unit pointed to by kdsp to have
		 * already been stolen, so check that it's still the head of the
		 * list now that we're behind the lock that protects adding to and
		 * removing from the queue.  Since we only ever release and steal
		 * units from that position, if it's no longer the head we have
		 * nothing to do in this context.
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kdsp);
		kdbp->kd_list_head = kdsp_actual->kds_next;

		kdsp_actual->kds_next = kd_ctrl_page->kds_free_list;
		kd_ctrl_page->kds_free_list = kdsp;

		kd_ctrl_page->kdc_storage_used--;
	}

	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
}

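/*
 * Prevent writers from stealing storage units while a reader merges events.
 * The previous emit filter and live flags are returned through the out
 * parameters so the reader can restore them; the return value reports
 * whether the buffers had already wrapped.
 */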
bool
kdebug_disable_wrap(struct kd_control *ctl,
    kdebug_emit_filter_t *old_emit, kdebug_live_flags_t *old_live)
{
	int intrs_en = kdebug_storage_lock(ctl);

	*old_emit = ctl->kdc_emit;
	*old_live = ctl->kdc_live_flags;

	bool wrapped = ctl->kdc_live_flags & KDBG_WRAPPED;
	ctl->kdc_live_flags &= ~KDBG_WRAPPED;
	ctl->kdc_live_flags |= KDBG_NOWRAP;

	kdebug_storage_unlock(ctl, intrs_en);

	return wrapped;
}

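// Re-allow wrapping and, if one was provided, restore the saved emit filter
// after a read completes.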
static void
_enable_wrap(struct kd_control *kd_ctrl_page, kdebug_emit_filter_t emit)
{
	int intrs_en = kdebug_storage_lock(kd_ctrl_page);
	kd_ctrl_page->kdc_live_flags &= ~KDBG_NOWRAP;
	if (emit) {
		kd_ctrl_page->kdc_emit = emit;
	}
	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
}

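/*
 * Record a single event into the per-CPU storage backing this control page.
 * A cpu or timestamp of -1 asks this routine to fill in the current CPU or
 * current time; explicit timestamps (e.g. from coprocessors) are converted
 * into the session's time base.  Preemption is disabled while recording.
 *
 * A caller-side sketch (the buffer argument name below is illustrative, not
 * a symbol defined in this file):
 *
 *	struct kd_record kd_rec = {
 *		.cpu = -1, .timestamp = -1,
 *		.debugid = debugid,
 *		.arg1 = a1, .arg2 = a2, .arg3 = a3, .arg4 = a4, .arg5 = a5,
 *	};
 *	kernel_debug_write(&kd_control_trace, trace_buffer, kd_rec);
 */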
__attribute__((always_inline))
void
kernel_debug_write(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page,
    struct kd_record kd_rec)
{
	uint64_t now = 0;
	uint32_t bindx;
	kd_buf *kd;
	int cpu;
	struct kd_bufinfo *kdbp;
	struct kd_storage *kdsp_actual;
	union kds_ptr kds_raw;

	disable_preemption();

	if (kd_ctrl_page->enabled == 0) {
		goto out;
	}

	if (kd_rec.cpu == -1) {
		cpu = cpu_number();
	} else {
		cpu = kd_rec.cpu;
	}

	kdbp = &kd_data_page->kdb_info[cpu];

	bool timestamp_is_continuous = kdbp->continuous_timestamps;

	if (kd_rec.timestamp != -1) {
		if (kdebug_using_continuous_time()) {
			if (!timestamp_is_continuous) {
				kd_rec.timestamp = absolutetime_to_continuoustime(kd_rec.timestamp);
			}
		} else {
			if (timestamp_is_continuous) {
				kd_rec.timestamp = continuoustime_to_absolutetime(kd_rec.timestamp);
			}
		}
		kd_rec.timestamp &= KDBG_TIMESTAMP_MASK;
		if (kd_rec.timestamp < kd_ctrl_page->kdc_oldest_time) {
			if (kdbp->latest_past_event_timestamp < kd_rec.timestamp) {
				kdbp->latest_past_event_timestamp = kd_rec.timestamp;
			}
			goto out;
		}
	}

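	/*
	 * Find the tail storage unit for this CPU; allocate a fresh one if the
	 * list is empty or the current unit is full.
	 */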
retry_q:
	kds_raw = kdbp->kd_list_tail;

	if (kds_raw.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kds_raw);
		bindx = kdsp_actual->kds_bufindx;
	} else {
		kdsp_actual = NULL;
		bindx = kd_ctrl_page->kdebug_events_per_storage_unit;
	}

	if (kdsp_actual == NULL || bindx >= kd_ctrl_page->kdebug_events_per_storage_unit) {
		if (allocate_storage_unit(kd_ctrl_page, kd_data_page, cpu) == false) {
			/*
			 * This can only happen if wrapping
			 * has been disabled.
			 */
			goto out;
		}
		goto retry_q;
	}

	if (kd_rec.timestamp != -1) {
		/*
		 * IOP entries can be allocated before xnu allocates and initializes
		 * the buffer.  Also, Intel uses a special 0 value as an early-tracing
		 * timestamp sentinel to mark the start of the time range of interest.
		 */
		if (kd_rec.timestamp < kdsp_actual->kds_timestamp) {
			kdsp_actual->kds_timestamp = kd_rec.timestamp;
		}
		now = kd_rec.timestamp;
	} else {
		if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			now = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
		} else {
			now = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
		}
	}

	if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
		goto retry_q;
	}

	kd = &kdsp_actual->kds_records[bindx];

	if (kd_ctrl_page->kdc_flags & KDBG_DEBUGID_64) {
		/* The debugid has been passed in arg4. */
		kd->debugid = 0;
	} else {
		kd->debugid = kd_rec.debugid;
	}

	kd->arg1 = kd_rec.arg1;
	kd->arg2 = kd_rec.arg2;
	kd->arg3 = kd_rec.arg3;
	kd->arg4 = kd_rec.arg4;
	kd->arg5 = kd_rec.arg5;

	kdbg_set_timestamp_and_cpu(kd, now, cpu);

	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);

out:
	enable_preemption();
}

// Read events from kdebug storage units into a user space buffer or file.
//
// This code runs while events are emitted -- storage unit allocation and
// deallocation will synchronize with the emitters.  Only one reader per
// control structure is allowed.
int
kernel_debug_read(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page, user_addr_t buffer, size_t *number,
    vnode_t vp, vfs_context_t ctx, uint32_t file_version)
{
	size_t count;
	unsigned int cpu, min_cpu;
	uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
	int error = 0;
	kd_buf *tempbuf;
	uint32_t rcursor;
	kd_buf lostevent;
	union kds_ptr kdsp;
	bool traced_retrograde = false;
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	struct kd_bufinfo *min_kdbp;
	size_t tempbuf_count;
	uint32_t tempbuf_number;
	kdebug_emit_filter_t old_emit;
	uint32_t old_live_flags;
	bool out_of_events = false;
	bool wrapped = false;
	bool set_preempt = true;
	bool should_disable = false;

	struct kd_bufinfo *kdbip = kd_data_page->kdb_info;
	struct kd_region *kd_bufs = kd_data_page->kd_bufs;

	assert(number != NULL);
	count = *number / sizeof(kd_buf);
	*number = 0;

	if (count == 0 || !(kd_ctrl_page->kdc_flags & KDBG_BUFINIT) || kd_data_page->kdcopybuf == 0) {
		return EINVAL;
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * A corpse can be created due to 'TASK_HAS_TOO_MANY_THREADS',
		 * and that can be handled by a callout thread that already has
		 * eager preemption set, so check whether we are dealing with
		 * one such thread.
		 */
		set_preempt = !(thread_is_eager_preempt(current_thread()));
	}

	if (set_preempt) {
		thread_set_eager_preempt(current_thread());
	}

	memset(&lostevent, 0, sizeof(lostevent));
	lostevent.debugid = TRACE_LOST_EVENTS;

	/*
	 * Capture the current time.  Only sort events that have occurred
	 * before now.  Since the IOPs are being flushed here, it is possible
	 * that events occur on the AP while running live tracing.
	 */
	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
		barrier_max = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
	} else {
		barrier_max = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
	}

	/*
	 * Disable wrap so storage units cannot be stolen out from underneath us
	 * while merging events.
	 *
	 * Because we hold ktrace_lock, no other control threads can be playing
	 * with kdc_flags.  The code that emits new events could be running,
	 * but it grabs kdc_storage_lock if it needs to acquire a new storage
	 * chunk, which is where it examines kdc_flags.  If it is adding to
	 * the same chunk we're reading from, check for that below.
	 */
	wrapped = kdebug_disable_wrap(kd_ctrl_page, &old_emit, &old_live_flags);

	if (count > kd_data_page->kdb_event_count) {
		count = kd_data_page->kdb_event_count;
	}

	if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
		tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
	}

	/*
	 * If the buffers have wrapped, do not emit additional lost events for the
	 * oldest storage units.
	 */
	if (wrapped) {
		kd_ctrl_page->kdc_live_flags &= ~KDBG_WRAPPED;

		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_lostevents = false;
		}
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * In TRIAGE mode we want to extract all the current records,
		 * regardless of where we stopped reading last time, so that we
		 * have the best shot at getting older records for threads before
		 * the buffers are wrapped.  So:
		 * a) set kd_prev_timebase to 0 so we (re-)consider older records
		 * b) set kds_readlast to 0 to start the search from the first
		 *    record.
		 */
		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			kdbp->kd_prev_timebase = 0;
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_readlast = 0;
		}
	}

	/*
	 * Capture the earliest time for which there are events on all CPUs and
	 * don't emit events with earlier timestamps.
	 */
	barrier_min = kd_ctrl_page->kdc_oldest_time;

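	/*
	 * Merge events from all CPUs in timestamp order, copying them out in
	 * batches of at most kdebug_kdcopybuf_count events at a time.
	 */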
	while (count) {
		tempbuf = kd_data_page->kdcopybuf;
		tempbuf_number = 0;

		if (wrapped) {
			/*
			 * Emit a lost events tracepoint to indicate that previous events
			 * were lost -- the thread map cannot be trusted.  A new one must
			 * be taken so tools can analyze the trace in a backwards-facing
			 * fashion.
			 */
			kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
			*tempbuf = lostevent;
			wrapped = false;
			goto nextevent;
		}

		/* While space left in merged events scratch buffer. */
		while (tempbuf_count) {
			bool lostevents = false;
			int lostcpu = 0;
			earliest_time = UINT64_MAX;
			min_kdbp = NULL;
			min_cpu = 0;

			/* Check each CPU's buffers for the earliest event. */
			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
				/* Skip CPUs without data in their oldest storage unit. */
				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
next_cpu:
					continue;
				}
				/* From CPU data to buffer header to buffer. */
				kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

next_event:
				/* The next event to be read from this buffer. */
				rcursor = kdsp_actual->kds_readlast;

				/* Skip this buffer if there are no events left. */
				if (rcursor == kdsp_actual->kds_bufindx) {
					continue;
				}

				if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
					/*
					 * TRIAGE mode record keeping doesn't (currently)
					 * use lostevent markers.  It also doesn't want to
					 * call release_storage_unit() in this read call --
					 * it expects the buffers to wrap and records to be
					 * reclaimed solely in that way.
					 */
					t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
					goto skip_record_checks;
				}

				/*
				 * Check that this storage unit wasn't stolen and events were
				 * lost.  This must have happened while wrapping was disabled
				 * in this function.
				 */
				if (kdsp_actual->kds_lostevents) {
					lostevents = true;
					kdsp_actual->kds_lostevents = false;

					/*
					 * The earliest event we can trust is the first one in this
					 * stolen storage unit.
					 */
					uint64_t lost_time =
					    kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
					if (kd_ctrl_page->kdc_oldest_time < lost_time) {
						/*
						 * If this is the first time we've seen lost events for
						 * this gap, record its timestamp as the oldest
						 * timestamp we're willing to merge for the lost events
						 * tracepoint.
						 */
						kd_ctrl_page->kdc_oldest_time = barrier_min = lost_time;
						lostcpu = cpu;
					}
				}

				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);

				if (t > barrier_max) {
					goto next_cpu;
				}
				if (t < kdsp_actual->kds_timestamp) {
					/*
					 * This indicates the event emitter hasn't completed
					 * filling in the event (because we're looking at the
					 * buffer that the record head is using).  The max barrier
					 * timestamp should have saved us from seeing these kinds
					 * of things, but other CPUs might be slow on the up-take.
					 *
					 * Bail out so we don't get out-of-order events by
					 * continuing to read events from other CPUs' events.
					 */
					out_of_events = true;
					break;
				}

				/*
				 * Ignore events that have aged out due to wrapping or storage
				 * unit exhaustion while merging events.
				 */
				if (t < barrier_min) {
					kdsp_actual->kds_readlast++;
					if (kdsp_actual->kds_readlast >= kd_ctrl_page->kdebug_events_per_storage_unit) {
						release_storage_unit(kd_ctrl_page, kd_data_page, cpu, kdsp.raw);

						if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
							goto next_cpu;
						}
						kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
					}
					goto next_event;
				}

				/*
				 * Don't worry about merging any events -- just walk through
				 * the CPUs and find the latest timestamp of lost events.
				 */
				if (lostevents) {
					continue;
				}
skip_record_checks:
				if (t < earliest_time) {
					earliest_time = t;
					min_kdbp = kdbp;
					min_cpu = cpu;
				}
			}
			if (lostevents) {
				/*
				 * If any lost events were hit in the buffers, emit an event
				 * with the latest timestamp.
				 */
				kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
				*tempbuf = lostevent;
				tempbuf->arg1 = 1;
				goto nextevent;
			}
			if (min_kdbp == NULL) {
				/* All buffers ran empty. */
				out_of_events = true;
			}
			if (out_of_events) {
				break;
			}

			kdsp = min_kdbp->kd_list_head;
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

			if (min_kdbp->latest_past_event_timestamp != 0) {
				if (kdbg_debug) {
					printf("kdebug: PAST EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, earliest_time, min_cpu);
				tempbuf->arg1 = (kd_buf_argtype)min_kdbp->latest_past_event_timestamp;
				tempbuf->arg2 = 0;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_PAST_EVENTS;
				min_kdbp->latest_past_event_timestamp = 0;
				goto nextevent;
			}

			/* Copy earliest event into merged events scratch buffer. */
			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
			kd_buf *earliest_event = tempbuf;
			if (kd_control_trace.kdc_flags & KDBG_MATCH_DISABLE) {
				kd_event_matcher *match = &kd_control_trace.disable_event_match;
				kd_event_matcher *mask = &kd_control_trace.disable_event_mask;
				if ((earliest_event->debugid & mask->kem_debugid) == match->kem_debugid &&
				    (earliest_event->arg1 & mask->kem_args[0]) == match->kem_args[0] &&
				    (earliest_event->arg2 & mask->kem_args[1]) == match->kem_args[1] &&
				    (earliest_event->arg3 & mask->kem_args[2]) == match->kem_args[2] &&
				    (earliest_event->arg4 & mask->kem_args[3]) == match->kem_args[3]) {
					should_disable = true;
				}
			}

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				if (kdsp_actual->kds_readlast == kd_ctrl_page->kdebug_events_per_storage_unit) {
					release_storage_unit(kd_ctrl_page, kd_data_page, min_cpu, kdsp.raw);
				}
			}

			/*
			 * Watch for out-of-order timestamps (from IOPs).
			 */
			if (earliest_time < min_kdbp->kd_prev_timebase) {
				/*
				 * If we haven't already, emit a retrograde events event.
				 * Otherwise, ignore this event.
				 */
				if (traced_retrograde) {
					continue;
				}
				if (kdbg_debug) {
					printf("kdebug: RETRO EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase,
				    kdbg_get_cpu(tempbuf));
				tempbuf->arg1 = tempbuf->debugid;
				tempbuf->arg2 = (kd_buf_argtype)earliest_time;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
				traced_retrograde = true;
			} else {
				min_kdbp->kd_prev_timebase = earliest_time;
			}
nextevent:
			tempbuf_count--;
			tempbuf_number++;
			tempbuf++;

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE &&
			    (RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
				break;
			}
		}

		if (tempbuf_number) {
			/*
			 * Remember the latest timestamp of events that we've merged so we
			 * don't think we've lost events later.
			 */
			uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
			if (kd_ctrl_page->kdc_oldest_time < latest_time) {
				kd_ctrl_page->kdc_oldest_time = latest_time;
			}

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				extern int kernel_debug_trace_write_to_file(user_addr_t *buffer,
				    size_t *number, size_t *count, size_t tempbuf_number,
				    vnode_t vp, vfs_context_t ctx, uint32_t file_version);
				error = kernel_debug_trace_write_to_file(&buffer, number,
				    &count, tempbuf_number, vp, ctx, file_version);
			} else if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
				memcpy((void*)buffer, kd_data_page->kdcopybuf,
				    tempbuf_number * sizeof(kd_buf));
				buffer += tempbuf_number * sizeof(kd_buf);
			} else {
				panic("kdebug: invalid kdebug mode %d", kd_ctrl_page->mode);
			}
			if (error) {
				*number = 0;
				error = EINVAL;
				break;
			}
			count -= tempbuf_number;
			*number += tempbuf_number;
		}
		if (out_of_events) {
			break;
		}

		if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
			tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
		}
	}
	if ((old_live_flags & KDBG_NOWRAP) == 0) {
		_enable_wrap(kd_ctrl_page, old_emit);
	}

	if (set_preempt) {
		thread_clear_eager_preempt(current_thread());
	}

	if (should_disable) {
		kernel_debug_disable();
	}

	return error;
}
