1 // Copyright (c) 2018-2023 Apple Inc. All rights reserved.
2
3 #include <darwintest.h>
4 #include <ktrace/config.h>
5 #include <ktrace/session.h>
6 #include <inttypes.h>
7 #include <libproc.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <sys/resource.h>
11 #include <sys/sysctl.h>
12
13 #include <kperf/kpc.h>
14 #include <kperf/kperf.h>
15 #include <kperfdata/kpep.h>
16
17 #include "ktrace_helpers.h"
18 #include "kperf_helpers.h"
19 #include "test_utils.h"
20
21 T_GLOBAL_META(
22 T_META_NAMESPACE("xnu.cpu_counters"),
23 T_META_RADAR_COMPONENT_NAME("xnu"),
24 T_META_RADAR_COMPONENT_VERSION("cpu counters"),
25 T_META_OWNER("mwidmann"),
26 T_META_ASROOT(true),
27 T_META_CHECK_LEAKS(false));
28
29 struct machine {
30 unsigned int ncpus;
31 unsigned int nfixed;
32 unsigned int nconfig;
33 uint64_t selector;
34 };
35
36 #ifndef ABSV64
37 #define ABSV64(n) ((((int64_t)(n)) < 0) ? -((int64_t)(n)) : ((int64_t)(n)))
38 #endif
39
40 static void
skip_if_unsupported(void)41 skip_if_unsupported(void)
42 {
43 int r;
44 int supported = 0;
45 size_t supported_size = sizeof(supported);
46
47 r = sysctlbyname("kern.monotonic.supported", &supported, &supported_size,
48 NULL, 0);
49 if (r < 0) {
50 T_WITH_ERRNO;
51 T_SKIP("could not find \"kern.monotonic.supported\" sysctl");
52 }
53
54 if (!supported) {
55 T_SKIP("PMCs are not supported on this platform");
56 }
57 }
58
59 static struct rusage_info_v4 pre_ru = {};
60
61 static void
start_kpc(void)62 start_kpc(void)
63 {
64 T_SETUPBEGIN;
65
66 kpc_classmask_t classes = KPC_CLASS_FIXED_MASK |
67 KPC_CLASS_CONFIGURABLE_MASK;
68 int ret = kpc_set_counting(classes);
69 T_ASSERT_POSIX_SUCCESS(ret, "started counting");
70
71 ret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4, (rusage_info_t *)&pre_ru);
72 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "got rusage information");
73
74 kpc_classmask_t classes_on = kpc_get_counting();
75 T_QUIET;
76 T_ASSERT_EQ(classes, classes_on, "classes counting is correct");
77
78 T_SETUPEND;
79 }
80
81 static void kpc_reset_atend(void);
82
83 static void
_assert_kpep_ok(int kpep_err,const char * fmt,...)84 _assert_kpep_ok(int kpep_err, const char *fmt, ...)
85 {
86 char msg[1024] = "";
87 va_list args;
88 va_start(args, fmt);
89 vsnprintf(msg, sizeof(msg), fmt, args);
90 va_end(args);
91 T_QUIET;
92 T_ASSERT_EQ(kpep_err, KPEP_ERR_NONE, "%s: %s", msg, kpep_strerror(kpep_err));
93 }
94
95 static void
prepare_kpc(struct machine * mch,unsigned int n,const char * event_name,uint64_t period)96 prepare_kpc(struct machine *mch, unsigned int n, const char *event_name,
97 uint64_t period)
98 {
99 T_SETUPBEGIN;
100
101 T_ATEND(kpc_reset_atend);
102
103 kpep_db_t db = NULL;
104 int ret = kpep_db_create(NULL, &db);
105 _assert_kpep_ok(ret, "get kpep database");
106 kpep_config_t config = NULL;
107 ret = kpep_config_create(db, &config);
108 _assert_kpep_ok(ret, "creating event configuration");
109 ret = kpep_config_force_counters(config);
110 _assert_kpep_ok(ret, "forcing counters with configuration");
111 kpep_event_t event = NULL;
112 ret = kpep_db_event(db, event_name, &event);
113 _assert_kpep_ok(ret, "finding event named %s", event_name);
114
115 size_t ncpus_sz = sizeof(mch->ncpus);
116 ret = sysctlbyname("hw.logicalcpu_max", &mch->ncpus, &ncpus_sz,
117 NULL, 0);
118 T_QUIET;
119 T_ASSERT_POSIX_SUCCESS(ret, "sysctlbyname(hw.logicalcpu_max)");
120 T_QUIET;
121 T_ASSERT_GT(mch->ncpus, 0, "must have some number of CPUs");
122
123 ret = kpc_force_all_ctrs_set(1);
124 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_set(1)");
125
126 int forcing = 0;
127 ret = kpc_force_all_ctrs_get(&forcing);
128 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_get");
129 T_QUIET; T_ASSERT_EQ(forcing, 1, "counters must be forced");
130
131 mch->nfixed = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
132 mch->nconfig = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
133
134 T_LOG("machine: ncpus = %d, nfixed = %d, nconfig = %d", mch->ncpus,
135 mch->nfixed, mch->nconfig);
136
137 uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
138 for (uint32_t i = 0; i < nconfigs; i++) {
139 if (period != 0 && (n == 0 || i == 0)) {
140 ret = kpep_config_add_event_trigger(config, &event, 0,
141 period + i * 1000, NULL);
142 } else {
143 ret = kpep_config_add_event(config, &event, 0, NULL);
144 }
145 if (ret == KPEP_ERR_CONFIG_CONFLICT) {
146 T_LOG("configured %d counters with %s", i, event_name);
147 break;
148 }
149 _assert_kpep_ok(ret, "adding %d event %s to configuration", i,
150 event_name);
151 }
152
153 uint64_t *configs = calloc(nconfigs, sizeof(*configs));
154 T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
155 ret = kpep_config_kpc(config, configs, nconfigs * sizeof(*configs));
156 _assert_kpep_ok(ret, "get kpc configuration");
157 for (uint32_t i = 0; i < nconfigs; i++) {
158 if (configs[i] != 0) {
159 mch->selector = configs[i];
160 break;
161 }
162 }
163 T_QUIET; T_ASSERT_NE(mch->selector, 0ULL, "found event selector to check");
164 ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, configs);
165 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_config");
166
167 ret = kpep_config_kpc_periods(config, configs, nconfigs * sizeof(*configs));
168 _assert_kpep_ok(ret, "get kpc periods");
169 ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, configs);
170 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
171
172 free(configs);
173
174 T_SETUPEND;
175 }
176
177 static void
kpc_reset_atend(void)178 kpc_reset_atend(void)
179 {
180 uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
181 uint64_t *configs = calloc(nconfigs, sizeof(*configs));
182 T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
183
184 int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, configs);
185 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
186 ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, configs);
187 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_config");
188
189 free(configs);
190 }
191
192 static void *
spin(void * arg)193 spin(void *arg)
194 {
195 while (*(volatile int *)arg == 0) {
196 ;
197 }
198
199 return NULL;
200 }
201
202 static pthread_t *
start_threads(const struct machine * mch,void * (* func)(void *),void * arg)203 start_threads(const struct machine *mch, void *(*func)(void *), void *arg)
204 {
205 T_SETUPBEGIN;
206
207 pthread_t *threads = calloc((unsigned int)mch->ncpus,
208 sizeof(*threads));
209 T_QUIET; T_ASSERT_NOTNULL(threads, "allocated array of threads");
210 for (unsigned int i = 0; i < mch->ncpus; i++) {
211 int error = pthread_create(&threads[i], NULL, func, arg);
212 T_QUIET; T_ASSERT_POSIX_ZERO(error, "pthread_create");
213 }
214
215 T_SETUPEND;
216
217 return threads;
218 }
219
220 static void
end_threads(const struct machine * mch,pthread_t * threads)221 end_threads(const struct machine *mch, pthread_t *threads)
222 {
223 for (unsigned int i = 0; i < mch->ncpus; i++) {
224 int error = pthread_join(threads[i], NULL);
225 T_QUIET; T_ASSERT_POSIX_ZERO(error, "joined thread %d", i);
226 }
227 free(threads);
228 }
229
230 struct tally {
231 uint64_t firstvalue;
232 uint64_t lastvalue;
233 uint64_t nchecks;
234 uint64_t nzero;
235 uint64_t nstuck;
236 uint64_t ndecrease;
237 };
238
239 static void
check_counters(unsigned int ncpus,unsigned int nctrs,struct tally * tallies,uint64_t * counts)240 check_counters(unsigned int ncpus, unsigned int nctrs, struct tally *tallies,
241 uint64_t *counts)
242 {
243 for (unsigned int i = 0; i < ncpus; i++) {
244 for (unsigned int j = 0; j < nctrs; j++) {
245 unsigned int ctr = i * nctrs + j;
246 struct tally *tly = &tallies[ctr];
247 uint64_t count = counts[ctr];
248
249 if (counts[ctr] == 0) {
250 tly->nzero++;
251 }
252 if (tly->lastvalue == count) {
253 tly->nstuck++;
254 }
255 if (tly->lastvalue > count) {
256 tly->ndecrease++;
257 }
258 tly->lastvalue = count;
259 if (tly->nchecks == 0) {
260 tly->firstvalue = count;
261 }
262 tly->nchecks++;
263 }
264 }
265 }
266
267 static void
check_tally(unsigned int ncpus,unsigned int nctrs,struct tally * tallies)268 check_tally(unsigned int ncpus, unsigned int nctrs, struct tally *tallies)
269 {
270 uint64_t nstuck = 0;
271 uint64_t nchecks = 0;
272 uint64_t nzero = 0;
273 uint64_t ndecrease = 0;
274
275 for (unsigned int i = 0; i < ncpus; i++) {
276 for (unsigned int j = 0; j < nctrs; j++) {
277 unsigned int ctr = i * nctrs + j;
278 struct tally *tly = &tallies[ctr];
279
280 T_LOG("CPU %2u PMC %u: nchecks = %llu, last value = %llx, "
281 "delta = %llu, nstuck = %llu", i, j,
282 tly->nchecks, tly->lastvalue, tly->lastvalue - tly->firstvalue,
283 tly->nstuck);
284
285 nchecks += tly->nchecks;
286 nstuck += tly->nstuck;
287 nzero += tly->nzero;
288 ndecrease += tly->ndecrease;
289 }
290 }
291
292 T_EXPECT_GT(nchecks, 0ULL, "checked 0x%" PRIx64 " counter values", nchecks);
293 T_EXPECT_EQ(nzero, 0ULL, "found 0x%" PRIx64 " zero values", nzero);
294 T_EXPECT_EQ(nstuck, 0ULL, "found 0x%" PRIx64 " stuck values", nstuck);
295 T_EXPECT_EQ(ndecrease, 0ULL,
296 "found 0x%" PRIx64 " decreasing values", ndecrease);
297 }
298
299 #define TESTDUR_NS (5 * NSEC_PER_SEC)
300
301 T_DECL(kpc_cpu_direct_configurable,
302 "test that configurable counters return monotonically increasing values",
303 XNU_T_META_SOC_SPECIFIC,
304 T_META_BOOTARGS_SET("enable_skstb=1"), T_META_TAG_VM_NOT_ELIGIBLE)
305 {
306 skip_if_unsupported();
307
308 struct machine mch = {};
309 prepare_kpc(&mch, 0, "CORE_ACTIVE_CYCLE", 0);
310
311 int until = 0;
312 pthread_t *threads = start_threads(&mch, spin, &until);
313 start_kpc();
314
315 T_SETUPBEGIN;
316
317 uint64_t startns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
318 uint64_t *counts = kpc_counterbuf_alloc();
319 T_QUIET; T_ASSERT_NOTNULL(counts, "allocated space for counter values");
320 memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
321 struct tally *tly = calloc(mch.ncpus * mch.nconfig, sizeof(*tly));
322 T_QUIET; T_ASSERT_NOTNULL(tly, "allocated space for tallies");
323
324 T_SETUPEND;
325
326 int n = 0;
327 while (clock_gettime_nsec_np(CLOCK_MONOTONIC) - startns < TESTDUR_NS) {
328 int ret = kpc_get_cpu_counters(true,
329 KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
330 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_get_cpu_counters");
331
332 check_counters(mch.ncpus, mch.nconfig, tly, counts);
333
334 usleep(10000);
335 n++;
336 if (n % 100 == 0) {
337 T_LOG("checked 100 times");
338 }
339 }
340
341 check_tally(mch.ncpus, mch.nconfig, tly);
342
343 until = 1;
344 end_threads(&mch, threads);
345 }
346
347 T_DECL(kpc_thread_direct_instrs_cycles,
348 "test that fixed thread counters return monotonically increasing values",
349 XNU_T_META_SOC_SPECIFIC, T_META_TAG_VM_NOT_ELIGIBLE)
350 {
351 int err;
352 uint32_t ctrs_cnt;
353 uint64_t *ctrs_a;
354 uint64_t *ctrs_b;
355
356 skip_if_unsupported();
357
358 T_SETUPBEGIN;
359
360 ctrs_cnt = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
361 if (ctrs_cnt == 0) {
362 T_SKIP("no fixed counters available");
363 }
364 T_LOG("device has %" PRIu32 " fixed counters", ctrs_cnt);
365
366 T_QUIET; T_ASSERT_POSIX_SUCCESS(kpc_force_all_ctrs_set(1), NULL);
367 T_ASSERT_POSIX_SUCCESS(kpc_set_counting(KPC_CLASS_FIXED_MASK),
368 "kpc_set_counting");
369 T_ASSERT_POSIX_SUCCESS(kpc_set_thread_counting(KPC_CLASS_FIXED_MASK),
370 "kpc_set_thread_counting");
371
372 T_SETUPEND;
373
374 ctrs_a = malloc(ctrs_cnt * sizeof(uint64_t));
375 T_QUIET; T_ASSERT_NOTNULL(ctrs_a, NULL);
376
377 err = kpc_get_thread_counters(0, ctrs_cnt, ctrs_a);
378 T_ASSERT_POSIX_SUCCESS(err, "kpc_get_thread_counters");
379
380 for (uint32_t i = 0; i < ctrs_cnt; i++) {
381 T_LOG("checking counter %d with value %" PRIu64 " > 0", i, ctrs_a[i]);
382 T_QUIET;
383 T_EXPECT_GT(ctrs_a[i], UINT64_C(0), "counter %d is non-zero", i);
384 }
385
386 ctrs_b = malloc(ctrs_cnt * sizeof(uint64_t));
387 T_QUIET; T_ASSERT_NOTNULL(ctrs_b, NULL);
388
389 err = kpc_get_thread_counters(0, ctrs_cnt, ctrs_b);
390 T_ASSERT_POSIX_SUCCESS(err, "kpc_get_thread_counters");
391
392 for (uint32_t i = 0; i < ctrs_cnt; i++) {
393 T_LOG("checking counter %d with value %" PRIu64
394 " > previous value %" PRIu64, i, ctrs_b[i], ctrs_a[i]);
395 T_QUIET;
396 T_EXPECT_GT(ctrs_b[i], UINT64_C(0), "counter %d is non-zero", i);
397 T_QUIET; T_EXPECT_LT(ctrs_a[i], ctrs_b[i],
398 "counter %d is increasing", i);
399 }
400
401 free(ctrs_a);
402 free(ctrs_b);
403 }
404
405 #define PMI_TEST_DURATION_NS (15 * NSEC_PER_SEC)
406 #define PERIODIC_CPU_COUNT_MS (250)
407 #define NTIMESLICES (72)
408 #define PMI_PERIOD (50ULL * 1000 * 1000)
409 #define END_EVENT KDBG_EVENTID(0xfe, 0xfe, 0)
410
411 struct cpu {
412 uint64_t prev_count, max_skid;
413 unsigned int scheduled_outside_slice;
414 unsigned int pmi_timeslices[NTIMESLICES];
415 unsigned int scheduled_timeslices[NTIMESLICES];
416 };
417
418 T_DECL(kpc_pmi_configurable,
419 "test that PMIs don't interfere with sampling counters in kperf",
420 XNU_T_META_SOC_SPECIFIC,
421 T_META_BOOTARGS_SET("enable_skstb=1"), T_META_TAG_VM_NOT_ELIGIBLE)
422 {
423 skip_if_unsupported();
424
425 start_controlling_ktrace();
426 struct machine mch = {};
427 prepare_kpc(&mch, 1, "CORE_ACTIVE_CYCLE", PMI_PERIOD);
428
429 T_SETUPBEGIN;
430
431 int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
432 actions[0] = 1;
433 int ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
434 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
435 free(actions);
436
437 (void)kperf_action_count_set(1);
438 ret = kperf_action_samplers_set(1,
439 KPERF_SAMPLER_TINFO | KPERF_SAMPLER_KSTACK);
440 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");
441
442 ktrace_config_t ktconfig = ktrace_config_create_current();
443 T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(ktconfig, "create current config");
444 ret = ktrace_config_print_description(ktconfig, stdout);
445 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "print config description");
446
447 struct cpu *cpus = calloc(mch.ncpus, sizeof(*cpus));
448 T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(cpus, "allocate CPUs array");
449
450 __block unsigned int sample_count = 0;
451 __block unsigned int pmi_count = 0;
452 __block unsigned int callstack_count = 0;
453 __block uint64_t first_ns = 0;
454 __block uint64_t last_ns = 0;
455
456 ktrace_session_t sess = ktrace_session_create();
457 T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");
458
459 ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
460 if (tp->debugid & DBG_FUNC_END) {
461 return;
462 }
463
464 uint64_t cur_ns = 0;
465 int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
466 tp->timestamp, &cur_ns);
467 T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
468
469 uint64_t desc = tp->arg1;
470 uint64_t config = desc & UINT32_MAX;
471 T_QUIET; T_EXPECT_EQ(config & UINT16_MAX,
472 mch.selector & UINT16_MAX,
473 "PMI argument matches configuration");
474 __unused uint64_t counter = (desc >> 32) & UINT16_MAX;
475 __unused uint64_t flags = desc >> 48;
476
477 uint64_t count = tp->arg2;
478 if (first_ns == 0) {
479 first_ns = cur_ns;
480 }
481 struct cpu *cpu = &cpus[tp->cpuid];
482
483 if (cpu->prev_count != 0) {
484 uint64_t delta = count - cpu->prev_count;
485 uint64_t skid = delta - PMI_PERIOD;
486 if (skid > cpu->max_skid) {
487 cpu->max_skid = skid;
488 }
489 }
490 cpu->prev_count = count;
491
492 __unused uint64_t pc = tp->arg3;
493
494 double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
495 NTIMESLICES;
496 if (slice < NTIMESLICES) {
497 cpu->pmi_timeslices[(unsigned int)slice] += 1;
498 }
499
500 pmi_count++;
501 });
502
503 void (^sched_handler)(struct trace_point *tp) =
504 ^(struct trace_point *tp) {
505 uint64_t cur_ns = 0;
506 int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
507 tp->timestamp, &cur_ns);
508 T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
509 if (first_ns == 0) {
510 first_ns = cur_ns;
511 }
512
513 struct cpu *cpu = &cpus[tp->cpuid];
514 double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
515 NTIMESLICES;
516 if (slice < NTIMESLICES) {
517 cpu->scheduled_timeslices[(unsigned int)slice] += 1;
518 } else {
519 cpu->scheduled_outside_slice += 1;
520 }
521 };
522 ktrace_events_single(sess, MACH_SCHED, sched_handler);
523 ktrace_events_single(sess, MACH_STACK_HANDOFF, sched_handler);
524
525 ktrace_events_single(sess, PERF_SAMPLE, ^(struct trace_point * tp) {
526 if (tp->debugid & DBG_FUNC_START) {
527 sample_count++;
528 }
529 });
530 ktrace_events_single(sess, PERF_STK_KHDR,
531 ^(struct trace_point * __unused tp) {
532 callstack_count++;
533 });
534
535 ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp) {
536 int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
537 tp->timestamp, &last_ns);
538 T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
539
540 ktrace_end(sess, 1);
541 });
542
543 uint64_t *counts = kpc_counterbuf_alloc();
544 T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(counts,
545 "allocated counter values array");
546 memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
547 struct tally *tly = calloc(mch.ncpus * (mch.nconfig + mch.nfixed),
548 sizeof(*tly));
549 T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(tly, "allocated tallies array");
550
551 dispatch_source_t cpu_count_timer = dispatch_source_create(
552 DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_get_main_queue());
553 dispatch_source_set_timer(cpu_count_timer, dispatch_time(DISPATCH_TIME_NOW,
554 PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC),
555 PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC, 0);
556 dispatch_source_set_cancel_handler(cpu_count_timer, ^{
557 dispatch_release(cpu_count_timer);
558 });
559
560 __block uint64_t first_check_ns = 0;
561 __block uint64_t last_check_ns = 0;
562
563 dispatch_source_set_event_handler(cpu_count_timer, ^{
564 int cret = kpc_get_cpu_counters(true,
565 KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
566 T_QUIET; T_ASSERT_POSIX_SUCCESS(cret, "kpc_get_cpu_counters");
567
568 if (!first_check_ns) {
569 first_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
570 } else {
571 last_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
572 }
573 check_counters(mch.ncpus, mch.nfixed + mch.nconfig, tly, counts);
574 });
575 ktrace_events_class(sess, DBG_PERF, ^(struct trace_point * __unused tp) {});
576
577 int stop = 0;
578 (void)start_threads(&mch, spin, &stop);
579
580 ktrace_set_completion_handler(sess, ^{
581 dispatch_cancel(cpu_count_timer);
582
583 check_tally(mch.ncpus, mch.nfixed + mch.nconfig, tly);
584
585 struct rusage_info_v4 post_ru = {};
586 int ruret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4,
587 (rusage_info_t *)&post_ru);
588 T_QUIET; T_ASSERT_POSIX_SUCCESS(ruret, "got rusage information");
589 T_LOG("saw %llu cycles in process",
590 post_ru.ri_cycles - pre_ru.ri_cycles);
591 uint64_t total_cycles = 0;
592
593 T_LOG("saw pmis = %u, samples = %u, stacks = %u", pmi_count, sample_count,
594 callstack_count);
595 // Allow some slop in case the trace is cut-off midway through a
596 // sample.
597 const unsigned int cutoff_leeway = 32;
598 T_EXPECT_GE(sample_count + cutoff_leeway, pmi_count,
599 "saw as many samples as PMIs");
600 T_EXPECT_GE(callstack_count + cutoff_leeway, pmi_count,
601 "saw as many stacks as PMIs");
602
603 unsigned int cpu_sample_count = 0;
604 char sample_slices[NTIMESLICES + 1];
605 sample_slices[NTIMESLICES] = '\0';
606 for (unsigned int i = 0; i < mch.ncpus; i++) {
607 memset(sample_slices, '-', sizeof(sample_slices) - 1);
608
609 struct cpu *cpu = &cpus[i];
610 unsigned int pmi_slice_count = 0, no_sched_slice_count = 0,
611 cpu_pmi_count = 0, last_contiguous = 0;
612 bool seen_empty = false;
613 for (unsigned int j = 0; j < NTIMESLICES; j++) {
614 unsigned int slice_pmi_count = cpu->pmi_timeslices[j];
615 unsigned int slice_sched_count = cpu->scheduled_timeslices[j];
616 cpu_pmi_count += slice_pmi_count;
617 if (slice_pmi_count > 0) {
618 pmi_slice_count++;
619 sample_slices[j] = '*';
620 } else if (slice_sched_count == 0) {
621 no_sched_slice_count++;
622 sample_slices[j] = '.';
623 } else {
624 seen_empty = true;
625 }
626 if (!seen_empty) {
627 last_contiguous = j;
628 }
629 }
630 unsigned int ctr = i * (mch.nfixed + mch.nconfig) + mch.nfixed;
631 uint64_t delta = tly[ctr].lastvalue - tly[ctr].firstvalue;
632 T_LOG("%g GHz", (double)delta / (last_check_ns - first_check_ns));
633 total_cycles += delta;
634 uint64_t abs_max_skid = (uint64_t)ABSV64(cpu->max_skid);
635 T_LOG("CPU %2u: %4up:%4un/%u, %6u/%llu, max skid = %llu (%.4f%%), "
636 "last contiguous = %u, scheduled outside = %u", i,
637 pmi_slice_count, no_sched_slice_count, NTIMESLICES,
638 sample_count, delta / PMI_PERIOD, abs_max_skid,
639 (double)abs_max_skid / PMI_PERIOD * 100, last_contiguous,
640 cpu->scheduled_outside_slice);
641 T_LOG("%s", sample_slices);
642 if (cpu_pmi_count > 0) {
643 cpu_sample_count++;
644 }
645 T_EXPECT_EQ(last_contiguous, NTIMESLICES - 1,
646 "CPU %2u: saw samples in each time slice", i);
647 }
648 T_LOG("kpc reported %llu total cycles", total_cycles);
649 T_LOG("saw %u sample events, across %u/%u cpus", sample_count,
650 cpu_sample_count, mch.ncpus);
651 T_EXPECT_EQ(cpu_sample_count, mch.ncpus,
652 "should see PMIs on every CPU");
653 T_END;
654 });
655
656 int dbglvl = 3;
657 ret = sysctlbyname("kperf.debug_level", NULL, NULL, &dbglvl,
658 sizeof(dbglvl));
659 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "set kperf debug level");
660 ret = kperf_sample_set(1);
661 T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");
662
663 start_kpc();
664
665 int error = ktrace_start(sess, dispatch_get_main_queue());
666 T_ASSERT_POSIX_ZERO(error, "started tracing");
667
668 dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
669 dispatch_get_main_queue(), ^{
670 T_LOG("ending tracing after timeout");
671 kdebug_trace(END_EVENT, 0, 0, 0, 0);
672 });
673
674 dispatch_activate(cpu_count_timer);
675
676 T_SETUPEND;
677
678 dispatch_main();
679 }
680
681 #if defined(__arm64__)
682 #define IS_ARM64 true
683 #else // defined(__arm64__)
684 #define IS_ARM64 false
685 #endif // !defined(__arm64__)
686
687 T_DECL(kpc_pmu_config, "ensure PMU can be configured",
688 XNU_T_META_SOC_SPECIFIC,
689 T_META_ENABLED(IS_ARM64), T_META_TAG_VM_NOT_ELIGIBLE)
690 {
691 T_SETUPBEGIN;
692 int ret = kpc_force_all_ctrs_set(1);
693 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret,
694 "force all counters to allow raw PMU configuration");
695 uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_RAWPMU_MASK);
696 T_LOG("found %u raw PMU configuration words", nconfigs);
697 uint64_t *configs = calloc(nconfigs, sizeof(*configs));
698 T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
699 T_SETUPEND;
700
701 ret = kpc_set_config(KPC_CLASS_RAWPMU_MASK, configs);
702 T_ASSERT_POSIX_SUCCESS(ret, "should set PMU configuration");
703 }
704
705 T_DECL(pmi_pc_capture, "ensure PC capture works for PMCs 5, 6, and 7",
706 XNU_T_META_SOC_SPECIFIC,
707 T_META_REQUIRES_SYSCTL_EQ("kpc.pc_capture_supported", 1), T_META_TAG_VM_NOT_ELIGIBLE)
708 {
709 start_controlling_ktrace();
710 struct machine mch = {};
711 prepare_kpc(&mch, 0, "INST_BRANCH_TAKEN", PMI_PERIOD);
712
713 T_SETUPBEGIN;
714
715 uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
716 T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
717 for (unsigned int i = 0; i < mch.nconfig; i++) {
718 /*
719 * Offset the periods so the PMIs don't alias to the same PC capture.
720 * Since there's only one PC capture register, they will clobber each
721 * other.
722 */
723 periods[i] = PMI_PERIOD / 1000 + (i * 1000);
724 }
725
726 int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
727 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
728 free(periods);
729
730 int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
731 for (unsigned int i = 0; i < mch.nconfig; i++) {
732 actions[i] = 1;
733 }
734 ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
735 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
736 free(actions);
737
738 (void)kperf_action_count_set(1);
739 ret = kperf_action_samplers_set(1, KPERF_SAMPLER_TINFO);
740 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");
741
742 ktrace_session_t sess = ktrace_session_create();
743 T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");
744
745 uint64_t pc_captured_arr[3] = {};
746 uint64_t *pc_captured = pc_captured_arr;
747 uint64_t pmi_event_arr[3] = {};
748 uint64_t *pmi_event = pmi_event_arr;
749 ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
750 if (tp->debugid & DBG_FUNC_END) {
751 return;
752 }
753
754 uint64_t desc = tp->arg1;
755
756 #define KPC_DESC_COUNTER(DESC) (((DESC) >> 32) & 0xffff)
757 #define KPC_DESC_CONFIG(DESC) ((DESC) & 0xffff)
758 #define KPC_DESC_FLAGS(DESC) ((DESC) >> 48)
759 #define KPC_FLAG_PC_CAPTURED (0x08)
760
761 uint64_t counter = KPC_DESC_COUNTER(desc);
762 uint64_t flags = KPC_DESC_FLAGS(desc);
763 if (counter >= 5 && counter <= 7) {
764 pmi_event[counter - 5]++;
765 if (flags & KPC_FLAG_PC_CAPTURED) {
766 pc_captured[counter - 5]++;
767 }
768 }
769 T_QUIET;
770 T_ASSERT_EQ(KPC_DESC_CONFIG(desc), mch.selector,
771 "correct counter configuration");
772 });
773
774 ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp __unused) {
775 ktrace_config_t config = ktrace_config_create_current();
776 ktrace_config_print_description(config, stdout);
777 ktrace_config_destroy(config);
778 T_LOG("saw ending event");
779 ktrace_end(sess, 1);
780 });
781
782 ktrace_set_completion_handler(sess, ^{
783 ktrace_session_destroy(sess);
784 for (unsigned int i = 0; i < 3; i++) {
785 T_LOG("PMC%u: saw %llu/%llu (%g%%) PMIs with PC capture", i + 5,
786 pc_captured[i], pmi_event[i],
787 (double)pc_captured[i] / (double)pmi_event[i] * 100.0);
788 T_EXPECT_GT(pc_captured[i], 0ULL, "saw PC capture for counter %u",
789 i + 5);
790 }
791 T_END;
792 });
793
794 ret = kperf_sample_set(1);
795 T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");
796
797 start_kpc();
798
799 int error = ktrace_start(sess, dispatch_get_main_queue());
800 T_ASSERT_POSIX_ZERO(error, "started tracing");
801
802 dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
803 dispatch_get_main_queue(), ^{
804 T_LOG("ending tracing after timeout");
805 kdebug_trace(END_EVENT, 0, 0, 0, 0);
806 });
807
808 T_SETUPEND;
809
810 dispatch_main();
811 }
812