#include <darwintest.h>

#include <assert.h>
#include <mach/clock_types.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <err.h>
#include <sys/time.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <pthread.h>
#include <perfdata/perfdata.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <stdbool.h>
#include <signal.h>
#include <sys/resource.h>
#include <sys/resource_private.h>
#include <time.h>
#include <os/atomic_private.h>
#include <libproc.h>
#include <TargetConditionals.h>

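/* mach_get_times() is declared in a private header; fall back to a local prototype when it is not available. */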
#if __has_include(<mach/mach_time_private.h>)
#include <mach/mach_time_private.h>
#else
kern_return_t mach_get_times(uint64_t* absolute_time,
    uint64_t* continuous_time,
    struct timespec *tp);
#endif

/*
 * This test program creates up to 8 worker threads performing
 * mixed workloads of system calls (which contribute to both
 * user and system time), as well as spins in userspace (which
 * only contribute to user time).
 *
 * setitimer(2) is used to program timers that fire signals
 * after various thresholds. The signal handler detects
 * which thread the signal was delivered on by matching the
 * stack pointer against each thread's stack range.
 *
 * After the test scenario completes, the distribution of
 * signals across the threads is checked against the
 * expected heuristics.
 */

T_GLOBAL_META(
	T_META_RUN_CONCURRENTLY(false),
	T_META_CHECK_LEAKS(false),
	T_META_ALL_VALID_ARCHS(true),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("scheduler"),
	T_META_OWNER("chimene"),
	T_META_ENABLED(TARGET_OS_OSX),
	T_META_TAG_VM_NOT_ELIGIBLE
	);

static void *stat_thread(void *arg);
static void *statfs_thread(void *arg);

static void alrm_handler(int, struct __siginfo *, void *);

static semaphore_t gMainWaitForWorkers;
static semaphore_t gWorkersStart;

static pthread_mutex_t gShouldExitMutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gShouldExitCondition = PTHREAD_COND_INITIALIZER;

static _Atomic bool gShouldExit = false;
/* Must be a constant expression so it can size the file-scope gThreadList array. */
enum { max_threads = 9 };

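/*
 * Per-thread bookkeeping filled in by the main thread and by the signal
 * handler.  Each entry is aligned to 128 bytes so the per-thread signal
 * counters sit on their own cache lines and do not false-share.
 */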
static struct threadentry {
	pthread_t thread;
	uint64_t tid;
	void* stack_addr;
	size_t stack_size;
	bool expect_cpu_usage;
	uint32_t alrm_count;
	uint32_t vtalrm_count;
	uint32_t prof_count;
	uint32_t xcpu_count;
	struct thsc_time_cpi self_stats;
} __attribute__((aligned(128))) gThreadList[max_threads];

static uint32_t nworkers;
static uint32_t nthreads;

static double offcore_time_percent_threshold = 75.0;

static bool is_rosetta = false;

static mach_timebase_info_data_t timebase_info;

static uint64_t
abs_to_nanos(uint64_t abs)
{
	return abs * timebase_info.numer / timebase_info.denom;
}

/* Some statistics APIs return host abstime instead of Rosetta-translated abstime */
static uint64_t
abs_to_nanos_host(uint64_t abstime)
{
	if (is_rosetta) {
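		/*
		 * Rosetta presents a 1:1 (nanosecond) timebase to the process, so
		 * convert host abstime by hand here, assuming the host timebase is
		 * the usual Apple silicon 24 MHz (125/3 ns per tick).
		 */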
		return abstime * 125 / 3;
	} else {
		return abs_to_nanos(abstime);
	}
}

static int
processIsTranslated(void)
{
	int ret = 0;
	size_t size = sizeof(ret);
	if (sysctlbyname("sysctl.proc_translated", &ret, &size, NULL, 0) == -1) {
		if (errno == ENOENT) {
			return 0;
		} else {
			return -1;
		}
	}
	return ret;
}

static void
fill_thread_stats(uint32_t i)
{
	struct threadentry *entry = &gThreadList[i];

	int rv = thread_selfcounts(THSC_TIME_CPI, &entry->self_stats, sizeof(entry->self_stats));
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "thread_selfcounts(THSC_TIME_CPI)");
}

T_DECL(setitimer,
    "Test various setitimer delivered signals to CPU-burning threads")
{
	int rv;
	kern_return_t kr;
	uint32_t ncpu;
	size_t ncpu_size = sizeof(ncpu);

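	/*
	 * Run the main thread at a fixed priority above the workers
	 * (47 vs. 37 below) so it can preempt them and service SIGALRM promptly.
	 */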
	struct sched_param self_param = {.sched_priority = 47};

	rv = pthread_setschedparam(pthread_self(), SCHED_FIFO, &self_param);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_setschedparam");

	kr = mach_timebase_info(&timebase_info);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");

	is_rosetta = processIsTranslated();

	rv = sysctlbyname("hw.ncpu", &ncpu, &ncpu_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sysctlbyname(hw.ncpu)");

	if (ncpu < 2) {
		T_SKIP("%d CPUs not supported for test, returning success", ncpu);
	}

	nworkers = MIN(max_threads - 1, ncpu);
	nthreads = nworkers + 1;

	T_LOG("rosetta = %d\n", is_rosetta);
	T_LOG("hw.ncpu = %d\n", ncpu);
	T_LOG("nworkers = %d\n", nworkers);
	T_LOG("nthreads = %d\n", nthreads);

	kr = semaphore_create(mach_task_self(), &gMainWaitForWorkers, SYNC_POLICY_FIFO, 0);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create()");

	kr = semaphore_create(mach_task_self(), &gWorkersStart, SYNC_POLICY_FIFO, 0);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create()");

	pthread_attr_t attr;

	rv = pthread_attr_init(&attr);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_init");

	struct sched_param child_param = {.sched_priority = 37};

	rv = pthread_attr_setschedparam(&attr, &child_param);
187 T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_set_qos_class_np");

	for (uint32_t i = 0; i < nthreads; i++) {
		if (i == 0) {
			gThreadList[i].thread = pthread_self();
		} else {
			rv = pthread_create(&gThreadList[i].thread, &attr,
			    i % 2 ? stat_thread : statfs_thread,
			    (void *)(uintptr_t)i);
			T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_create");
			gThreadList[i].expect_cpu_usage = i % 2 == 0 ? true : false;
		}

		rv = pthread_threadid_np(gThreadList[i].thread, &gThreadList[i].tid);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_threadid_np");

		gThreadList[i].stack_addr = pthread_get_stackaddr_np(gThreadList[i].thread);
		gThreadList[i].stack_size = pthread_get_stacksize_np(gThreadList[i].thread);
	}

	rv = pthread_attr_destroy(&attr);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_destroy");

	for (uint32_t i = 1; i < nthreads; i++) {
		kr = semaphore_wait(gMainWaitForWorkers);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait()");
	}

	for (uint32_t i = 0; i < nthreads; i++) {
		T_LOG("Thread %p (0x%llx) checked in, stack %p/%p\n",
		    (void*)gThreadList[i].thread,
		    gThreadList[i].tid,
		    gThreadList[i].stack_addr,
		    (void *)gThreadList[i].stack_size);
	}

	sigset_t sigmk;
	sigemptyset(&sigmk);

	struct sigaction sigact = {
		.sa_sigaction = alrm_handler,
		.sa_mask = sigmk,
		.sa_flags = SA_SIGINFO,
	};

	rv = sigaction(SIGALRM, &sigact, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sigaction(SIGALRM)");

	rv = sigaction(SIGVTALRM, &sigact, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sigaction(SIGVTALRM)");

	rv = sigaction(SIGPROF, &sigact, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sigaction(SIGPROF)");

	rv = sigaction(SIGXCPU, &sigact, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sigaction(SIGXCPU)");

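	/*
	 * Arm all three interval timers with a 10ms period:
	 * ITIMER_REAL counts down in wall-clock time and delivers SIGALRM,
	 * ITIMER_VIRTUAL counts down only in user time and delivers SIGVTALRM,
	 * and ITIMER_PROF counts down in user+system time and delivers SIGPROF.
	 */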
	struct itimerval itime = {
		.it_interval.tv_sec = 0,
		.it_interval.tv_usec = 10000,
		.it_value.tv_sec = 0,
		.it_value.tv_usec = 10, /* immediately */
	};

	rv = setitimer(ITIMER_REAL, &itime, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "setitimer(ITIMER_REAL)");

	rv = setitimer(ITIMER_VIRTUAL, &itime, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "setitimer(ITIMER_VIRTUAL)");

	rv = setitimer(ITIMER_PROF, &itime, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "setitimer(ITIMER_PROF)");

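	/*
	 * Drop the RLIMIT_CPU soft limit to 1 second of CPU time;
	 * exceeding it delivers SIGXCPU to the process.
	 */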
	struct rlimit rlim = {};

	rv = getrlimit(RLIMIT_CPU, &rlim);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "getrlimit(RLIMIT_CPU)");

	rlim.rlim_cur = 1;
	rv = setrlimit(RLIMIT_CPU, &rlim);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "setrlimit(RLIMIT_CPU)");

	rv = pthread_mutex_lock(&gShouldExitMutex);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_mutex_lock(&gShouldExitMutex)");

	kr = semaphore_signal_all(gWorkersStart);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all()");

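	/*
	 * mach_get_times() returns matching absolute-time and wall-clock
	 * snapshots, so time_start lines up with the 10-second condvar
	 * deadline computed below.
	 */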
	struct timespec timenow = {};
	uint64_t time_start;

	kr = mach_get_times(&time_start, NULL, &timenow);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_get_times()");

	struct timespec timeout = {
		.tv_sec = timenow.tv_sec + 10,
		.tv_nsec = timenow.tv_nsec,
	};

	uint64_t time_end = 0;

	do {
		assert(os_atomic_load(&gShouldExit, relaxed) == false);

		rv = pthread_cond_timedwait(&gShouldExitCondition, &gShouldExitMutex, &timeout);
		if (rv == ETIMEDOUT) {
			os_atomic_store(&gShouldExit, true, relaxed);

			time_end = mach_absolute_time();

			struct itimerval itime_stop = {
				.it_interval.tv_sec = 0,
				.it_interval.tv_usec = 0,
				.it_value.tv_sec = 0,
				.it_value.tv_usec = 0, /* stop immediately */
			};

			rv = setitimer(ITIMER_REAL, &itime_stop, NULL);
			T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "setitimer(ITIMER_REAL)");

			rv = setitimer(ITIMER_VIRTUAL, &itime_stop, NULL);
			T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "setitimer(ITIMER_VIRTUAL)");

			rv = setitimer(ITIMER_PROF, &itime_stop, NULL);
			T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "setitimer(ITIMER_PROF)");

			break;
		} else {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_cond_timedwait(&gShouldExitCondition, ...)");
		}
	} while (true);

	rv = pthread_mutex_unlock(&gShouldExitMutex);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_mutex_unlock(&gShouldExitMutex)");

	for (uint32_t i = 1; i < nthreads; i++) {
		rv = pthread_join(gThreadList[i].thread, NULL);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_join");
	}

	uint64_t test_duration = time_end - time_start;
	uint64_t test_duration_ns = abs_to_nanos(test_duration);

	double elapsed_secs = (double) test_duration_ns / (uint64_t)NSEC_PER_SEC;

	T_LOG("test duration %3.3f seconds\n", elapsed_secs);

	fill_thread_stats(0);

	struct rusage_info_v6 ru = {};
	rv = proc_pid_rusage(getpid(), RUSAGE_INFO_V6, (rusage_info_t *)&ru);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "proc_pid_rusage");

	uint64_t total_user_time_ns = abs_to_nanos_host(ru.ri_user_time);
	double total_user_time_s = (double)total_user_time_ns / (uint64_t)NSEC_PER_SEC;

	uint64_t total_system_time_ns = abs_to_nanos_host(ru.ri_system_time);
	double total_system_time_s = (double)total_system_time_ns / (uint64_t)NSEC_PER_SEC;

	uint64_t total_time_ns = (total_user_time_ns + total_system_time_ns);
	double total_time_s = (double)total_time_ns / (uint64_t)NSEC_PER_SEC;

	uint64_t total_runnable_time_ns = abs_to_nanos_host(ru.ri_runnable_time);
	double total_runnable_time_s = (double)total_runnable_time_ns / (uint64_t)NSEC_PER_SEC;

	uint64_t total_pending_time_ns = total_runnable_time_ns - (total_time_ns);
	double total_pending_time_s = (double)total_pending_time_ns / (uint64_t)NSEC_PER_SEC;

	uint64_t total_p_time_ns = abs_to_nanos_host(ru.ri_user_ptime + ru.ri_system_ptime);
	double total_p_time_s = (double)total_p_time_ns / (uint64_t)NSEC_PER_SEC;

	T_LOG("total usage: time: %3.3f user: %3.3f kernel: %3.3f runnable: %3.3f pending: %3.3f pcore: %3.3f\n",
	    total_time_s, total_user_time_s, total_system_time_s,
	    total_runnable_time_s, total_pending_time_s,
	    total_p_time_s);

	/*
	 * "Good" data looks like:
	 *
	 * total usage: time: 77.696 user: 16.570 kernel: 61.126 runnable: 79.951 pending: 2.255 pcore: 72.719
	 * Thread ALRM VTALRM PROF XCPU inst cycle user kernel offcore type
	 * 0x16f78f000 0 251 811 0 27680301973 28913501188 3706622958 ( 38.14%) 6012631083 ( 61.86%) 2.81% statfs
	 * 0x16f81b000 0 2 889 0 27962710058 28780576123 439297291 ( 4.53%) 9259942583 ( 95.47%) 3.01% stat
	 * 0x16f8a7000 0 251 836 0 27558331077 28889228535 3699010000 ( 38.08%) 6016015083 ( 61.92%) 2.85% statfs
	 * 0x16f933000 0 0 939 0 28078084696 28880195679 443067500 ( 4.56%) 9269807666 ( 95.44%) 2.87% stat
	 * 0x16f9bf000 0 283 874 0 27691851016 28969873070 3710916750 ( 38.16%) 6012783541 ( 61.84%) 2.76% statfs
	 * 0x16fa4b000 0 2 908 1 27945063330 28769971396 438583000 ( 4.53%) 9252694291 ( 95.47%) 3.09% stat
	 * 0x16fad7000 0 262 889 0 27328496429 28772748055 3689245375 ( 38.03%) 6011061458 ( 61.97%) 3.00% statfs
	 * 0x16fb63000 0 0 914 0 27942195343 28757254100 439690166 ( 4.53%) 9256659500 ( 95.47%) 3.04% stat
	 * 0x1fe2bb400 1001 0 3 0 72144372 102339334 3532125 ( 9.35%) 34249208 ( 90.65%) 99.62% main
	 */
	uint32_t total_alrm = 0;
	uint32_t total_vtalrm = 0;
	uint32_t total_prof = 0;
	uint32_t total_xcpu = 0;
	uint32_t total_vtalrm_in_cpubound = 0;

	uint32_t total_threads_not_finding_cpus = 0;

	T_LOG("Thread ALRM VTALRM PROF XCPU "
	    "inst cycle user kernel "
	    "offcore type\n");

	for (uint32_t i = 0; i < nthreads; i++) {
		uint64_t user_time = abs_to_nanos_host(gThreadList[i].self_stats.ttci_user_time_mach);
		uint64_t system_time = abs_to_nanos_host(gThreadList[i].self_stats.ttci_system_time_mach);

		uint64_t total_time = user_time + system_time;

		double percentage_user = (double)user_time / (double) total_time * 100;
		double percentage_system = (double)system_time / (double) total_time * 100;
		double percentage_not_running = (double)(test_duration_ns - total_time) / (double) test_duration_ns * 100;

		char* thread_type_str = "";
		char* warning_str = "";

		if (i == 0) {
			thread_type_str = "main ";
		} else {
			thread_type_str = i % 2 ? "stat " : "statfs ";

			if (percentage_not_running > offcore_time_percent_threshold) {
				total_threads_not_finding_cpus++;
				warning_str = "** too much offcore time **";
			}
		}

		T_LOG("0x%010llx %6d %6d %6d %6d %12lld %12lld %12lld (%7.2f%%) %12lld (%7.2f%%) %7.2f%% %s%s\n",
		    gThreadList[i].tid,
		    gThreadList[i].alrm_count,
		    gThreadList[i].vtalrm_count,
		    gThreadList[i].prof_count,
		    gThreadList[i].xcpu_count,
		    gThreadList[i].self_stats.ttci_instructions,
		    gThreadList[i].self_stats.ttci_cycles,
		    user_time, percentage_user,
		    system_time, percentage_system,
		    percentage_not_running,
		    thread_type_str, warning_str);

		total_alrm += gThreadList[i].alrm_count;
		total_vtalrm += gThreadList[i].vtalrm_count;
		total_prof += gThreadList[i].prof_count;
		total_xcpu += gThreadList[i].xcpu_count;

		if (gThreadList[i].expect_cpu_usage) {
			total_vtalrm_in_cpubound += gThreadList[i].vtalrm_count;
		}
	}

	/*
	 * We expect all SIGALRM to go to the main thread, because it is the
	 * first thread in the process with the signal unmasked, and we
	 * never expect the signal handler itself to take >10ms.
	 *
	 * This can happen if the main thread is preempted for the entire 10ms duration, though.
	 * Being high priority, it shouldn't be delayed for more than 10ms too often.
	 * Allow up to 10% to deliver to other threads.
	 */
	if ((double)gThreadList[0].alrm_count * 100 / total_alrm < 90.0) {
		T_FAIL("SIGALRM delivered to non-main thread more than 10%% of the time (%d of %d)",
		    gThreadList[0].alrm_count,
		    total_alrm);
	}

	/* We expect all worker threads to find CPUs of their own for most of the test */
	if (total_threads_not_finding_cpus != 0) {
		T_FAIL("%d worker threads spent more than %2.0f%% of time off-core",
		    total_threads_not_finding_cpus, offcore_time_percent_threshold);
	}

	/*
	 * SIGVTALRM is delivered based on user time, and we expect the busy
	 * threads to have an advantage and account for 80% (non-scientific) of events,
	 * since the other threads will spend more time in kernel mode.
	 */
	if (total_vtalrm_in_cpubound * 100 / total_vtalrm < 80) {
		T_FAIL("SIGVTALRM delivered to threads without extra userspace spin (only %d of %d)",
		    total_vtalrm_in_cpubound, total_vtalrm);
	}

	/*
	 * SIGPROF is delivered based on user+system time, and we expect it to be distributed
	 * among non-blocked threads (so not the main thread, which only handles SIGALRM).
	 */
	if (gThreadList[0].prof_count * 100 / total_prof > 1) {
		T_FAIL("SIGPROF delivered to main thread more than 1%% (%d of %d)",
		    gThreadList[0].prof_count,
		    total_prof);
	}

	/*
	 * SIGXCPU should be delivered at least once after the 1-second
	 * RLIMIT_CPU soft limit is exceeded.
	 */
	if (total_xcpu == 0) {
		T_FAIL("SIGXCPU delivered %d times (expected at least once)", total_xcpu);
	}
}

static void *
stat_thread(void *arg)
{
	kern_return_t kr;
	int rv;

	/* This wait can be aborted by one of the signals, so we make sure to wait for the first iteration of main */
	kr = semaphore_wait_signal(gWorkersStart, gMainWaitForWorkers);
	if (kr != KERN_ABORTED) {
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal()");
	}

	rv = pthread_mutex_lock(&gShouldExitMutex);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_mutex_lock(&gShouldExitMutex)");
	rv = pthread_mutex_unlock(&gShouldExitMutex);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_mutex_unlock(&gShouldExitMutex)");

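	/*
	 * Issue stat(2) in a tight loop: nearly all of this thread's CPU time
	 * is spent in the kernel, so it should accumulate mostly system time
	 * and attract SIGPROF far more often than SIGVTALRM.
	 */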
	do {
		struct stat sb;

		rv = stat("/", &sb);
		if (rv != 0) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "stat");
		}
	} while (os_atomic_load(&gShouldExit, relaxed) == false);

	fill_thread_stats((uint32_t)(uintptr_t)arg);

	return NULL;
}

static void *
statfs_thread(void *arg)
{
	kern_return_t kr;
	uint64_t previous_spin_timestamp;
	int iteration = 0;
	int rv;

	/* This wait can be aborted by one of the signals, so we make sure to wait for the first iteration of main */
	kr = semaphore_wait_signal(gWorkersStart, gMainWaitForWorkers);
	if (kr != KERN_ABORTED) {
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal()");
	}

	rv = pthread_mutex_lock(&gShouldExitMutex);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_mutex_lock(&gShouldExitMutex)");
	rv = pthread_mutex_unlock(&gShouldExitMutex);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_mutex_unlock(&gShouldExitMutex)");

	previous_spin_timestamp = mach_absolute_time();

	do {
		struct statfs sf;

		/*
		 * Every so many system calls, inject a spin in userspace
		 * proportional to how much time was spent performing the
		 * system calls.
		 */
#define SYSCALL_ITERATIONS_BETWEEN_SPINS (10000)
		if (++iteration % SYSCALL_ITERATIONS_BETWEEN_SPINS == 0) {
			uint64_t now = mach_absolute_time();
			uint64_t spin_deadline = now + (now - previous_spin_timestamp) / 2;

			while (mach_absolute_time() < spin_deadline) {
				;
			}

			previous_spin_timestamp = mach_absolute_time();
		}

		rv = statfs("/", &sf);
		if (rv != 0) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "statfs");
		}
	} while (os_atomic_load(&gShouldExit, relaxed) == false);

	fill_thread_stats((uint32_t)(uintptr_t)arg);

	return NULL;
}

static void
alrm_handler(int signum, struct __siginfo *info __unused, void *uap)
{
	ucontext_t *context = (ucontext_t *)uap;
	struct threadentry *entry = NULL;
	void *sp;

#if defined(__arm64__)
	sp = (void *)__darwin_arm_thread_state64_get_sp((context->uc_mcontext)->__ss);
#elif defined(__i386__)
	sp = (void *)(context->uc_mcontext)->__ss.__esp;
#elif defined(__x86_64__)
	sp = (void *)(context->uc_mcontext)->__ss.__rsp;
#else
#error Unrecognized architecture
#endif

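	/*
	 * Identify which thread took the signal by matching the interrupted
	 * stack pointer: pthread_get_stackaddr_np() returns the high end of
	 * the stack and stacks grow downward, so SP falls in
	 * [stack_addr - stack_size, stack_addr).
	 */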
	for (uint32_t i = 0; i < nworkers + 1; i++) {
		struct threadentry *t = &gThreadList[i];
		if (((uintptr_t)sp >= ((uintptr_t)t->stack_addr - t->stack_size) &&
		    ((uintptr_t)sp < (uintptr_t)t->stack_addr))) {
			entry = t;
			break;
		}
	}

	if (entry == NULL) {
		T_ASSERT_FAIL("Signal %d delivered to unknown thread, SP=%p", signum, sp);
	}

	switch (signum) {
	case SIGALRM:
		os_atomic_inc(&entry->alrm_count, relaxed);
		break;
	case SIGVTALRM:
		os_atomic_inc(&entry->vtalrm_count, relaxed);
		break;
	case SIGPROF:
		os_atomic_inc(&entry->prof_count, relaxed);
		break;
	case SIGXCPU:
		os_atomic_inc(&entry->xcpu_count, relaxed);
		break;
	}
}

// When the SIGPROF signal was received.
static uint64_t sigprof_received_ns = 0;

static const uint64_t ITIMER_PROF_SECS = 2;
#define PROF_EXTRA_THREAD_COUNT (1)
// Include the main thread as participating in consuming CPU.
static const uint64_t EXPECTED_PROF_DURATION_SECS =
    ITIMER_PROF_SECS / (PROF_EXTRA_THREAD_COUNT + 1);
static const uint64_t EXTRA_TIMEOUT_SECS = 1;

struct cpu_spin_args {
	bool spin_while_true;
};

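// Burn CPU in userspace until told to stop, so ITIMER_PROF time accrues on this thread too.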
static void *
cpu_thread_main(void *arg)
{
	bool *spin = arg;
	while (*spin) {
	}
	return NULL;
}

static void
sigprof_received(int __unused sig)
{
	sigprof_received_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
}

T_DECL(setitimer_prof,
    "ensure a single-threaded process doesn't receive early signals",
    T_META_TAG_VM_PREFERRED)
{
	T_SETUPBEGIN;
	(void)signal(SIGPROF, sigprof_received);

	uint64_t start_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
	uint64_t expected_end_ns = start_ns + (ITIMER_PROF_SECS * NSEC_PER_SEC);
	uint64_t end_timeout_ns = expected_end_ns + (EXTRA_TIMEOUT_SECS * NSEC_PER_SEC);

	struct itimerval prof_timer = {
		.it_value.tv_sec = ITIMER_PROF_SECS,
	};

	int ret = setitimer(ITIMER_PROF, &prof_timer, NULL);
	T_ASSERT_POSIX_SUCCESS(ret, "setitimer(ITIMER_PROF, %llus)",
	    ITIMER_PROF_SECS);
	T_SETUPEND;

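	/*
	 * ITIMER_PROF only decrements while the process is running on a CPU,
	 * so spin here (rather than sleep) until the handler records a
	 * timestamp or the wall-clock timeout expires.
	 */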
	uint64_t last_ns = 0;
	while ((last_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC)) < end_timeout_ns) {
		if (sigprof_received_ns) {
			break;
		}
	}

	T_EXPECT_LE(last_ns, end_timeout_ns, "received SIGPROF within the timeout (%.9gs < %.9gs)",
	    (double)(last_ns - start_ns) / 1e9, (double)(end_timeout_ns - start_ns) / 1e9);
	T_LOG("SIGPROF was delivered %+.6gms after expected duration",
	    (double)(last_ns - expected_end_ns) / 1e6);
	T_EXPECT_GE(last_ns, expected_end_ns, "received SIGPROF after enough time (%.9gs > %.9gs)",
	    (double)(last_ns - start_ns) / 1e9, (double)(expected_end_ns - start_ns) / 1e9);
}

T_DECL(setitimer_prof_multi_threaded,
    "ensure a multi-threaded process doesn't receive early signals",
    T_META_TAG_VM_PREFERRED)
{
	T_SETUPBEGIN;
	(void)signal(SIGPROF, sigprof_received);

	pthread_t cpu_threads[PROF_EXTRA_THREAD_COUNT] = { 0 };
	bool spin_while_true = true;
	for (unsigned int i = 0; i < PROF_EXTRA_THREAD_COUNT; i++) {
		int error = pthread_create(&cpu_threads[i], NULL, cpu_thread_main, &spin_while_true);
		T_QUIET; T_ASSERT_POSIX_ZERO(error, "create thread %d", i);
	}
	T_LOG("spinning %d threads on CPU", PROF_EXTRA_THREAD_COUNT + 1);

	uint64_t start_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
	uint64_t expected_end_ns = start_ns + (EXPECTED_PROF_DURATION_SECS * NSEC_PER_SEC);
	uint64_t end_timeout_ns = expected_end_ns + (EXTRA_TIMEOUT_SECS * NSEC_PER_SEC);

	struct itimerval prof_timer = {
		.it_value.tv_sec = ITIMER_PROF_SECS,
	};

	int ret = setitimer(ITIMER_PROF, &prof_timer, NULL);
	T_ASSERT_POSIX_SUCCESS(ret, "setitimer(ITIMER_PROF, %llus)",
	    ITIMER_PROF_SECS);
	T_SETUPEND;

	uint64_t last_ns = 0;
	while ((last_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC)) < end_timeout_ns) {
		if (sigprof_received_ns) {
			break;
		}
	}

	spin_while_true = false;
	T_EXPECT_LE(last_ns, end_timeout_ns, "received SIGPROF within the timeout (%.9gs < %.9gs)",
	    (double)(last_ns - start_ns) / 1e9, (double)(end_timeout_ns - start_ns) / 1e9);
	T_LOG("SIGPROF was delivered %+.6gms after expected duration",
	    (double)(last_ns - expected_end_ns) / 1e6);
	T_EXPECT_GE(last_ns, expected_end_ns, "received SIGPROF after enough time (%.9gs > %.9gs)",
	    (double)(last_ns - start_ns) / 1e9, (double)(expected_end_ns - start_ns) / 1e9);
}

static uint64_t sigprof_received_usage_mach = 0;
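/*
 * __thread_selfusage() is a private trap returning the calling thread's
 * accumulated CPU usage; this test treats the value as Mach time units and
 * converts it with the timebase below.
 */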
extern uint64_t __thread_selfusage(void);
static int const ITERATION_COUNT = 50;
static uint64_t const ITIMER_PERF_PROF_US = 50000;

static void
sigprof_received_usage(int __unused sig)
{
	sigprof_received_usage_mach = __thread_selfusage();
}

T_DECL(setitimer_prof_latency,
    "measure the latency of delivering SIGPROF",
    T_META_TAG_PERF, T_META_TAG_VM_NOT_ELIGIBLE,
    T_META_TIMEOUT(10))
{
	T_SETUPBEGIN;
	(void)signal(SIGPROF, sigprof_received_usage);

	struct itimerval prof_timer = {
		.it_value.tv_usec = ITIMER_PERF_PROF_US,
	};
	mach_timebase_info_data_t timebase = { 0 };
	int ret = mach_timebase_info(&timebase);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "mach_timebase_info");

	char pd_path[PATH_MAX] = "";
	pdwriter_t pd = pdwriter_open_tmp("xnu", "setitimer_sigprof_latency", 1, 0, pd_path, sizeof(pd_path));
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(pd, "opened perfdata file");
	T_SETUPEND;

	T_LOG("running %d iterations, %lluus each", ITERATION_COUNT, ITIMER_PERF_PROF_US);
	for (int i = 0; i < ITERATION_COUNT; i++) {
		sigprof_received_usage_mach = 0;

		uint64_t const start_usage_mach = __thread_selfusage();

		ret = setitimer(ITIMER_PROF, &prof_timer, NULL);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "setitimer(ITIMER_PROF, %lluus)",
		    ITIMER_PERF_PROF_US);

		while (sigprof_received_usage_mach == 0) {
			// XXX ITIMER_PROF only re-evaluates the timers at user/kernel
			// boundaries, so issue a trivial syscall to force a check.
			// Use getppid() because getpid() has a fast path that avoids
			// making a syscall.
			(void)getppid();
		}

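		/*
		 * The timer was armed for ITIMER_PERF_PROF_US of this thread's CPU
		 * time; end_usage_mach is the usage value at which it should have
		 * fired, and the latency is how far past that point the signal
		 * actually arrived.
		 */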
		uint64_t const end_usage_mach = start_usage_mach +
		    (ITIMER_PERF_PROF_US * NSEC_PER_USEC) * timebase.denom / timebase.numer;
		uint64_t const latency_mach = sigprof_received_usage_mach - end_usage_mach;
		uint64_t const latency_ns = latency_mach * timebase.numer / timebase.denom;

		pdwriter_new_value(pd, "sigprof_latency", pdunit_ns, latency_ns);
	}

	pdwriter_close(pd);
	T_LOG("wrote perfdata to %s", pd_path);
}