1 #include <stdio.h>
2 #include <mach/mach_vm.h>
3 #include <mach/mach_port.h>
4 #include <mach/mach_host.h>
5 #include <mach/mach_error.h>
6 #include <mach-o/dyld.h>
7 #include <sys/sysctl.h>
8 #include <sys/kdebug.h>
9 #include <sys/mman.h>
10 #include <sys/kern_memorystatus.h>
11 #include <ktrace/session.h>
12 #include <dispatch/private.h>
13
14 #ifdef T_NAMESPACE
15 #undef T_NAMESPACE
16 #endif
17 #include <darwintest.h>
18 #include <darwintest_utils.h>
19
20 T_GLOBAL_META(
21 T_META_NAMESPACE("xnu.vm"),
22 T_META_RADAR_COMPONENT_NAME("xnu"),
23 T_META_RADAR_COMPONENT_VERSION("VM"),
24 T_META_CHECK_LEAKS(false)
25 );
26
27 #define TIMEOUT_SECS 10 * 60 /* abort if test takes > 10 minutes */
28
29 #if (TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
30 #define ALLOCATION_SIZE_VM_REGION (16*1024) /* 16 KB */
31 #define ALLOCATION_SIZE_VM_OBJECT ALLOCATION_SIZE_VM_REGION
32 #else
33 #define ALLOCATION_SIZE_VM_REGION (1024*1024*100) /* 100 MB */
34 #define ALLOCATION_SIZE_VM_OBJECT (16*1024) /* 16 KB */
35 #endif
36 #define MAX_CHILD_PROCS 100
37
38 #define NUM_GIVE_BACK 5
39 #define NUM_GIVE_BACK_PORTS 20
40
41 /* 60% is too high on bridgeOS to achieve without vm-pageshortage jetsams. Set it to 40%. */
42 #if TARGET_OS_BRIDGE
43 #define ZONEMAP_JETSAM_LIMIT_SYSCTL "kern.zone_map_jetsam_limit=40"
44 #else
45 #define ZONEMAP_JETSAM_LIMIT_SYSCTL "kern.zone_map_jetsam_limit=60"
46 #endif
47
48 #define VME_ZONE_TEST_OPT "allocate_vm_regions"
49 #define VM_OBJECTS_ZONE_TEST_OPT "allocate_vm_objects"
50 #define GENERIC_ZONE_TEST_OPT "allocate_from_generic_zone"
51
52 #define VME_ZONE "VM map entries"
53 #define VMOBJECTS_ZONE "vm objects"
54 #define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98
55
56 #define VM_TAG1 100
57 #define VM_TAG2 101
58
59 enum {
60 VME_ZONE_TEST = 0,
61 VM_OBJECTS_ZONE_TEST,
62 GENERIC_ZONE_TEST,
63 };
64
65 typedef struct test_config_struct {
66 int test_index;
67 int num_zones;
68 const char *helper_func;
69 mach_zone_name_array_t zone_names;
70 } test_config_struct;
71
72 static test_config_struct current_test;
73 static dispatch_source_t ds_signal = NULL;
74 static dispatch_source_t ds_timer = NULL;
75 static dispatch_queue_t dq_spawn = NULL;
76 static ktrace_session_t session = NULL;
77
78 static mach_zone_info_array_t zone_info_array = NULL;
79 static mach_zone_name_t largest_zone_name;
80 static mach_zone_info_t largest_zone_info;
81
82 static pthread_mutex_t test_mtx = PTHREAD_MUTEX_INITIALIZER; /* protects the next 3 things */
83 static bool test_ending = false;
84 static int num_children = 0;
85 static pid_t child_pids[MAX_CHILD_PROCS];
86
87 static char testpath[PATH_MAX];
88 static void allocate_vm_stuff(int);
89 static void allocate_from_generic_zone(void);
90 static void begin_test_teardown(void);
91 static void cleanup_and_end_test(void);
92 static void setup_ktrace_session(void);
93 static void spawn_child_process(void);
94 static void run_test(void);
95 static bool verify_generic_jetsam_criteria(void);
96 static bool vme_zone_compares_to_vm_objects(void);
97 static void query_zone_info(void);
98 static void print_zone_info(mach_zone_name_t *zn, mach_zone_info_t *zi);
99
100 extern void mach_zone_force_gc(host_t host);
101 extern kern_return_t mach_zone_info_for_largest_zone(
102 host_priv_t host,
103 mach_zone_name_t *name,
104 mach_zone_info_t *info
105 );
106
/*
 * Returns true once more than `timeout` seconds have elapsed since `start`.
 */
static bool
check_time(time_t start, int timeout)
{
	time_t deadline = start + timeout;

	return time(NULL) > deadline;
}
112
113 /*
114 * flag values for allocate_vm_stuff()
115 */
116 #define REGIONS 1
117 #define OBJECTS 2
118
119 static void
allocate_vm_stuff(int flags)120 allocate_vm_stuff(int flags)
121 {
122 uint64_t alloc_size, i;
123 time_t start = time(NULL);
124 mach_vm_address_t give_back[NUM_GIVE_BACK];
125 char *msg;
126
127 if (flags == REGIONS) {
128 alloc_size = ALLOCATION_SIZE_VM_REGION;
129 msg = "";
130 } else {
131 alloc_size = ALLOCATION_SIZE_VM_OBJECT;
132 msg = " each region backed by a VM object";
133 }
134
135 printf("[%d] Allocating VM regions, each of size %lld KB%s\n", getpid(), (alloc_size >> 10), msg);
136
137 for (i = 0;; i++) {
138 mach_vm_address_t addr = (mach_vm_address_t)NULL;
139
140 /* Alternate VM tags between consecutive regions to prevent coalescing */
141 int vmflags = VM_MAKE_TAG((i % 2)? VM_TAG1: VM_TAG2) | VM_FLAGS_ANYWHERE;
142
143 if ((mach_vm_allocate(mach_task_self(), &addr, (mach_vm_size_t)alloc_size, vmflags)) != KERN_SUCCESS) {
144 break;
145 }
146
147 /*
148 * If interested in objects, touch the region so the VM object is created,
149 * then free this page. Keeps us from holding a lot of dirty pages.
150 */
151 if (flags == OBJECTS) {
152 *((int *)addr) = 0;
153 madvise((void *)addr, (size_t)alloc_size, MADV_FREE);
154 }
155
156 if (check_time(start, TIMEOUT_SECS)) {
157 printf("[%d] child timeout during allocations\n", getpid());
158 exit(0);
159 }
160
161 if (i < NUM_GIVE_BACK) {
162 give_back[i] = addr;
163 }
164 }
165
166 /* return some of the resource to avoid O-O-M problems */
167 for (uint64_t j = 0; j < NUM_GIVE_BACK && j < i; ++j) {
168 mach_vm_deallocate(mach_task_self(), give_back[j], (mach_vm_size_t)alloc_size);
169 }
170
171 printf("[%d] Number of allocations: %lld\n", getpid(), i);
172
173 /* Signal to the parent that we're done allocating */
174 kill(getppid(), SIGUSR1);
175
176 while (1) {
177 usleep(500 * 1000);
178 /* Exit if parent has exited. Ensures child processes don't linger around after the test exits */
179 if (getppid() == 1) {
180 exit(0);
181 }
182 if (check_time(start, TIMEOUT_SECS)) {
183 printf("[%d] child timeout while waiting\n", getpid());
184 exit(0);
185 }
186 }
187 }
188
189
190 static void
allocate_from_generic_zone(void)191 allocate_from_generic_zone(void)
192 {
193 uint64_t i = 0;
194 time_t start = time(NULL);
195 mach_port_t give_back[NUM_GIVE_BACK_PORTS];
196 int old_limit = 0;
197
198 printf("[%d] Allocating mach_ports\n", getpid());
199
200 size_t size = sizeof(old_limit);
201 int kr = sysctlbyname("machdep.max_port_table_size", &old_limit, &size, NULL, 0);
202 T_QUIET; T_ASSERT_POSIX_SUCCESS(kr, "sysctl kern.max_port_table_size failed");
203 T_LOG("machdep.max_port_table_size = %d", old_limit);
204
205 /* Avoid hitting the resource limit exception */
206 uint64_t limit = (uint64_t)(old_limit * 7 / 8);
207
208 for (i = 0; i < limit; i++) {
209 mach_port_t port;
210
211 if ((mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port)) != KERN_SUCCESS) {
212 break;
213 }
214
215 if (check_time(start, TIMEOUT_SECS)) {
216 printf("[%d] child timeout during allocations\n", getpid());
217 exit(0);
218 }
219
220 if (i < NUM_GIVE_BACK_PORTS) {
221 give_back[i] = port;
222 }
223 }
224
225 /* return some of the resource to avoid O-O-M problems */
226 for (uint64_t j = 0; j < NUM_GIVE_BACK_PORTS && j < i; ++j) {
227 int ret;
228 ret = mach_port_mod_refs(mach_task_self(), give_back[j], MACH_PORT_RIGHT_RECEIVE, -1);
229 T_ASSERT_MACH_SUCCESS(ret, "mach_port_mod_refs(RECV_RIGHT, -1)");
230 }
231 printf("[%d] Number of allocations: %lld\n", getpid(), i);
232
233 /* Signal to the parent that we're done allocating */
234 kill(getppid(), SIGUSR1);
235
236 while (1) {
237 usleep(500 * 1000);
238 /* Exit if parent has exited. Ensures child processes don't linger around after the test exits */
239 if (getppid() == 1) {
240 exit(0);
241 }
242
243 if (check_time(start, TIMEOUT_SECS)) {
244 printf("[%d] child timeout while waiting\n", getpid());
245 exit(0);
246 }
247 }
248 }
249
250 static void
print_zone_info(mach_zone_name_t * zn,mach_zone_info_t * zi)251 print_zone_info(mach_zone_name_t *zn, mach_zone_info_t *zi)
252 {
253 T_LOG("ZONE NAME: %-35sSIZE: %-25lluELEMENTS: %llu",
254 zn->mzn_name, zi->mzi_cur_size, zi->mzi_count);
255 }
256
257 static time_t main_start;
258
259 static void
query_zone_info(void)260 query_zone_info(void)
261 {
262 int i;
263 kern_return_t kr;
264 static uint64_t num_calls = 0;
265
266 if (check_time(main_start, TIMEOUT_SECS)) {
267 T_ASSERT_FAIL("Global timeout expired");
268 }
269 for (i = 0; i < current_test.num_zones; i++) {
270 kr = mach_zone_info_for_zone(mach_host_self(), current_test.zone_names[i], &(zone_info_array[i]));
271 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_zone_info_for_zone(%s) returned %d [%s]", current_test.zone_names[i].mzn_name, kr, mach_error_string(kr));
272 }
273 kr = mach_zone_info_for_largest_zone(mach_host_self(), &largest_zone_name, &largest_zone_info);
274 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_zone_info_for_largest_zone returned %d [%s]", kr, mach_error_string(kr));
275
276 num_calls++;
277 if (num_calls % 5 != 0) {
278 return;
279 }
280
281 /* Print out size and element count for zones relevant to the test */
282 for (i = 0; i < current_test.num_zones; i++) {
283 print_zone_info(&(current_test.zone_names[i]), &(zone_info_array[i]));
284 }
285 }
286
287 static bool
vme_zone_compares_to_vm_objects(void)288 vme_zone_compares_to_vm_objects(void)
289 {
290 int i;
291 uint64_t vm_object_element_count = 0, vm_map_entry_element_count = 0;
292
293 T_LOG("Comparing element counts of \"VM map entries\" and \"vm objects\" zones");
294 for (i = 0; i < current_test.num_zones; i++) {
295 if (!strcmp(current_test.zone_names[i].mzn_name, VME_ZONE)) {
296 vm_map_entry_element_count = zone_info_array[i].mzi_count;
297 } else if (!strcmp(current_test.zone_names[i].mzn_name, VMOBJECTS_ZONE)) {
298 vm_object_element_count = zone_info_array[i].mzi_count;
299 }
300 print_zone_info(&(current_test.zone_names[i]), &(zone_info_array[i]));
301 }
302
303 T_LOG("# VM map entries as percentage of # vm objects = %llu", (vm_map_entry_element_count * 100) / vm_object_element_count);
304 if (vm_map_entry_element_count >= ((vm_object_element_count * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO) / 100)) {
305 T_LOG("Number of VM map entries is comparable to vm objects\n\n");
306 return true;
307 }
308 T_LOG("Number of VM map entries is NOT comparable to vm objects\n\n");
309 return false;
310 }
311
312 static bool
verify_generic_jetsam_criteria(void)313 verify_generic_jetsam_criteria(void)
314 {
315 T_LOG("Largest zone info");
316 print_zone_info(&largest_zone_name, &largest_zone_info);
317
318 /* If VM map entries is not the largest zone */
319 if (strcmp(largest_zone_name.mzn_name, VME_ZONE)) {
320 /* If vm objects is the largest zone and the VM map entries zone had comparable # of elements, return false */
321 if (!strcmp(largest_zone_name.mzn_name, VMOBJECTS_ZONE) && vme_zone_compares_to_vm_objects()) {
322 return false;
323 }
324 return true;
325 }
326 return false;
327 }
328
329 static void
begin_test_teardown(void)330 begin_test_teardown(void)
331 {
332 int ret, old_limit = 95;
333
334 /*
335 * Restore kern.zone_map_jetsam_limit to the default high value, to prevent further jetsams.
336 * We should change the value of old_limit if ZONE_MAP_JETSAM_LIMIT_DEFAULT changes in the kernel.
337 * We don't have a way to capture what the original value was before the test, because the
338 * T_META_SYSCTL_INT macro will have changed the value before the test starts running.
339 */
340 ret = sysctlbyname("kern.zone_map_jetsam_limit", NULL, NULL, &old_limit, sizeof(old_limit));
341 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.zone_map_jetsam_limit failed");
342 T_LOG("kern.zone_map_jetsam_limit set to %d%%", old_limit);
343
344
345 /* End ktrace session */
346 if (session != NULL) {
347 T_LOG("Ending ktrace session...");
348 ktrace_end(session, 1);
349 }
350
351 dispatch_sync(dq_spawn, ^{
352 T_LOG("Cancelling dispatch sources...");
353
354 /* Disable the timer that queries and prints zone info periodically */
355 if (ds_timer != NULL) {
356 dispatch_source_cancel(ds_timer);
357 }
358
359 /* Disable signal handler that spawns child processes */
360 if (ds_signal != NULL) {
361 /*
362 * No need for a dispatch_source_cancel_and_wait here.
363 * We're queueing this on the spawn queue, so no further
364 * processes will be spawned after the source is cancelled.
365 */
366 dispatch_source_cancel(ds_signal);
367 }
368 });
369 }
370
371 static void
cleanup_and_end_test(void)372 cleanup_and_end_test(void)
373 {
374 int i;
375
376 /*
377 * The atend handler executes on a different dispatch queue.
378 * We want to do the cleanup only once.
379 */
380 pthread_mutex_lock(&test_mtx);
381 if (test_ending) {
382 pthread_mutex_unlock(&test_mtx);
383 return;
384 }
385 test_ending = TRUE;
386 pthread_mutex_unlock(&test_mtx);
387
388 dispatch_async(dq_spawn, ^{
389 /*
390 * If the test succeeds, we will call dispatch_source_cancel twice, which is fine since
391 * the operation is idempotent. Just make sure to not drop all references to the dispatch sources
392 * (in this case we're not, we have globals holding references to them), or we can end up with
393 * use-after-frees which would be a problem.
394 */
395 /* Disable the timer that queries and prints zone info periodically */
396 if (ds_timer != NULL) {
397 dispatch_source_cancel(ds_timer);
398 }
399
400 /* Disable signal handler that spawns child processes */
401 if (ds_signal != NULL) {
402 dispatch_source_cancel(ds_signal);
403 }
404 });
405
406 pthread_mutex_lock(&test_mtx);
407 T_LOG("Number of processes spawned: %d", num_children);
408 T_LOG("Killing child processes...");
409
410 /* Kill all the child processes that were spawned */
411 for (i = 0; i < num_children; i++) {
412 pid_t pid = child_pids[i];
413 int status = 0;
414
415 /*
416 * Kill and wait for each child to exit
417 * Without this we were seeing hw_lock_bit timeouts in BATS.
418 */
419 kill(pid, SIGKILL);
420 pthread_mutex_unlock(&test_mtx);
421 if (waitpid(pid, &status, 0) < 0) {
422 T_LOG("waitpid returned status %d", status);
423 }
424 pthread_mutex_lock(&test_mtx);
425 }
426 usleep(500 * 1000);
427
428 /* Force zone_gc before starting test for another zone or exiting */
429 mach_zone_force_gc(mach_host_self());
430
431 /* End ktrace session */
432 if (session != NULL) {
433 ktrace_end(session, 1);
434 }
435
436 if (current_test.num_zones > 0) {
437 T_LOG("Relevant zone info at the end of the test:");
438 for (i = 0; i < current_test.num_zones; i++) {
439 print_zone_info(&(current_test.zone_names[i]), &(zone_info_array[i]));
440 }
441 }
442 }
443
444 static void
setup_ktrace_session(void)445 setup_ktrace_session(void)
446 {
447 int ret = 0;
448
449 T_LOG("Setting up ktrace session...");
450 session = ktrace_session_create();
451 T_QUIET; T_ASSERT_NOTNULL(session, "ktrace_session_create");
452
453 ktrace_set_interactive(session);
454
455 ktrace_set_dropped_events_handler(session, ^{
456 T_FAIL("Dropped ktrace events; might have missed an expected jetsam event. Terminating early.");
457 });
458
459 ktrace_set_completion_handler(session, ^{
460 ktrace_session_destroy(session);
461 T_END;
462 });
463
464 /* Listen for memorystatus_do_kill trace events */
465 ret = ktrace_events_single(session, (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)), ^(ktrace_event_t event) {
466 int i;
467 bool received_jetsam_event = false;
468
469 /*
470 * libktrace does not support DBG_FUNC_START/END in the event filter. It simply ignores it.
471 * So we need to explicitly check for the end event (a successful jetsam kill) here,
472 * instead of passing in ((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_START).
473 */
474 if (!(event->debugid & DBG_FUNC_START)) {
475 return;
476 }
477
478 /* Check for zone-map-exhaustion jetsam. */
479 if (event->arg2 == kMemorystatusKilledZoneMapExhaustion) {
480 begin_test_teardown();
481 T_LOG("[memorystatus_do_kill] jetsam reason: zone-map-exhaustion, pid: %d\n\n", (int)event->arg1);
482 if (current_test.test_index == VME_ZONE_TEST || current_test.test_index == VM_OBJECTS_ZONE_TEST) {
483 /*
484 * For the VM map entries zone we try to kill the leaking process.
485 * Verify that we jetsammed one of the processes we spawned.
486 *
487 * For the vm objects zone we pick the leaking process via the VM map entries
488 * zone, if the number of vm objects and VM map entries are comparable.
489 * The test simulates this scenario, we should see a targeted jetsam for the
490 * vm objects zone too.
491 */
492 pthread_mutex_lock(&test_mtx);
493 for (i = 0; i < num_children; i++) {
494 if (child_pids[i] == (pid_t)event->arg1) {
495 received_jetsam_event = true;
496 T_LOG("Received jetsam event for a child");
497 break;
498 }
499 }
500 pthread_mutex_unlock(&test_mtx);
501 /*
502 * If we didn't see a targeted jetsam, verify that the largest zone actually
503 * fulfilled the criteria for generic jetsams.
504 */
505 if (!received_jetsam_event && verify_generic_jetsam_criteria()) {
506 received_jetsam_event = true;
507 T_LOG("Did not receive jetsam event for a child, but generic jetsam criteria holds");
508 }
509 } else {
510 received_jetsam_event = true;
511 T_LOG("Received generic jetsam event");
512 }
513
514 T_QUIET; T_ASSERT_TRUE(received_jetsam_event, "Jetsam event not as expected");
515 } else {
516 /*
517 * The test relies on the children being able to send a signal to the parent, to continue spawning new processes
518 * that leak more zone memory. If a child is jetsammed for some other reason, the parent can get stuck waiting for
519 * a signal from the child, never being able to make progress (We spawn only a single process at a time to rate-limit
520 * the zone memory bloat.). If this happens, the test eventually times out. So if a child is jetsammed for some
521 * reason other than zone-map-exhaustion, end the test early.
522 *
523 * This typically happens when we end up triggering vm-pageshortage jetsams before zone-map-exhaustion jetsams.
524 * Lowering the zone_map_jetsam_limit if the zone map size was initially low should help with this too.
525 * See sysctlbyname("kern.zone_map_jetsam_limit"...) in run_test() below.
526 */
527 pthread_mutex_lock(&test_mtx);
528 for (i = 0; i < num_children; i++) {
529 if (child_pids[i] == (pid_t)event->arg1) {
530 begin_test_teardown();
531 T_PASS("Child pid %d was jetsammed due to reason %d. Terminating early.",
532 (int)event->arg1, (int)event->arg2);
533 }
534 }
535 pthread_mutex_unlock(&test_mtx);
536 }
537 });
538 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "ktrace_events_single");
539
540 ret = ktrace_start(session, dispatch_get_main_queue());
541 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "ktrace_start");
542 }
543
544 static void
query_zone_map_size(uint64_t * current,uint64_t * total)545 query_zone_map_size(uint64_t *current, uint64_t *total)
546 {
547 int ret;
548 uint64_t zstats[2];
549 size_t zstats_size = sizeof(zstats);
550
551 ret = sysctlbyname("kern.zone_map_size_and_capacity", &zstats, &zstats_size, NULL, 0);
552 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.zone_map_size_and_capacity failed");
553
554 T_LOG("Zone map capacity: %-30lldZone map size: %lld [%lld%% full]", zstats[1], zstats[0], (zstats[0] * 100) / zstats[1]);
555
556 #if (TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
557 int memstat_level;
558 size_t memstat_level_size = sizeof(memstat_level);
559 ret = sysctlbyname("kern.memorystatus_level", &memstat_level, &memstat_level_size, NULL, 0);
560 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.memorystatus_level failed");
561
562 T_LOG("kern.memorystatus_level = %d%%", memstat_level);
563 #endif
564 if (current) {
565 *current = zstats[0];
566 }
567 if (total) {
568 *total = zstats[1];
569 }
570 }
571
572 static void
spawn_child_process(void)573 spawn_child_process(void)
574 {
575 pid_t pid = -1;
576 char helper_func[50];
577 char *launch_tool_args[4];
578
579 pthread_mutex_lock(&test_mtx);
580 if (!test_ending) {
581 if (num_children == MAX_CHILD_PROCS) {
582 pthread_mutex_unlock(&test_mtx);
583 T_ASSERT_FAIL("Spawned too many children. Aborting test");
584 /* not reached */
585 }
586
587 strlcpy(helper_func, current_test.helper_func, sizeof(helper_func));
588 launch_tool_args[0] = testpath;
589 launch_tool_args[1] = "-n";
590 launch_tool_args[2] = helper_func;
591 launch_tool_args[3] = NULL;
592
593 /* Spawn the child process */
594 int rc = dt_launch_tool(&pid, launch_tool_args, false, NULL, NULL);
595 if (rc != 0) {
596 T_LOG("dt_launch tool returned %d with error code %d", rc, errno);
597 }
598 T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "dt_launch_tool");
599
600 child_pids[num_children++] = pid;
601 }
602 pthread_mutex_unlock(&test_mtx);
603 }
604
605 static void
run_test(void)606 run_test(void)
607 {
608 uint64_t mem;
609 uint32_t testpath_buf_size, pages;
610 int ret, pgsz, old_limit, new_limit = 0;
611 size_t sysctl_size;
612 uint64_t zone_cur, zone_tot, zone_target;
613
614 T_ATEND(cleanup_and_end_test);
615 T_SETUPBEGIN;
616
617 main_start = time(NULL);
618
619 testpath_buf_size = sizeof(testpath);
620 ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
621 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
622 T_LOG("Executable path: %s", testpath);
623
624 sysctl_size = sizeof(mem);
625 ret = sysctlbyname("hw.memsize", &mem, &sysctl_size, NULL, 0);
626 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl hw.memsize failed");
627 T_LOG("hw.memsize: %llu", mem);
628
629 sysctl_size = sizeof(pgsz);
630 ret = sysctlbyname("vm.pagesize", &pgsz, &sysctl_size, NULL, 0);
631 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pagesize failed");
632 T_LOG("vm.pagesize: %d", pgsz);
633
634 sysctl_size = sizeof(pages);
635 ret = sysctlbyname("vm.pages", &pages, &sysctl_size, NULL, 0);
636 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pages failed");
637 T_LOG("vm.pages: %d", pages);
638
639 sysctl_size = sizeof(old_limit);
640 ret = sysctlbyname("kern.zone_map_jetsam_limit", &old_limit, &sysctl_size, NULL, 0);
641 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.zone_map_jetsam_limit failed");
642 T_LOG("kern.zone_map_jetsam_limit: %d", old_limit);
643
644 /*
645 * In order to start jetsamming "quickly",
646 * set up the limit to be about 2x of what the current usage is.
647 */
648 query_zone_map_size(&zone_cur, &zone_tot);
649 zone_target = zone_cur * 2;
650
651 new_limit = (int)howmany(zone_target * 100, zone_tot);
652
653 if (new_limit < old_limit) {
654 /*
655 * We should be fine messing with the zone_map_jetsam_limit here, i.e. outside of T_META_SYSCTL_INT.
656 * When the test ends, T_META_SYSCTL_INT will restore the zone_map_jetsam_limit to what it was
657 * before the test anyway.
658 */
659 ret = sysctlbyname("kern.zone_map_jetsam_limit", NULL, NULL, &new_limit, sizeof(new_limit));
660 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.zone_map_jetsam_limit failed");
661 T_LOG("kern.zone_map_jetsam_limit set to %d%%", new_limit);
662 }
663
664 zone_info_array = (mach_zone_info_array_t) calloc((unsigned long)current_test.num_zones, sizeof *zone_info_array);
665
666 /*
667 * If the timeout specified by T_META_TIMEOUT is hit, the atend handler does not get called.
668 * So we're queueing a dispatch block to fire after TIMEOUT_SECS seconds, so we can exit cleanly.
669 */
670 dispatch_after(dispatch_time(DISPATCH_TIME_NOW, TIMEOUT_SECS * NSEC_PER_SEC), dispatch_get_main_queue(), ^{
671 T_ASSERT_FAIL("Timed out after %d seconds", TIMEOUT_SECS);
672 });
673
674 /*
675 * Create a dispatch source for the signal SIGUSR1. When a child is done allocating zone memory, it
676 * sends SIGUSR1 to the parent. Only then does the parent spawn another child. This prevents us from
677 * spawning many children at once and creating a lot of memory pressure.
678 */
679 signal(SIGUSR1, SIG_IGN);
680 dq_spawn = dispatch_queue_create("spawn_queue", DISPATCH_QUEUE_SERIAL);
681 ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dq_spawn);
682 T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create: signal");
683
684 dispatch_source_set_event_handler(ds_signal, ^{
685 uint64_t cur, tot;
686
687 query_zone_map_size(&cur, &tot);
688
689 if (cur + cur / 20 >= zone_target) {
690 /*
691 * Slow down allocation pace when nearing target.
692 */
693 sleep(1);
694 }
695 spawn_child_process();
696 });
697 dispatch_activate(ds_signal);
698
699 /* Timer to query jetsam-relevant zone info every second. Print it every 5 seconds. */
700 ds_timer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_queue_create("timer_queue", NULL));
701 T_QUIET; T_ASSERT_NOTNULL(ds_timer, "dispatch_source_create: timer");
702 dispatch_source_set_timer(ds_timer, dispatch_time(DISPATCH_TIME_NOW, NSEC_PER_SEC), NSEC_PER_SEC, 0);
703
704 dispatch_source_set_event_handler(ds_timer, ^{
705 query_zone_info();
706 });
707 dispatch_activate(ds_timer);
708
709 /* Set up a ktrace session to listen for jetsam events */
710 setup_ktrace_session();
711
712 T_SETUPEND;
713
714 /* Spawn the first child process */
715 T_LOG("Spawning child processes to allocate zone memory...\n\n");
716 spawn_child_process();
717
718 dispatch_main();
719 }
720
721 static void
move_to_idle_band(void)722 move_to_idle_band(void)
723 {
724 memorystatus_priority_properties_t props;
725
726 /*
727 * We want to move the processes we spawn into the idle band, so that jetsam can target them first.
728 * This prevents other important BATS tasks from getting killed, specially in LTE where we have very few
729 * processes running.
730 *
731 * This is only needed for tests which (are likely to) lead us down the generic jetsam path.
732 */
733 props.priority = JETSAM_PRIORITY_IDLE;
734 props.user_data = 0;
735
736 if (memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, getpid(), 0, &props, sizeof(props))) {
737 printf("memorystatus call to change jetsam priority failed\n");
738 exit(-1);
739 }
740 }
741
742 T_HELPER_DECL(allocate_vm_regions, "allocates VM regions")
743 {
744 move_to_idle_band();
745 allocate_vm_stuff(REGIONS);
746 }
747
748 T_HELPER_DECL(allocate_vm_objects, "allocates VM objects and VM regions")
749 {
750 move_to_idle_band();
751 allocate_vm_stuff(OBJECTS);
752 }
753
754 T_HELPER_DECL(allocate_from_generic_zone, "allocates from a generic zone")
755 {
756 move_to_idle_band();
757 allocate_from_generic_zone();
758 }
759
760 /*
761 * T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL) changes the zone_map_jetsam_limit to a
762 * lower value, so that the test can complete faster.
763 * The test allocates zone memory pretty aggressively which can cause the system to panic
764 * if the jetsam limit is quite high; a lower value keeps us from panicking.
765 */
766 T_DECL( memorystatus_vme_zone_test,
767 "allocates elements from the VM map entries zone, verifies zone-map-exhaustion jetsams",
768 T_META_ASROOT(true),
769 T_META_TIMEOUT(1800),
770 /* T_META_LTEPHASE(LTE_POSTINIT),
771 */
772 T_META_REQUIRES_SYSCTL_NE("kern.kasan.available", 1),
773 T_META_REQUIRES_SYSCTL_EQ("kern.development", 1),
774 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL),
775 T_META_TAG_VM_PREFERRED)
776 {
777 current_test = (test_config_struct) {
778 .test_index = VME_ZONE_TEST,
779 .helper_func = VME_ZONE_TEST_OPT,
780 .num_zones = 1,
781 .zone_names = (mach_zone_name_t[]){
782 { .mzn_name = VME_ZONE }
783 }
784 };
785 run_test();
786 }
787
788 T_DECL( memorystatus_vm_objects_zone_test,
789 "allocates elements from the VM objects and the VM map entries zones, verifies zone-map-exhaustion jetsams",
790 T_META_ASROOT(true),
791 T_META_TIMEOUT(1800),
792 /* T_META_LTEPHASE(LTE_POSTINIT),
793 */
794 T_META_REQUIRES_SYSCTL_NE("kern.kasan.available", 1),
795 T_META_REQUIRES_SYSCTL_EQ("kern.development", 1),
796 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL),
797 T_META_TAG_VM_PREFERRED)
798 {
799 current_test = (test_config_struct) {
800 .test_index = VM_OBJECTS_ZONE_TEST,
801 .helper_func = VM_OBJECTS_ZONE_TEST_OPT,
802 .num_zones = 2,
803 .zone_names = (mach_zone_name_t[]){
804 { .mzn_name = VME_ZONE },
805 { .mzn_name = VMOBJECTS_ZONE}
806 }
807 };
808 run_test();
809 }
810
811 T_DECL( memorystatus_generic_zone_test,
812 "allocates elements from a zone that doesn't have an optimized jetsam path, verifies zone-map-exhaustion jetsams",
813 T_META_ASROOT(true),
814 T_META_TIMEOUT(1800),
815 /* T_META_LTEPHASE(LTE_POSTINIT),
816 */
817 T_META_REQUIRES_SYSCTL_NE("kern.kasan.available", 1),
818 T_META_REQUIRES_SYSCTL_EQ("kern.development", 1),
819 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL),
820 T_META_TAG_VM_PREFERRED)
821 {
822 current_test = (test_config_struct) {
823 .test_index = GENERIC_ZONE_TEST,
824 .helper_func = GENERIC_ZONE_TEST_OPT,
825 .num_zones = 0,
826 .zone_names = NULL
827 };
828 run_test();
829 }
830