/* xref: /xnu-11215/tests/perf_vmfault.c (revision 8d741a5d) */
#include <unistd.h>
#include <stdlib.h>
#include <stdbool.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <darwintest.h>
#include <TargetConditionals.h>
#include <perfcheck_keys.h>

#include "benchmark/helpers.h"
#include "test_utils.h"

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.vm.perf"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("VM"),
	T_META_CHECK_LEAKS(false),
	T_META_TAG_PERF
	);

#ifdef DT_IOSMARK
#define MEMSIZE                 (1UL<<29)       /* 512 MB */
#else
#define MEMSIZE                 (1UL<<27)       /* 128 MB */
#endif
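
/*
 * Arbitrary VM tags used to label the per-thread regions in the
 * multiple-regions variant (see map_mem_regions_multiple()).
 */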
#define VM_TAG1                 100
#define VM_TAG2                 101
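
/*
 * Fault types exercised:
 *   SOFT_FAULT - pages are already resident; threads take read soft
 *                faults through a vm_remap()ed alias of the region.
 *   ZERO_FILL  - threads take zero-fill faults on first touch of fresh
 *                anonymous memory.
 */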
enum {
	SOFT_FAULT,
	ZERO_FILL,
	NUM_FAULT_TYPES
};
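
/*
 * Mapping variants. variant_str[] below is indexed by these values, so
 * index 0 is an unused "none" placeholder.
 *   VARIANT_DEFAULT          - plain mmap(); xnu caps each resulting VM
 *                              region at 128 MB.
 *   VARIANT_SINGLE_REGION    - one region backed by a named memory entry.
 *   VARIANT_MULTIPLE_REGIONS - one region per thread.
 */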
enum {
	VARIANT_DEFAULT = 1,
	VARIANT_SINGLE_REGION,
	VARIANT_MULTIPLE_REGIONS,
	NUM_MAPPING_VARIANTS
};

static char *variant_str[] = {
	"none",
	"default",
	"single-region",
	"multiple-regions"
};


typedef struct {
	char *region_addr;
	char *shared_region_addr;
	size_t region_len;
} memregion_config;

static memregion_config *memregion_config_per_thread;

static size_t pgsize;
static int num_threads;
static int ready_thread_count;
static int finished_thread_count;
static bool start_threads;
static dt_stat_time_t runtime;
static pthread_cond_t start_cvar;
static pthread_cond_t threads_ready_cvar;
static pthread_cond_t threads_finished_cvar;
static pthread_mutex_t ready_thread_count_lock;
static pthread_mutex_t finished_thread_count_lock;

static void map_mem_regions_default(int fault_type, size_t memsize);
static void map_mem_regions_single(int fault_type, size_t memsize);
static void map_mem_regions_multiple(int fault_type, size_t memsize);
static void map_mem_regions(int fault_type, int mapping_variant, size_t memsize);
static void unmap_mem_regions(int mapping_variant, size_t memsize);
static void setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize);
static void fault_pages(int thread_id);
static void execute_threads(void);
static void *thread_setup(void *arg);
static void run_test(int fault_type, int mapping_variant, size_t memsize);
static void setup_and_run_test(int fault_type, int threads);

/* Allocates memory using the default mmap behavior. Each VM region created is capped at 128 MB. */
static void
map_mem_regions_default(int fault_type, size_t memsize)
{
	volatile char val;
	vm_prot_t curprot, maxprot;
	char *ptr, *memblock, *memblock_share = NULL;

	memblock = (char *)mmap(NULL, memsize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
	T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");

	if (fault_type == SOFT_FAULT) {
		/* Fault in all the pages of the original region. */
		for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
			val = *ptr;
		}
		/* Remap the region so that subsequent accesses result in read soft faults. */
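		/*
		 * The alias shares the already-resident pages but starts with
		 * no pmap entries, so the first read of each page through it
		 * is resolved as a soft fault instead of a zero-fill fault.
		 */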
		T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
		    memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
		    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
	}
	setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Creates a single VM region by mapping in a named memory entry. */
static void
map_mem_regions_single(int fault_type, size_t memsize)
{
	volatile char val;
	vm_prot_t curprot, maxprot;
	char *ptr, *memblock = NULL, *memblock_share = NULL;
	vm_size_t size = memsize;
	vm_offset_t addr1 = 0;
	mach_port_t mem_handle = MACH_PORT_NULL;

	/* Allocate a region and fault in all the pages. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_allocate(mach_task_self(), &addr1, size, VM_FLAGS_ANYWHERE), "vm_allocate");
	for (ptr = (char *)addr1; ptr < (char *)addr1 + memsize; ptr += pgsize) {
		val = *ptr;
	}

	/* Create a named memory entry from the region allocated above, and de-allocate said region. */
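	/*
	 * mach_make_memory_entry() takes MAP_MEM_NAMED_CREATE OR'ed into the
	 * same argument as the VM_PROT_* bits; flags and protections share
	 * that parameter.
	 */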
	T_QUIET; T_ASSERT_MACH_SUCCESS(mach_make_memory_entry(mach_task_self(), &size, addr1, VM_PROT_ALL | MAP_MEM_NAMED_CREATE,
	    &mem_handle, MACH_PORT_NULL), "mach_make_memory_entry");
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_deallocate(mach_task_self(), addr1, size), "vm_deallocate");

	/* Map in the named entry and deallocate it. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_map(mach_task_self(), (vm_address_t *)&memblock, size, 0, VM_FLAGS_ANYWHERE, mem_handle, 0,
	    FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE), "vm_map");
	T_QUIET; T_ASSERT_MACH_SUCCESS(mach_port_deallocate(mach_task_self(), mem_handle), "mach_port_deallocate");

	if (fault_type == SOFT_FAULT) {
		/* Fault in all the pages of the original region. */
		for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
			val = *ptr;
		}
		/* Remap the region so that subsequent accesses result in read soft faults. */
		T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
		    memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
		    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
	}
	setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Allocates a separate VM region for each thread. */
static void
map_mem_regions_multiple(int fault_type, size_t memsize)
{
	int i;
	size_t region_len, num_pages;
	volatile char val;
	char *ptr, *memblock, *memblock_share;
	vm_prot_t curprot, maxprot;

	num_pages = memsize / pgsize;

	for (i = 0; i < num_threads; i++) {
		memblock = NULL;

		region_len = num_pages / (size_t)num_threads;
		if ((size_t)i < num_pages % (size_t)num_threads) {
			region_len++;
		}
		region_len *= pgsize;
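
		/*
		 * For anonymous mappings, the mmap() fd argument can carry a
		 * VM tag created with VM_MAKE_TAG() (see
		 * <mach/vm_statistics.h>). Alternating between two tags keeps
		 * adjacent regions from being coalesced into a single VM map
		 * entry, so each thread really gets its own region.
		 */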
		int fd = VM_MAKE_TAG((i % 2)? VM_TAG1 : VM_TAG2);
		memblock = (char *)mmap(NULL, region_len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, fd, 0);
		T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");
		memregion_config_per_thread[i].region_addr = memblock;
		memregion_config_per_thread[i].shared_region_addr = 0;
		memregion_config_per_thread[i].region_len = region_len;

		if (fault_type == SOFT_FAULT) {
			/* Fault in all the pages of the original region. */
			for (ptr = memblock; ptr < memblock + region_len; ptr += pgsize) {
				val = *ptr;
			}
			memblock_share = NULL;
			/* Remap the region so that subsequent accesses result in read soft faults. */
			T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
			    region_len, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
			    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
			memregion_config_per_thread[i].shared_region_addr = memblock_share;
		}
	}
}

static void
map_mem_regions(int fault_type, int mapping_variant, size_t memsize)
{
	memregion_config_per_thread = (memregion_config *)malloc(sizeof(*memregion_config_per_thread) * (size_t)num_threads);
	switch (mapping_variant) {
	case VARIANT_SINGLE_REGION:
		map_mem_regions_single(fault_type, memsize);
		break;
	case VARIANT_MULTIPLE_REGIONS:
		map_mem_regions_multiple(fault_type, memsize);
		break;
	case VARIANT_DEFAULT:
	default:
		map_mem_regions_default(fault_type, memsize);
	}
}
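
/*
 * Carve one region (and, for soft faults, its remapped alias) into
 * num_threads contiguous chunks, giving one extra page each to the first
 * (num_pages % num_threads) threads.
 */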
static void
setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize)
{
	int i;
	size_t region_len, region_start, num_pages;

	num_pages = memsize / pgsize;
	for (i = 0; i < num_threads; i++) {
		region_len = num_pages / (size_t)num_threads;
		region_start = region_len * (size_t)i;

		if ((size_t)i < num_pages % (size_t)num_threads) {
			region_start += (size_t)i;
			region_len++;
		} else {
			region_start += num_pages % (size_t)num_threads;
		}

		region_start *= pgsize;
		region_len *= pgsize;

		memregion_config_per_thread[i].region_addr = memblock + region_start;
		memregion_config_per_thread[i].shared_region_addr = ((fault_type == SOFT_FAULT) ?
		    memblock_share + region_start : 0);
		memregion_config_per_thread[i].region_len = region_len;
	}
}

static void
unmap_mem_regions(int mapping_variant, size_t memsize)
{
	if (mapping_variant == VARIANT_MULTIPLE_REGIONS) {
		int i;
		for (i = 0; i < num_threads; i++) {
			if (memregion_config_per_thread[i].shared_region_addr != 0) {
				T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].shared_region_addr,
				    memregion_config_per_thread[i].region_len), "munmap");
			}
			T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].region_addr,
			    memregion_config_per_thread[i].region_len), "munmap");
		}
	} else {
		if (memregion_config_per_thread[0].shared_region_addr != 0) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].shared_region_addr, memsize), "munmap");
		}
		T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].region_addr, memsize), "munmap");
	}

	/* map_mem_regions() allocates a fresh config array on every iteration. */
	free(memregion_config_per_thread);
	memregion_config_per_thread = NULL;
}
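
/*
 * Touch every page in this thread's region once. For soft-fault runs the
 * reads go through the remapped alias rather than the original mapping.
 */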
static void
fault_pages(int thread_id)
{
	char *ptr, *block;
	volatile char val;

	block = memregion_config_per_thread[thread_id].shared_region_addr ?
	    memregion_config_per_thread[thread_id].shared_region_addr :
	    memregion_config_per_thread[thread_id].region_addr;
	for (ptr = block; ptr < block + memregion_config_per_thread[thread_id].region_len; ptr += pgsize) {
		val = *ptr;
	}
}
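
/*
 * Worker thread body: check in under ready_thread_count_lock, wait on
 * start_cvar for the main thread's broadcast, fault the assigned region,
 * then check out under finished_thread_count_lock.
 */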
static void *
thread_setup(void *arg)
{
	int my_index = *((int *)arg);

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
	ready_thread_count++;
	if (ready_thread_count == num_threads) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&threads_ready_cvar), "pthread_cond_signal");
	}
	/* Wait for the start signal; the loop guards against spurious wakeups. */
	while (!start_threads) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&start_cvar, &ready_thread_count_lock), "pthread_cond_wait");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");

	fault_pages(my_index);

	/* Up the finished count */
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&finished_thread_count_lock), "pthread_mutex_lock");
	finished_thread_count++;
	if (finished_thread_count == num_threads) {
		/* All the threads are done. Wake up the main thread */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&threads_finished_cvar), "pthread_cond_signal");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&finished_thread_count_lock), "pthread_mutex_unlock");
	return NULL;
}

static void
execute_threads(void)
{
	int thread_index, thread_retval;
	int *thread_indices;
	void *thread_retval_ptr = &thread_retval;
	pthread_t *threads;

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&threads_ready_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&start_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_init(&ready_thread_count_lock, NULL), "pthread_mutex_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&threads_finished_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_init(&finished_thread_count_lock, NULL), "pthread_mutex_init");
	ready_thread_count = 0;
	finished_thread_count = 0;
	start_threads = false;

	threads = (pthread_t *)malloc(sizeof(*threads) * (size_t)num_threads);
	thread_indices = (int *)malloc(sizeof(*thread_indices) * (size_t)num_threads);
	for (thread_index = 0; thread_index < num_threads; thread_index++) {
		thread_indices[thread_index] = thread_index;
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_create(&threads[thread_index], NULL,
		    thread_setup, (void *)&thread_indices[thread_index]), "pthread_create");
	}

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
	while (ready_thread_count != num_threads) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&threads_ready_cvar, &ready_thread_count_lock),
		    "pthread_cond_wait");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");
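
	/*
	 * Only the faulting work is timed: every worker is parked on
	 * start_cvar at this point, so the measured interval runs from the
	 * broadcast below until the last thread signals
	 * threads_finished_cvar.
	 */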
	T_STAT_MEASURE(runtime) {
		/* Ungate the threads */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
		start_threads = true;
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_broadcast(&start_cvar), "pthread_cond_broadcast");
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");
		/* Wait for the threads to finish */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&finished_thread_count_lock), "pthread_mutex_lock");
		while (finished_thread_count != num_threads) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&threads_finished_cvar, &finished_thread_count_lock), "pthread_cond_wait");
		}
	};
	/* Drop the lock still held from the wait loop above. */
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&finished_thread_count_lock), "pthread_mutex_unlock");

	/* Join the threads */
	for (thread_index = 0; thread_index < num_threads; thread_index++) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_join(threads[thread_index], &thread_retval_ptr),
		    "pthread_join");
	}

	free(threads);
	free(thread_indices);
}

static void
run_test(int fault_type, int mapping_variant, size_t memsize)
{
	char metric_str[32];
	size_t num_pages;
	size_t sysctl_size = sizeof(pgsize);
	int ret = sysctlbyname("vm.pagesize", &pgsize, &sysctl_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pagesize failed");

	num_pages = memsize / pgsize;

	T_QUIET; T_ASSERT_LT(fault_type, NUM_FAULT_TYPES, "invalid test type");
	T_QUIET; T_ASSERT_LT(mapping_variant, NUM_MAPPING_VARIANTS, "invalid mapping variant");
	T_QUIET; T_ASSERT_GT(num_threads, 0, "num_threads <= 0");
	T_QUIET; T_ASSERT_GT((int)num_pages / num_threads, 0, "num_pages/num_threads <= 0");

	T_LOG("No. of cpus:     %d", get_ncpu());
	T_LOG("No. of threads:  %d", num_threads);
	T_LOG("No. of pages:    %zu", num_pages);
	T_LOG("Pagesize:        %zu", pgsize);
	T_LOG("Allocation size: %zu MB", memsize / (1024 * 1024));
	T_LOG("Mapping variant: %s", variant_str[mapping_variant]);

	snprintf(metric_str, sizeof(metric_str), "Runtime-%s", variant_str[mapping_variant]);
	runtime = dt_stat_time_create(metric_str);
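
	/*
	 * Each sample re-creates the mappings so every iteration starts from
	 * the same state, then keeps sampling until the runtime statistic is
	 * stable.
	 */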
	while (!dt_stat_stable(runtime)) {
		map_mem_regions(fault_type, mapping_variant, memsize);
		execute_threads();
		unmap_mem_regions(mapping_variant, memsize);
	}

	dt_stat_finalize(runtime);
	T_LOG("Throughput-%s (MB/s): %lf\n\n", variant_str[mapping_variant], (double)memsize / (1024 * 1024) / dt_stat_mean((dt_stat_t)runtime));
}
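
/*
 * Environment overrides (all optional):
 *   NTHREADS  - thread count; when set, the single-threaded tests skip
 *               themselves so only the multi-threaded runs are measured.
 *   MEMSIZEMB - total allocation size in MB (default MEMSIZE).
 *   VARIANT   - run only the given mapping variant (1-3) instead of all.
 * Example invocation (binary name hypothetical):
 *   NTHREADS=8 MEMSIZEMB=256 ./perf_vmfault
 */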
static void
setup_and_run_test(int fault_type, int threads)
{
	int i, mapping_variant;
	size_t memsize;
	char *e;

	mapping_variant = VARIANT_DEFAULT;
	memsize = MEMSIZE;
	num_threads = threads;

	if ((e = getenv("NTHREADS"))) {
		if (threads == 1) {
			T_SKIP("Custom environment variables specified. Skipping single threaded version.");
		}
		num_threads = (int)strtol(e, NULL, 0);
	}

	if ((e = getenv("MEMSIZEMB"))) {
		memsize = (size_t)strtol(e, NULL, 0) * 1024 * 1024;
	}

	if ((e = getenv("VARIANT"))) {
		mapping_variant = (int)strtol(e, NULL, 0);
		run_test(fault_type, mapping_variant, memsize);
	} else {
		for (i = VARIANT_DEFAULT; i < NUM_MAPPING_VARIANTS; i++) {
			run_test(fault_type, i, memsize);
		}
	}

	T_END;
}

T_DECL(read_soft_fault,
    "Read soft faults (single thread)", T_META_TAG_VM_NOT_ELIGIBLE)
{
	setup_and_run_test(SOFT_FAULT, 1);
}

T_DECL(read_soft_fault_multithreaded,
    "Read soft faults (multi-threaded)", T_META_TAG_VM_NOT_ELIGIBLE)
{
	char *e;
	int nthreads;

	/* iOSMark passes in the no. of threads via an env. variable */
	if ((e = getenv("DT_STAT_NTHREADS"))) {
		nthreads = (int)strtol(e, NULL, 0);
	} else {
		nthreads = get_ncpu();
		if (nthreads == 1) {
			T_SKIP("Skipping multi-threaded test on single core device.");
		}
	}
	setup_and_run_test(SOFT_FAULT, nthreads);
}

T_DECL(zero_fill_fault,
    "Zero fill faults (single thread)", T_META_TAG_VM_NOT_ELIGIBLE)
{
	setup_and_run_test(ZERO_FILL, 1);
}

T_DECL(zero_fill_fault_multithreaded,
    "Zero fill faults (multi-threaded)",
    XNU_T_META_SOC_SPECIFIC, T_META_TAG_VM_NOT_ELIGIBLE)
{
	char *e;
	int nthreads;

	/* iOSMark passes in the no. of threads via an env. variable */
	if ((e = getenv("DT_STAT_NTHREADS"))) {
		nthreads = (int)strtol(e, NULL, 0);
	} else {
		nthreads = get_ncpu();
		if (nthreads == 1) {
			T_SKIP("Skipping multi-threaded test on single core device.");
		}
	}
	setup_and_run_test(ZERO_FILL, nthreads);
}
465