/*
 * Benchmark VM fault throughput.
 * This test faults memory for a configurable amount of time across a
 * configurable number of threads. Currently it only measures zero fill
 * faults, and it supports two variants:
 * 1. Each thread gets its own vm objects to fault in
 * 2. Threads share vm objects
 *
 * We'll add more fault types as we identify problematic user-facing workloads
 * in macro benchmarks.
 *
 * Throughput is reported as pages / second using both wall time and cpu time.
 * CPU time is a more reliable metric for regression testing, but wall time can
 * highlight blocking in the VM.
 *
 * Running this benchmark directly is not recommended.
 * Use fault_throughput.lua, which provides a nicer interface and outputs
 * perfdata.
 */
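/*
 * Example invocation (illustrative; the binary name and values are
 * placeholders, see parse_arguments() below for the exact grammar):
 *     ./fault_throughput -v separate-objects 10 8
 * runs the separate-objects variant for 10 seconds with 8 faulting threads
 * and verbose logging. An optional trailing argument pins the worker
 * threads to cpus, starting at that cpu id.
 */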
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/sysctl.h>
/*
 * TODO: Make this benchmark runnable on linux so we can do a perf comparison.
 * We're mostly using POSIX APIs, but we'll need to replace
 * the sysctls with the /proc equivalents, and replace clock_gettime_nsec_np
 * with the linux equivalent.
 */
#include <mach/mach.h>

#include <TargetConditionals.h>

#include <pthread.h>
#include <stdatomic.h>

#include "benchmark/helpers.h"

#if (TARGET_OS_OSX || TARGET_OS_SIMULATOR)
/*
 * On non-embedded platforms we coalesce vm objects up to 128 MB, so
 * we make the objects 128 MB on those platforms to ensure they're not
 * merged with anything else.
 */
const static size_t kVmObjectSize = 128 * (1UL << 20);
#else
/*
 * Embedded platforms don't coalesce vm objects. This number
 * needs to be big enough that faulting it in dwarfs the cost of dequeuing
 * it from the work queue, but can't be too large or else we won't be able
 * to allocate one per thread in the separate-objects benchmark.
 */
const static size_t kVmObjectSize = 4 * (1UL << 20);
#endif /* (TARGET_OS_OSX || TARGET_OS_SIMULATOR) */
static const clockid_t kThreadCPUTimeClock = CLOCK_THREAD_CPUTIME_ID;
/* These globals are set dynamically during test setup based on sysctls. */
static uint64_t kCacheLineSize = 0;
/* The VM page size */
static size_t kPageSize = 0;


typedef struct fault_buffer {
	unsigned char* fb_start; /* The start of this buffer. */
	size_t fb_size; /* The size of this buffer in bytes. */
} fault_buffer_t;

typedef enum test_variant {
	VARIANT_SEPARATE_VM_OBJECTS,
	VARIANT_SHARE_VM_OBJECTS
} test_variant_t;

typedef struct test_globals {
	/* This lock protects: tg_cv, tg_running_count, tg_done, tg_current_iteration, and tg_iterations_completed. */
	pthread_mutex_t tg_lock;
	pthread_cond_t tg_cv;
	/* The number of currently running threads */
	unsigned int tg_running_count;
	/* Set during cleanup to indicate that the benchmark is over. */
	bool tg_done;
	size_t tg_current_iteration;
	size_t tg_iterations_completed;
	unsigned int tg_num_threads;
	test_variant_t tg_variant;
	bool pin_threads;
	/*
	 * An array of memory objects to fault in.
	 * This is basically a workqueue of
	 * contiguous chunks of memory that the worker threads
	 * will fault in.
	 */
	fault_buffer_t *tg_fault_buffer_arr;
	size_t tg_fault_buffer_arr_length;
	/*
	 * To avoid false sharing, we pad the test globals with an extra cache line and place the atomic
	 * next_fault_buffer_index size_t after the cache line.
	 */
	__unused char padding[];
	/*
	 * This field is directly after the padding buffer.
	 * It is used to synchronize access to tg_fault_buffer_arr.
	 */
	//_Atomic size_t tg_next_fault_buffer_index;
} test_globals_t;
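
/*
 * Sketch of the allocation produced by allocate_test_globals() (the sizes
 * below come from that function; this comment is illustrative only):
 *
 *   | test_globals_t | kCacheLineSize bytes of padding | _Atomic size_t |
 *
 * next_fault_buffer_index_ptr() returns a pointer to the trailing atomic,
 * so the index that worker threads increment lives on its own cache line
 * and doesn't falsely share with the fields above.
 */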

typedef struct {
	void *test_globals;
	uint32_t cpu_id;
} faulting_thread_args_t;

static faulting_thread_args_t *faulting_thread_args;

static const char* kSeparateObjectsArgument = "separate-objects";
static const char* kShareObjectsArgument = "share-objects";

/* Arguments parsed from the command line */
typedef struct test_args {
	uint32_t n_threads;
	uint32_t first_cpu;
	uint64_t duration_seconds;
	test_variant_t variant;
	bool pin_threads;
	bool verbose;
} test_args_t;

/*
 * Fault in the pages in the given buffer.
 */
static void fault_pages(fault_buffer_t *buffer, size_t stride);
/* Get a unique fault buffer from the global work queue. */
static fault_buffer_t *get_fault_buffer(test_globals_t* globals);
/*
 * Grabs buffers from the global test structure and faults them in, using this
 * test variant's stride, until there are no more buffers to grab.
 * Returns the number of microseconds spent on-cpu.
 */
static uint64_t grab_and_fault_pages(test_globals_t* globals);

static bool worker_thread_iteration_setup(size_t current_iteration, test_globals_t *globals);
static void worker_thread_iteration_complete(test_globals_t *globals);

static void parse_arguments(int argc, char **argv, test_args_t *args);
/*
 * Sets up the test globals and spawns the background threads to do the faults.
 * Returns an array of size `num_threads` containing the thread ids of the
 * spawned threads.
 */
static pthread_t* setup_test(test_globals_t *globals, const test_args_t *args, size_t memory_size, bool verbose);
static test_globals_t *allocate_test_globals(void);
/* Initializes variables in the globals array. */
static void init_globals(test_globals_t *globals, const test_args_t *args);
static inline _Atomic size_t *next_fault_buffer_index_ptr(test_globals_t *globals);
/*
 * Called on the main thread.
 * Waits for the background threads to be ready, sets up the memory objects,
 * and then starts a faulting iteration.
 * Returns the start (wall) time.
 */
static uint64_t start_iteration(test_globals_t* globals, test_variant_t variant, bool verbose);
/*
 * Called on the main thread.
 * Waits for the background threads to complete the iteration and cleans up.
 * Returns the wall time elapsed for this iteration in nanoseconds.
 */
static uint64_t finish_iteration(test_globals_t *globals, uint64_t start_time);
/*
 * Called on the main thread.
 * Maps buffers and places them in the work queue.
 */
static void setup_memory(test_globals_t* globals, test_variant_t variant);
/*
 * Dump test results as a csv to stdout.
 * Use fault_throughput.lua to convert to perfdata.
 */
static void output_results(const test_globals_t *globals, double walltime_elapsed_seconds, double cputime_elapsed_seconds);
static void cleanup_test(test_globals_t *globals);
/*
 * Join the background threads and return the total microseconds
 * of cpu time spent faulting across all of the threads.
 * Takes ownership of the threads array and frees it.
 */
static uint64_t join_background_threads(test_globals_t *globals, pthread_t *threads);
static void unmap_fault_buffers(test_globals_t *globals);
/*
 * Get the stride between each vm object in the fault buffer array.
 */
static size_t fault_buffer_stride(const test_globals_t *globals);
int
main(int argc, char **argv)
{
	/* How much memory should the test consume (per-core on the system)? */
#if (TARGET_OS_OSX || TARGET_OS_SIMULATOR)
	static const size_t memory_per_core = kVmObjectSize;
#else
	static const size_t memory_per_core = 25 * (1UL << 20);
#endif /* (TARGET_OS_OSX || TARGET_OS_SIMULATOR) */
	const size_t kMemSize = memory_per_core * (size_t) get_ncpu();
	test_globals_t *globals = allocate_test_globals();
	/* Total wall-time spent faulting in pages. */
	uint64_t wall_time_elapsed_ns = 0;
	/* Total cpu-time spent faulting in pages */
	uint64_t cpu_time_faulting_us = 0;
	uint64_t start_time_ns;
	test_args_t args;
	parse_arguments(argc, argv, &args);
	pthread_t* threads = setup_test(globals, &args, kMemSize, args.verbose);

	/* Keep doing more iterations until we've hit our (wall) time budget */
	while (wall_time_elapsed_ns < args.duration_seconds * kNumNanosecondsInSecond) {
		benchmark_log(args.verbose, "----Starting Iteration %lu-----\n", globals->tg_current_iteration + 1);
		start_time_ns = start_iteration(globals, args.variant, args.verbose);
		wall_time_elapsed_ns += finish_iteration(globals, start_time_ns);
		benchmark_log(args.verbose, "----Completed Iteration %lu----\n", globals->tg_current_iteration);
	}

	benchmark_log(args.verbose, "Hit time budget\nJoining worker threads\n");
	cpu_time_faulting_us = join_background_threads(globals, threads);
	benchmark_log(args.verbose, "----End Test Output----\n");
	output_results(globals, (double) wall_time_elapsed_ns / kNumNanosecondsInSecond,
	    (double) cpu_time_faulting_us / kNumMicrosecondsInSecond);
	cleanup_test(globals);

	return 0;
}


/* The main loop for the worker threads. */
static void*
faulting_thread(void* arg)
{
	test_globals_t* globals = ((faulting_thread_args_t *)arg)->test_globals;
	uint64_t on_cpu_time_faulting = 0;
	size_t current_iteration = 1;

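	/*
	 * If requested, pin this worker to its assigned CPU before doing any
	 * faulting work, using the scheduler's thread-bind sysctl.
	 */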
	if (globals->pin_threads) {
		uint32_t cpu_id = ((faulting_thread_args_t *)arg)->cpu_id;
		int err = sysctlbyname("kern.sched_thread_bind_cpu", NULL, 0, &cpu_id, sizeof(cpu_id));
		assert(err == 0);
	}

	while (true) {
		bool should_continue = worker_thread_iteration_setup(current_iteration, globals);
		if (!should_continue) {
			break;
		}
		on_cpu_time_faulting += grab_and_fault_pages(globals);
		worker_thread_iteration_complete(globals);
		current_iteration++;
	}
	return (void*)on_cpu_time_faulting;
}

/*
 * Called on the worker threads before each iteration to synchronize this
 * iteration start with the other threads.
 * Returns true if the iteration should continue, and false if the test is over.
 */
static bool
worker_thread_iteration_setup(size_t current_iteration, test_globals_t *globals)
{
	bool should_continue = false;
	int ret = 0;
	// Gate on the other threads being ready to start
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	globals->tg_running_count++;
	if (globals->tg_running_count == globals->tg_num_threads) {
		// All the worker threads are running.
		// Wake up the main thread so that it can ungate the test.
		ret = pthread_cond_broadcast(&globals->tg_cv);
		assert(ret == 0);
	}
	/*
	 * The main thread will start this iteration by incrementing
	 * tg_current_iteration. Block until that happens.
	 * See start_iteration for the wakeup code.
	 */
	while (!globals->tg_done && globals->tg_current_iteration != current_iteration) {
		ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
		assert(ret == 0);
	}
	should_continue = !globals->tg_done;
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
	return should_continue;
}

/*
 * Called on the worker threads before each iteration finishes to synchronize
 * with the other threads.
 */
static void
worker_thread_iteration_complete(test_globals_t *globals)
{
	int ret;
	// Mark ourselves as done and wait for the other threads to finish
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	globals->tg_running_count--;
	if (globals->tg_running_count == 0) {
		// We're the last one to finish. Mark this iteration as completed and wake everyone up.
		globals->tg_iterations_completed++;
		ret = pthread_cond_broadcast(&globals->tg_cv);
		assert(ret == 0);
	} else {
		// Others are running. Wait for them to finish.
		while (globals->tg_iterations_completed != globals->tg_current_iteration) {
			ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
			assert(ret == 0);
		}
	}
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
}

static void
fault_pages(fault_buffer_t *buffer, size_t stride)
{
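	/*
	 * Touch one byte every `stride` bytes. The first touch of each
	 * previously-untouched page takes a zero fill fault; the volatile read
	 * keeps the compiler from optimizing the loop away.
	 */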
	volatile unsigned char val;
	for (unsigned char* ptr = buffer->fb_start; ptr < buffer->fb_start + buffer->fb_size; ptr += stride) {
		val = *ptr;
	}
}

static fault_buffer_t *
get_fault_buffer(test_globals_t* globals)
{
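	/*
	 * The work queue is just an index into tg_fault_buffer_arr: each thread
	 * atomically claims the next slot. Once the index passes the end of the
	 * array, the queue is drained for this iteration and we return NULL.
	 */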
	size_t index = atomic_fetch_add_explicit(next_fault_buffer_index_ptr(globals), 1UL, memory_order_acq_rel);
	if (index < globals->tg_fault_buffer_arr_length) {
		return &globals->tg_fault_buffer_arr[index];
	}
	return NULL;
}

static uint64_t
grab_and_fault_pages(test_globals_t* globals)
{
	struct timespec start_time, end_time;
	uint64_t microseconds_faulting_on_cpu = 0;
	int ret;
	size_t stride = fault_buffer_stride(globals) * kPageSize;
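	/*
	 * Each buffer is timed with the per-thread CPU-time clock, so time spent
	 * blocked (e.g. waiting in the kernel) does not inflate the cpu-time
	 * throughput number.
	 */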
	while (true) {
		fault_buffer_t *object = get_fault_buffer(globals);
		if (object == NULL) {
			break;
		}
		ret = clock_gettime(kThreadCPUTimeClock, &start_time);
		assert(ret == 0);

		fault_pages(object, stride);

		ret = clock_gettime(kThreadCPUTimeClock, &end_time);
		assert(ret == 0);
		microseconds_faulting_on_cpu += (unsigned long) timespec_difference_us(&end_time, &start_time);
	}
	return microseconds_faulting_on_cpu;
}

static uint64_t
start_iteration(test_globals_t* globals, test_variant_t variant, bool verbose)
{
	int ret;
	uint64_t start_time;
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	benchmark_log(verbose, "Waiting for workers to catch up before starting next iteration.\n");
	/* Wait until all the threads are ready to go to the next iteration */
	while (globals->tg_running_count != globals->tg_num_threads) {
		ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
		assert(ret == 0);
	}
	benchmark_log(verbose, "Workers are all caught up\n");
	setup_memory(globals, variant);
	benchmark_log(verbose, "Initialized data structures for iteration. Waking workers.\n");
	/* Grab a timestamp, tick the current iteration, and wake up the worker threads */
	start_time = current_timestamp_ns();
	globals->tg_current_iteration++;
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
	ret = pthread_cond_broadcast(&globals->tg_cv);
	assert(ret == 0);
	return start_time;
}

static uint64_t
finish_iteration(test_globals_t* globals, uint64_t start_time)
{
	int ret;
	uint64_t end_time;
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	while (globals->tg_iterations_completed != globals->tg_current_iteration) {
		ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
		assert(ret == 0);
	}
	end_time = current_timestamp_ns();
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
	unmap_fault_buffers(globals);
	return end_time - start_time;
}

static void
setup_memory(test_globals_t* globals, test_variant_t variant)
{
	size_t stride = fault_buffer_stride(globals);
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		fault_buffer_t *object = &globals->tg_fault_buffer_arr[i];
		object->fb_start = mmap_buffer(kVmObjectSize);
		object->fb_size = kVmObjectSize;
		if (variant == VARIANT_SHARE_VM_OBJECTS) {
			/*
			 * Insert another buffer into the work queue for each thread.
			 * Each buffer starts 1 page past where the previous buffer started into the vm object.
			 * Since each thread strides by the number of threads * the page size they won't fault in the same pages.
			 */
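			/*
			 * For example (illustrative): with 4 threads, buffers i..i+3 all
			 * point into this one mapping at page offsets 0..3, and each
			 * buffer is walked with a stride of 4 pages, so collectively the
			 * buffers fault every page of the object exactly once.
			 */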
			for (size_t j = 1; j < globals->tg_num_threads; j++) {
				size_t offset = kPageSize * j;
				fault_buffer_t *offset_object = &globals->tg_fault_buffer_arr[i + j];
				offset_object->fb_start = object->fb_start + offset;
				offset_object->fb_size = object->fb_size - offset;
			}
		} else if (variant != VARIANT_SEPARATE_VM_OBJECTS) {
			fprintf(stderr, "Unknown test variant.\n");
			exit(2);
		}
	}
	atomic_store_explicit(next_fault_buffer_index_ptr(globals), 0, memory_order_release);
}

static void
unmap_fault_buffers(test_globals_t* globals)
{
	size_t stride = fault_buffer_stride(globals);
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		fault_buffer_t *buffer = &globals->tg_fault_buffer_arr[i];
		int res = munmap(buffer->fb_start, buffer->fb_size);
		assert(res == 0);
	}
}

static test_globals_t *
allocate_test_globals(void)
{
	test_globals_t *globals = NULL;
	int ret;
	if (kCacheLineSize == 0) {
		size_t cachelinesize_size = sizeof(kCacheLineSize);
		ret = sysctlbyname("hw.cachelinesize", &kCacheLineSize, &cachelinesize_size, NULL, 0);
		assert(ret == 0);
		assert(kCacheLineSize > 0);
	}
	if (kPageSize == 0) {
		size_t pagesize_size = sizeof(kPageSize);
		ret = sysctlbyname("vm.pagesize", &kPageSize, &pagesize_size, NULL, 0);
		assert(ret == 0);
		assert(kPageSize > 0);
	}
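	/*
	 * Reserve room for the globals themselves, a full cache line of padding,
	 * and the trailing atomic work-queue index (see the layout sketch after
	 * test_globals_t above).
	 */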
	size_t test_globals_size = sizeof(test_globals_t) + kCacheLineSize + sizeof(_Atomic size_t);
	globals = malloc(test_globals_size);
	assert(globals != NULL);
	memset(globals, 0, test_globals_size);
	return globals;
}

static void
init_globals(test_globals_t *globals, const test_args_t *args)
{
	pthread_mutexattr_t mutex_attrs;
	pthread_condattr_t cond_attrs;
	int ret;
	memset(globals, 0, sizeof(test_globals_t));

	ret = pthread_mutexattr_init(&mutex_attrs);
	assert(ret == 0);
	ret = pthread_mutex_init(&globals->tg_lock, &mutex_attrs);
	assert(ret == 0);
	ret = pthread_condattr_init(&cond_attrs);
	assert(ret == 0);
	ret = pthread_cond_init(&globals->tg_cv, &cond_attrs);
	assert(ret == 0);
	ret = pthread_mutexattr_destroy(&mutex_attrs);
	assert(ret == 0);
	ret = pthread_condattr_destroy(&cond_attrs);
	assert(ret == 0);

	globals->tg_num_threads = args->n_threads;
	globals->tg_variant = args->variant;
	globals->pin_threads = args->pin_threads;
}

static void
init_fault_buffer_arr(test_globals_t *globals, const test_args_t *args, size_t memory_size)
{
	if (args->variant == VARIANT_SEPARATE_VM_OBJECTS) {
		// This variant creates separate vm objects up to memory size bytes total.
		globals->tg_fault_buffer_arr_length = memory_size / kVmObjectSize;
	} else if (args->variant == VARIANT_SHARE_VM_OBJECTS) {
		// This variant also creates separate vm objects up to memory size bytes total,
		// and places a fault buffer into each vm object for each thread.
		globals->tg_fault_buffer_arr_length = memory_size / kVmObjectSize * globals->tg_num_threads;
	} else {
		fprintf(stderr, "Unsupported test variant.\n");
		exit(2);
	}
	// It doesn't make sense to have more threads than elements in the work queue.
	// NB: Since we scale memory_size by ncpus, this can only happen if the user
	// tries to run the benchmark with many more threads than cores.
	assert(globals->tg_fault_buffer_arr_length >= globals->tg_num_threads);
	globals->tg_fault_buffer_arr = calloc(globals->tg_fault_buffer_arr_length, sizeof(fault_buffer_t));
	assert(globals->tg_fault_buffer_arr);
}

static pthread_t *
spawn_worker_threads(test_globals_t *globals, unsigned int num_threads, unsigned int first_cpu)
{
	int ret;
	pthread_attr_t pthread_attrs;
	globals->tg_num_threads = num_threads;
	pthread_t* threads = malloc(sizeof(pthread_t) * num_threads);
	faulting_thread_args = malloc(sizeof(faulting_thread_args_t) * num_threads);
	assert(threads);
	assert(faulting_thread_args);
	ret = pthread_attr_init(&pthread_attrs);
	assert(ret == 0);
	// Spawn the background threads
	for (unsigned int i = 0; i < num_threads; i++) {
		if (globals->pin_threads) {
			faulting_thread_args[i].cpu_id = (i + first_cpu) % get_ncpu();
		}
		faulting_thread_args[i].test_globals = globals;
		ret = pthread_create(threads + i, &pthread_attrs, faulting_thread, &faulting_thread_args[i]);
		assert(ret == 0);
	}
	ret = pthread_attr_destroy(&pthread_attrs);
	assert(ret == 0);
	return threads;
}

static pthread_t*
setup_test(test_globals_t *globals, const test_args_t *args, size_t memory_size, bool verbose)
{
	init_globals(globals, args);
	init_fault_buffer_arr(globals, args, memory_size);
	benchmark_log(verbose, "Initialized global data structures.\n");
	pthread_t *workers = spawn_worker_threads(globals, args->n_threads, args->first_cpu);
	benchmark_log(verbose, "Spawned workers.\n");
	return workers;
}

static uint64_t
join_background_threads(test_globals_t *globals, pthread_t *threads)
{
	// Set the done flag so that the background threads exit
	int ret;
	uint64_t total_cputime_spent_faulting = 0;
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	globals->tg_done = true;
	ret = pthread_cond_broadcast(&globals->tg_cv);
	assert(ret == 0);
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);

	// Join the background threads
	for (unsigned int i = 0; i < globals->tg_num_threads; i++) {
		uint64_t cputime_spent_faulting = 0;
		ret = pthread_join(threads[i], (void **)&cputime_spent_faulting);
		assert(ret == 0);
		total_cputime_spent_faulting += cputime_spent_faulting;
	}
	free(threads);
	free(faulting_thread_args);
	return total_cputime_spent_faulting;
}

static void
cleanup_test(test_globals_t* globals)
{
	int ret;
	ret = pthread_mutex_destroy(&globals->tg_lock);
	assert(ret == 0);
	ret = pthread_cond_destroy(&globals->tg_cv);
	assert(ret == 0);
	free(globals->tg_fault_buffer_arr);
	free(globals);
}

static void
output_results(const test_globals_t* globals, double walltime_elapsed_seconds, double cputime_elapsed_seconds)
{
	size_t pgsize;
	size_t sysctl_size = sizeof(pgsize);
	int ret = sysctlbyname("vm.pagesize", &pgsize, &sysctl_size, NULL, 0);
	assert(ret == 0);
	size_t num_pages = 0;
	double walltime_throughput, cputime_throughput;
	size_t stride = fault_buffer_stride(globals);
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		num_pages += globals->tg_fault_buffer_arr[i].fb_size / pgsize;
	}
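	/* Every iteration faults the same set of buffers, so the total is pages-per-iteration times iterations. */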
	num_pages *= globals->tg_iterations_completed;
	walltime_throughput = num_pages / walltime_elapsed_seconds;
	cputime_throughput = num_pages / cputime_elapsed_seconds;
	printf("-----Results-----\n");
	printf("Throughput (pages / wall second), Throughput (pages / CPU second)\n");
	printf("%f,%f\n", walltime_throughput, cputime_throughput);
}

static void
print_help(char** argv)
{
	fprintf(stderr, "%s: [-v] <test-variant> duration num_threads [first_cpu_to_pin]\n", argv[0]);
	fprintf(stderr, "\ntest variants:\n");
	fprintf(stderr, " %s Fault in different vm objects in each thread.\n", kSeparateObjectsArgument);
	fprintf(stderr, " %s Share vm objects across faulting threads.\n", kShareObjectsArgument);
}

static void
parse_arguments(int argc, char** argv, test_args_t *args)
{
	int current_argument = 1;
	memset(args, 0, sizeof(test_args_t));
	if (argc < 4 || argc > 6) {
		print_help(argv);
		exit(1);
	}
	if (argv[current_argument][0] == '-') {
		if (strcmp(argv[current_argument], "-v") == 0) {
			args->verbose = true;
		} else {
			fprintf(stderr, "Unknown argument %s\n", argv[current_argument]);
			print_help(argv);
			exit(1);
		}
		current_argument++;
	}
	if (strncasecmp(argv[current_argument], kSeparateObjectsArgument, strlen(kSeparateObjectsArgument)) == 0) {
		args->variant = VARIANT_SEPARATE_VM_OBJECTS;
	} else if (strncasecmp(argv[current_argument], kShareObjectsArgument, strlen(kShareObjectsArgument)) == 0) {
		args->variant = VARIANT_SHARE_VM_OBJECTS;
	} else {
		print_help(argv);
		exit(1);
	}
	current_argument++;

	long duration = strtol(argv[current_argument++], NULL, 10);
	if (duration == 0) {
		print_help(argv);
		exit(1);
	}
	long num_cores = strtol(argv[current_argument++], NULL, 10);
	if (num_cores == 0) {
		print_help(argv);
		exit(1);
	}
	if (current_argument < argc) {
		long first_cpu = strtol(argv[current_argument++], NULL, 10);
		assert(first_cpu >= 0 && first_cpu < get_ncpu());
		args->pin_threads = true;
		args->first_cpu = (unsigned int) first_cpu;
	} else {
		args->pin_threads = false;
	}

	assert(num_cores > 0 && num_cores <= get_ncpu());
	args->n_threads = (unsigned int) num_cores;
	args->duration_seconds = (unsigned long) duration;
}

static inline _Atomic size_t *
next_fault_buffer_index_ptr(test_globals_t *globals)
{
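	/*
	 * Skip past the struct itself and one cache line of padding; this is the
	 * trailing atomic described in the layout sketch after test_globals_t.
	 */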
	return (_Atomic size_t *) (((ptrdiff_t)(globals + 1)) + (int64_t)kCacheLineSize);
}

static size_t
fault_buffer_stride(const test_globals_t *globals)
{
	size_t stride;
	if (globals->tg_variant == VARIANT_SEPARATE_VM_OBJECTS) {
		stride = 1;
	} else if (globals->tg_variant == VARIANT_SHARE_VM_OBJECTS) {
		stride = globals->tg_num_threads;
	} else {
		fprintf(stderr, "Unknown variant\n");
		exit(-1);
	}
	return stride;
}