1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Memory bandwidth monitoring and allocation library
4  *
5  * Copyright (C) 2018 Intel Corporation
6  *
7  * Authors:
8  *    Sai Praneeth Prakhya <[email protected]>,
9  *    Fenghua Yu <[email protected]>
10  */
11 #include "resctrl.h"
12 
/* Name prefix of the iMC PMU directories looked up under DYN_PMU_PATH */
#define UNCORE_IMC		"uncore_imc"
/* Per-PMU files (relative to the PMU directory) holding the CAS event config */
#define READ_FILE_NAME		"events/cas_count_read"
#define WRITE_FILE_NAME		"events/cas_count_write"
/* Directory that is scanned for PMU devices */
#define DYN_PMU_PATH		"/sys/bus/event_source/devices"
/*
 * Multiplier applied to raw iMC counter values; numerically equal to
 * 64 / (1024 * 1024). Presumably bytes per CAS event converted to MB -
 * TODO confirm.
 */
#define SCALE			0.00006103515625
/* First dimension of imc_counters_config[] */
#define MAX_IMCS		20
/* Number of tokens extracted from one event config string */
#define MAX_TOKENS		5
/* Second index into imc_counters_config[][] */
#define READ			0
#define WRITE			1

/* Format arguments: resctrl root, control group name, domain ID */
#define CON_MBM_LOCAL_BYTES_PATH		\
	"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
25 
/*
 * Buffer that get_mem_bw_imc() passes to read() on an iMC perf event fd.
 * Which fields are filled depends on the read_format bits requested in
 * membw_initialize_perf_event_attr().
 */
struct membw_read_format {
	__u64 value;         /* The value of the event */
	__u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	__u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	/* PERF_FORMAT_ID is not requested by membw_initialize_perf_event_attr() */
	__u64 id;            /* if PERF_FORMAT_ID */
};
32 
/* State of one iMC counter; there is one entry per (iMC, READ/WRITE) pair */
struct imc_counter_config {
	/* PMU type number read from the iMC's "type" file */
	__u32 type;
	/* "event" and "umask" values parsed from the CAS count config file */
	__u64 event;
	__u64 umask;
	/* Attributes handed to perf_event_open() */
	struct perf_event_attr pe;
	/* Most recent result of read() on @fd */
	struct membw_read_format return_value;
	/* perf event fd; set to -1 before the events are opened */
	int fd;
};
41 
/* Path of the mbm_local_bytes file, filled by initialize_mem_bw_resctrl() */
static char mbm_total_path[1024];
/* Number of iMCs found by num_of_imcs(); set in initialize_mem_bw_imc() */
static int imcs;
/* Indexed as [iMC number][READ or WRITE] */
static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
/* Test whose cleanup() ctrlc_handler() runs; NULL while none is registered */
static const struct resctrl_test *current_test;
46 
47 void membw_initialize_perf_event_attr(int i, int j)
48 {
49 	memset(&imc_counters_config[i][j].pe, 0,
50 	       sizeof(struct perf_event_attr));
51 	imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type;
52 	imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr);
53 	imc_counters_config[i][j].pe.disabled = 1;
54 	imc_counters_config[i][j].pe.inherit = 1;
55 	imc_counters_config[i][j].pe.exclude_guest = 0;
56 	imc_counters_config[i][j].pe.config =
57 		imc_counters_config[i][j].umask << 8 |
58 		imc_counters_config[i][j].event;
59 	imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER;
60 	imc_counters_config[i][j].pe.read_format =
61 		PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
62 }
63 
64 void membw_ioctl_perf_event_ioc_reset_enable(int i, int j)
65 {
66 	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0);
67 	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0);
68 }
69 
70 void membw_ioctl_perf_event_ioc_disable(int i, int j)
71 {
72 	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0);
73 }
74 
75 /*
76  * get_event_and_umask:	Parse config into event and umask
77  * @cas_count_cfg:	Config
78  * @count:		iMC number
79  * @op:			Operation (read/write)
80  */
81 void get_event_and_umask(char *cas_count_cfg, int count, bool op)
82 {
83 	char *token[MAX_TOKENS];
84 	int i = 0;
85 
86 	strcat(cas_count_cfg, ",");
87 	token[0] = strtok(cas_count_cfg, "=,");
88 
89 	for (i = 1; i < MAX_TOKENS; i++)
90 		token[i] = strtok(NULL, "=,");
91 
92 	for (i = 0; i < MAX_TOKENS; i++) {
93 		if (!token[i])
94 			break;
95 		if (strcmp(token[i], "event") == 0) {
96 			if (op == READ)
97 				imc_counters_config[count][READ].event =
98 				strtol(token[i + 1], NULL, 16);
99 			else
100 				imc_counters_config[count][WRITE].event =
101 				strtol(token[i + 1], NULL, 16);
102 		}
103 		if (strcmp(token[i], "umask") == 0) {
104 			if (op == READ)
105 				imc_counters_config[count][READ].umask =
106 				strtol(token[i + 1], NULL, 16);
107 			else
108 				imc_counters_config[count][WRITE].umask =
109 				strtol(token[i + 1], NULL, 16);
110 		}
111 	}
112 }
113 
114 static int open_perf_event(int i, int cpu_no, int j)
115 {
116 	imc_counters_config[i][j].fd =
117 		perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1,
118 				PERF_FLAG_FD_CLOEXEC);
119 
120 	if (imc_counters_config[i][j].fd == -1) {
121 		fprintf(stderr, "Error opening leader %llx\n",
122 			imc_counters_config[i][j].pe.config);
123 
124 		return -1;
125 	}
126 
127 	return 0;
128 }
129 
130 /* Get type and config (read and write) of an iMC counter */
131 static int read_from_imc_dir(char *imc_dir, int count)
132 {
133 	char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
134 	FILE *fp;
135 
136 	/* Get type of iMC counter */
137 	sprintf(imc_counter_type, "%s%s", imc_dir, "type");
138 	fp = fopen(imc_counter_type, "r");
139 	if (!fp) {
140 		ksft_perror("Failed to open iMC counter type file");
141 
142 		return -1;
143 	}
144 	if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
145 		ksft_perror("Could not get iMC type");
146 		fclose(fp);
147 
148 		return -1;
149 	}
150 	fclose(fp);
151 
152 	imc_counters_config[count][WRITE].type =
153 				imc_counters_config[count][READ].type;
154 
155 	/* Get read config */
156 	sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
157 	fp = fopen(imc_counter_cfg, "r");
158 	if (!fp) {
159 		ksft_perror("Failed to open iMC config file");
160 
161 		return -1;
162 	}
163 	if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
164 		ksft_perror("Could not get iMC cas count read");
165 		fclose(fp);
166 
167 		return -1;
168 	}
169 	fclose(fp);
170 
171 	get_event_and_umask(cas_count_cfg, count, READ);
172 
173 	/* Get write config */
174 	sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
175 	fp = fopen(imc_counter_cfg, "r");
176 	if (!fp) {
177 		ksft_perror("Failed to open iMC config file");
178 
179 		return -1;
180 	}
181 	if  (fscanf(fp, "%s", cas_count_cfg) <= 0) {
182 		ksft_perror("Could not get iMC cas count write");
183 		fclose(fp);
184 
185 		return -1;
186 	}
187 	fclose(fp);
188 
189 	get_event_and_umask(cas_count_cfg, count, WRITE);
190 
191 	return 0;
192 }
193 
194 /*
195  * A system can have 'n' number of iMC (Integrated Memory Controller)
196  * counters, get that 'n'. For each iMC counter get it's type and config.
197  * Also, each counter has two configs, one for read and the other for write.
198  * A config again has two parts, event and umask.
199  * Enumerate all these details into an array of structures.
200  *
201  * Return: >= 0 on success. < 0 on failure.
202  */
203 static int num_of_imcs(void)
204 {
205 	char imc_dir[512], *temp;
206 	unsigned int count = 0;
207 	struct dirent *ep;
208 	int ret;
209 	DIR *dp;
210 
211 	dp = opendir(DYN_PMU_PATH);
212 	if (dp) {
213 		while ((ep = readdir(dp))) {
214 			temp = strstr(ep->d_name, UNCORE_IMC);
215 			if (!temp)
216 				continue;
217 
218 			/*
219 			 * imc counters are named as "uncore_imc_<n>", hence
220 			 * increment the pointer to point to <n>. Note that
221 			 * sizeof(UNCORE_IMC) would count for null character as
222 			 * well and hence the last underscore character in
223 			 * uncore_imc'_' need not be counted.
224 			 */
225 			temp = temp + sizeof(UNCORE_IMC);
226 
227 			/*
228 			 * Some directories under "DYN_PMU_PATH" could have
229 			 * names like "uncore_imc_free_running", hence, check if
230 			 * first character is a numerical digit or not.
231 			 */
232 			if (temp[0] >= '0' && temp[0] <= '9') {
233 				sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
234 					ep->d_name);
235 				ret = read_from_imc_dir(imc_dir, count);
236 				if (ret) {
237 					closedir(dp);
238 
239 					return ret;
240 				}
241 				count++;
242 			}
243 		}
244 		closedir(dp);
245 		if (count == 0) {
246 			ksft_print_msg("Unable to find iMC counters\n");
247 
248 			return -1;
249 		}
250 	} else {
251 		ksft_perror("Unable to open PMU directory");
252 
253 		return -1;
254 	}
255 
256 	return count;
257 }
258 
259 int initialize_mem_bw_imc(void)
260 {
261 	int imc, j;
262 
263 	imcs = num_of_imcs();
264 	if (imcs <= 0)
265 		return imcs;
266 
267 	/* Initialize perf_event_attr structures for all iMC's */
268 	for (imc = 0; imc < imcs; imc++) {
269 		for (j = 0; j < 2; j++)
270 			membw_initialize_perf_event_attr(imc, j);
271 	}
272 
273 	return 0;
274 }
275 
276 static void perf_close_imc_mem_bw(void)
277 {
278 	int mc;
279 
280 	for (mc = 0; mc < imcs; mc++) {
281 		if (imc_counters_config[mc][READ].fd != -1)
282 			close(imc_counters_config[mc][READ].fd);
283 		if (imc_counters_config[mc][WRITE].fd != -1)
284 			close(imc_counters_config[mc][WRITE].fd);
285 	}
286 }
287 
288 /*
289  * perf_open_imc_mem_bw - Open perf fds for IMCs
290  * @cpu_no: CPU number that the benchmark PID is bound to
291  *
292  * Return: = 0 on success. < 0 on failure.
293  */
294 static int perf_open_imc_mem_bw(int cpu_no)
295 {
296 	int imc, ret;
297 
298 	for (imc = 0; imc < imcs; imc++) {
299 		imc_counters_config[imc][READ].fd = -1;
300 		imc_counters_config[imc][WRITE].fd = -1;
301 	}
302 
303 	for (imc = 0; imc < imcs; imc++) {
304 		ret = open_perf_event(imc, cpu_no, READ);
305 		if (ret)
306 			goto close_fds;
307 		ret = open_perf_event(imc, cpu_no, WRITE);
308 		if (ret)
309 			goto close_fds;
310 	}
311 
312 	return 0;
313 
314 close_fds:
315 	perf_close_imc_mem_bw();
316 	return -1;
317 }
318 
319 /*
320  * do_mem_bw_test - Perform memory bandwidth test
321  *
322  * Runs memory bandwidth test over one second period. Also, handles starting
323  * and stopping of the IMC perf counters around the test.
324  */
325 static void do_imc_mem_bw_test(void)
326 {
327 	int imc;
328 
329 	for (imc = 0; imc < imcs; imc++) {
330 		membw_ioctl_perf_event_ioc_reset_enable(imc, READ);
331 		membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE);
332 	}
333 
334 	sleep(1);
335 
336 	/* Stop counters after a second to get results (both read and write) */
337 	for (imc = 0; imc < imcs; imc++) {
338 		membw_ioctl_perf_event_ioc_disable(imc, READ);
339 		membw_ioctl_perf_event_ioc_disable(imc, WRITE);
340 	}
341 }
342 
343 /*
344  * get_mem_bw_imc - Memory bandwidth as reported by iMC counters
345  * @bw_report: Bandwidth report type (reads, writes)
346  *
347  * Memory bandwidth utilized by a process on a socket can be calculated
348  * using iMC counters. Perf events are used to read these counters.
349  *
350  * Return: = 0 on success. < 0 on failure.
351  */
352 static int get_mem_bw_imc(char *bw_report, float *bw_imc)
353 {
354 	float reads, writes, of_mul_read, of_mul_write;
355 	int imc;
356 
357 	/* Start all iMC counters to log values (both read and write) */
358 	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
359 
360 	/*
361 	 * Get results which are stored in struct type imc_counter_config
362 	 * Take overflow into consideration before calculating total bandwidth.
363 	 */
364 	for (imc = 0; imc < imcs; imc++) {
365 		struct imc_counter_config *r =
366 			&imc_counters_config[imc][READ];
367 		struct imc_counter_config *w =
368 			&imc_counters_config[imc][WRITE];
369 
370 		if (read(r->fd, &r->return_value,
371 			 sizeof(struct membw_read_format)) == -1) {
372 			ksft_perror("Couldn't get read bandwidth through iMC");
373 			return -1;
374 		}
375 
376 		if (read(w->fd, &w->return_value,
377 			 sizeof(struct membw_read_format)) == -1) {
378 			ksft_perror("Couldn't get write bandwidth through iMC");
379 			return -1;
380 		}
381 
382 		__u64 r_time_enabled = r->return_value.time_enabled;
383 		__u64 r_time_running = r->return_value.time_running;
384 
385 		if (r_time_enabled != r_time_running)
386 			of_mul_read = (float)r_time_enabled /
387 					(float)r_time_running;
388 
389 		__u64 w_time_enabled = w->return_value.time_enabled;
390 		__u64 w_time_running = w->return_value.time_running;
391 
392 		if (w_time_enabled != w_time_running)
393 			of_mul_write = (float)w_time_enabled /
394 					(float)w_time_running;
395 		reads += r->return_value.value * of_mul_read * SCALE;
396 		writes += w->return_value.value * of_mul_write * SCALE;
397 	}
398 
399 	if (strcmp(bw_report, "reads") == 0) {
400 		*bw_imc = reads;
401 		return 0;
402 	}
403 
404 	if (strcmp(bw_report, "writes") == 0) {
405 		*bw_imc = writes;
406 		return 0;
407 	}
408 
409 	*bw_imc = reads + writes;
410 	return 0;
411 }
412 
413 /*
414  * initialize_mem_bw_resctrl:	Appropriately populate "mbm_total_path"
415  * @param:	Parameters passed to resctrl_val()
416  * @domain_id:	Domain ID (cache ID; for MB, L3 cache ID)
417  */
418 void initialize_mem_bw_resctrl(const struct resctrl_val_param *param,
419 			       int domain_id)
420 {
421 	sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
422 		param->ctrlgrp, domain_id);
423 }
424 
/*
 * open_mem_bw_resctrl - Open file to read MBM local bytes from resctrl FS
 * @mbm_bw_file:	Path of the file to open
 *
 * Return: Stream opened for reading, NULL on failure (after reporting it).
 */
static FILE *open_mem_bw_resctrl(const char *mbm_bw_file)
{
	FILE *stream = fopen(mbm_bw_file, "r");

	if (stream)
		return stream;

	ksft_perror("Failed to open total memory bandwidth file");

	return NULL;
}
438 
/*
 * get_mem_bw_resctrl - Get MBM Local bytes as reported by resctrl FS
 * @fp:		Stream positioned at the value to read
 * @mbm_total:	Where the parsed value is stored
 *
 * Return: 0 on success, -1 if no value could be parsed.
 */
static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total)
{
	int matched = fscanf(fp, "%lu\n", mbm_total);

	if (matched > 0)
		return 0;

	ksft_perror("Could not get MBM local bytes");
	return -1;
}
450 
/*
 * bm_pid: result of fork() in resctrl_val(), i.e. the benchmark child's PID
 *         in the parent; reset to 0 by signal_handler_register().
 * ppid:   PID of the process calling resctrl_val(), saved before fork() so
 *         that the child can kill its parent through parent_exit().
 */
static pid_t bm_pid, ppid;
452 
453 void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
454 {
455 	/* Only kill child after bm_pid is set after fork() */
456 	if (bm_pid)
457 		kill(bm_pid, SIGKILL);
458 	umount_resctrlfs();
459 	if (current_test && current_test->cleanup)
460 		current_test->cleanup();
461 	ksft_print_msg("Ending\n\n");
462 
463 	exit(EXIT_SUCCESS);
464 }
465 
466 /*
467  * Register CTRL-C handler for parent, as it has to kill
468  * child process before exiting.
469  */
470 int signal_handler_register(const struct resctrl_test *test)
471 {
472 	struct sigaction sigact = {};
473 	int ret = 0;
474 
475 	bm_pid = 0;
476 
477 	current_test = test;
478 	sigact.sa_sigaction = ctrlc_handler;
479 	sigemptyset(&sigact.sa_mask);
480 	sigact.sa_flags = SA_SIGINFO;
481 	if (sigaction(SIGINT, &sigact, NULL) ||
482 	    sigaction(SIGTERM, &sigact, NULL) ||
483 	    sigaction(SIGHUP, &sigact, NULL)) {
484 		ksft_perror("sigaction");
485 		ret = -1;
486 	}
487 	return ret;
488 }
489 
490 /*
491  * Reset signal handler to SIG_DFL.
492  * Non-Value return because the caller should keep
493  * the error code of other path even if sigaction fails.
494  */
495 void signal_handler_unregister(void)
496 {
497 	struct sigaction sigact = {};
498 
499 	current_test = NULL;
500 	sigact.sa_handler = SIG_DFL;
501 	sigemptyset(&sigact.sa_mask);
502 	if (sigaction(SIGINT, &sigact, NULL) ||
503 	    sigaction(SIGTERM, &sigact, NULL) ||
504 	    sigaction(SIGHUP, &sigact, NULL)) {
505 		ksft_perror("sigaction");
506 	}
507 }
508 
/*
 * parent_exit - Kill the given process, unmount resctrl FS and exit
 * @ppid:	PID to send SIGKILL to
 *
 * Does not return; the calling process exits with EXIT_FAILURE. The callers
 * in this file pass the saved PID of the parent test process.
 */
static void parent_exit(pid_t ppid)
{
	kill(ppid, SIGKILL);
	umount_resctrlfs();
	exit(EXIT_FAILURE);
}
515 
/*
 * print_results_bw:	the memory bandwidth results are stored in a file
 * @filename:		file that stores the results; "stdio" and "stderr"
 *			both select printing with printf() instead
 * @bm_pid:		child pid that runs benchmark
 * @bw_imc:		perf imc counter value
 * @bw_resc:		memory bandwidth value
 *
 * Besides both values, their absolute difference is logged. A results file
 * is opened in append mode.
 *
 * Return:		0 on success, < 0 on error.
 */
static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc,
			    unsigned long bw_resc)
{
	unsigned long diff = fabs(bw_imc - bw_resc);
	int written;
	FILE *fp;

	if (!strcmp(filename, "stdio") || !strcmp(filename, "stderr")) {
		printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc);
		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);

		return 0;
	}

	fp = fopen(filename, "a");
	if (!fp) {
		ksft_perror("Cannot open results file");

		return -1;
	}

	written = fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
			  (int)bm_pid, bw_imc, bw_resc, diff);
	if (written <= 0)
		ksft_print_msg("Could not log results\n");

	fclose(fp);

	return written <= 0 ? -1 : 0;
}
553 
554 /*
555  * measure_mem_bw - Measures memory bandwidth numbers while benchmark runs
556  * @uparams:		User supplied parameters
557  * @param:		Parameters passed to resctrl_val()
558  * @bm_pid:		PID that runs the benchmark
559  *
560  * Measure memory bandwidth from resctrl and from another source which is
561  * perf imc value or could be something else if perf imc event is not
562  * available. Compare the two values to validate resctrl value. It takes
563  * 1 sec to measure the data.
564  */
565 int measure_mem_bw(const struct user_params *uparams,
566 		   struct resctrl_val_param *param, pid_t bm_pid)
567 {
568 	unsigned long bw_resc, bw_resc_start, bw_resc_end;
569 	FILE *mem_bw_fp;
570 	float bw_imc;
571 	int ret;
572 
573 	mem_bw_fp = open_mem_bw_resctrl(mbm_total_path);
574 	if (!mem_bw_fp)
575 		return -1;
576 
577 	ret = perf_open_imc_mem_bw(uparams->cpu);
578 	if (ret < 0)
579 		goto close_fp;
580 
581 	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start);
582 	if (ret < 0)
583 		goto close_imc;
584 
585 	rewind(mem_bw_fp);
586 
587 	do_imc_mem_bw_test();
588 
589 	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end);
590 	if (ret < 0)
591 		goto close_imc;
592 
593 	ret = get_mem_bw_imc(param->bw_report, &bw_imc);
594 	if (ret < 0)
595 		goto close_imc;
596 
597 	perf_close_imc_mem_bw();
598 	fclose(mem_bw_fp);
599 
600 	bw_resc = (bw_resc_end - bw_resc_start) / MB;
601 
602 	return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
603 
604 close_imc:
605 	perf_close_imc_mem_bw();
606 close_fp:
607 	fclose(mem_bw_fp);
608 	return ret;
609 }
610 
611 /*
612  * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
613  *		   in specified signal. Direct benchmark stdio to /dev/null.
614  * @signum:	signal number
615  * @info:	signal info
616  * @ucontext:	user context in signal handling
617  */
618 static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
619 {
620 	int operation, ret, memflush;
621 	char **benchmark_cmd;
622 	size_t span;
623 	bool once;
624 	FILE *fp;
625 
626 	benchmark_cmd = info->si_ptr;
627 
628 	/*
629 	 * Direct stdio of child to /dev/null, so that only parent writes to
630 	 * stdio (console)
631 	 */
632 	fp = freopen("/dev/null", "w", stdout);
633 	if (!fp) {
634 		ksft_perror("Unable to direct benchmark status to /dev/null");
635 		parent_exit(ppid);
636 	}
637 
638 	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
639 		/* Execute default fill_buf benchmark */
640 		span = strtoul(benchmark_cmd[1], NULL, 10);
641 		memflush =  atoi(benchmark_cmd[2]);
642 		operation = atoi(benchmark_cmd[3]);
643 		if (!strcmp(benchmark_cmd[4], "true")) {
644 			once = true;
645 		} else if (!strcmp(benchmark_cmd[4], "false")) {
646 			once = false;
647 		} else {
648 			ksft_print_msg("Invalid once parameter\n");
649 			parent_exit(ppid);
650 		}
651 
652 		if (run_fill_buf(span, memflush, operation, once))
653 			fprintf(stderr, "Error in running fill buffer\n");
654 	} else {
655 		/* Execute specified benchmark */
656 		ret = execvp(benchmark_cmd[0], benchmark_cmd);
657 		if (ret)
658 			ksft_perror("execvp");
659 	}
660 
661 	fclose(stdout);
662 	ksft_print_msg("Unable to run specified benchmark\n");
663 	parent_exit(ppid);
664 }
665 
666 /*
667  * resctrl_val:	execute benchmark and measure memory bandwidth on
668  *			the benchmark
669  * @test:		test information structure
670  * @uparams:		user supplied parameters
671  * @benchmark_cmd:	benchmark command and its arguments
672  * @param:		parameters passed to resctrl_val()
673  *
674  * Return:		0 when the test was run, < 0 on error.
675  */
676 int resctrl_val(const struct resctrl_test *test,
677 		const struct user_params *uparams,
678 		const char * const *benchmark_cmd,
679 		struct resctrl_val_param *param)
680 {
681 	char *resctrl_val = param->resctrl_val;
682 	struct sigaction sigact;
683 	int ret = 0, pipefd[2];
684 	char pipe_message = 0;
685 	union sigval value;
686 	int domain_id;
687 
688 	if (strcmp(param->filename, "") == 0)
689 		sprintf(param->filename, "stdio");
690 
691 	ret = get_domain_id(test->resource, uparams->cpu, &domain_id);
692 	if (ret < 0) {
693 		ksft_print_msg("Could not get domain ID\n");
694 		return ret;
695 	}
696 
697 	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
698 	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
699 		ret = validate_bw_report_request(param->bw_report);
700 		if (ret)
701 			return ret;
702 	}
703 
704 	/*
705 	 * If benchmark wasn't successfully started by child, then child should
706 	 * kill parent, so save parent's pid
707 	 */
708 	ppid = getpid();
709 
710 	if (pipe(pipefd)) {
711 		ksft_perror("Unable to create pipe");
712 
713 		return -1;
714 	}
715 
716 	/*
717 	 * Fork to start benchmark, save child's pid so that it can be killed
718 	 * when needed
719 	 */
720 	fflush(stdout);
721 	bm_pid = fork();
722 	if (bm_pid == -1) {
723 		ksft_perror("Unable to fork");
724 
725 		return -1;
726 	}
727 
728 	if (bm_pid == 0) {
729 		/*
730 		 * Mask all signals except SIGUSR1, parent uses SIGUSR1 to
731 		 * start benchmark
732 		 */
733 		sigfillset(&sigact.sa_mask);
734 		sigdelset(&sigact.sa_mask, SIGUSR1);
735 
736 		sigact.sa_sigaction = run_benchmark;
737 		sigact.sa_flags = SA_SIGINFO;
738 
739 		/* Register for "SIGUSR1" signal from parent */
740 		if (sigaction(SIGUSR1, &sigact, NULL)) {
741 			ksft_perror("Can't register child for signal");
742 			parent_exit(ppid);
743 		}
744 
745 		/* Tell parent that child is ready */
746 		close(pipefd[0]);
747 		pipe_message = 1;
748 		if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
749 		    sizeof(pipe_message)) {
750 			ksft_perror("Failed signaling parent process");
751 			close(pipefd[1]);
752 			return -1;
753 		}
754 		close(pipefd[1]);
755 
756 		/* Suspend child until delivery of "SIGUSR1" from parent */
757 		sigsuspend(&sigact.sa_mask);
758 
759 		ksft_perror("Child is done");
760 		parent_exit(ppid);
761 	}
762 
763 	ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid);
764 
765 	/*
766 	 * The cast removes constness but nothing mutates benchmark_cmd within
767 	 * the context of this process. At the receiving process, it becomes
768 	 * argv, which is mutable, on exec() but that's after fork() so it
769 	 * doesn't matter for the process running the tests.
770 	 */
771 	value.sival_ptr = (void *)benchmark_cmd;
772 
773 	/* Taskset benchmark to specified cpu */
774 	ret = taskset_benchmark(bm_pid, uparams->cpu, NULL);
775 	if (ret)
776 		goto out;
777 
778 	/* Write benchmark to specified control&monitoring grp in resctrl FS */
779 	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
780 				      resctrl_val);
781 	if (ret)
782 		goto out;
783 
784 	if (param->init) {
785 		ret = param->init(param, domain_id);
786 		if (ret)
787 			goto out;
788 	}
789 
790 	/* Parent waits for child to be ready. */
791 	close(pipefd[1]);
792 	while (pipe_message != 1) {
793 		if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
794 		    sizeof(pipe_message)) {
795 			ksft_perror("Failed reading message from child process");
796 			close(pipefd[0]);
797 			goto out;
798 		}
799 	}
800 	close(pipefd[0]);
801 
802 	/* Signal child to start benchmark */
803 	if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
804 		ksft_perror("sigqueue SIGUSR1 to child");
805 		ret = -1;
806 		goto out;
807 	}
808 
809 	/* Give benchmark enough time to fully run */
810 	sleep(1);
811 
812 	/* Test runs until the callback setup() tells the test to stop. */
813 	while (1) {
814 		ret = param->setup(test, uparams, param);
815 		if (ret == END_OF_TESTS) {
816 			ret = 0;
817 			break;
818 		}
819 		if (ret < 0)
820 			break;
821 
822 		ret = param->measure(uparams, param, bm_pid);
823 		if (ret)
824 			break;
825 	}
826 
827 out:
828 	kill(bm_pid, SIGKILL);
829 
830 	return ret;
831 }
832