// SPDX-License-Identifier: GPL-2.0
/*
 * Memory bandwidth monitoring and allocation library
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <[email protected]>,
 *    Fenghua Yu <[email protected]>
 */
#include "resctrl.h"

#define UNCORE_IMC		"uncore_imc"
#define READ_FILE_NAME		"events/cas_count_read"
#define WRITE_FILE_NAME		"events/cas_count_write"
#define DYN_PMU_PATH		"/sys/bus/event_source/devices"
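/*
 * cas_count events count 64-byte cache line transfers; SCALE (64 / 2^20)
 * converts a raw event count to MiB.
 */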
#define SCALE			0.00006103515625
#define MAX_IMCS		20
#define MAX_TOKENS		5
#define READ			0
#define WRITE			1
#define CON_MON_MBM_LOCAL_BYTES_PATH				\
	"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define CON_MBM_LOCAL_BYTES_PATH		\
	"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define MON_MBM_LOCAL_BYTES_PATH		\
	"%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define MBM_LOCAL_BYTES_PATH			\
	"%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define CON_MON_LCC_OCCUP_PATH		\
	"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define CON_LCC_OCCUP_PATH		\
	"%s/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define MON_LCC_OCCUP_PATH		\
	"%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define LCC_OCCUP_PATH			\
	"%s/mon_data/mon_L3_%02d/llc_occupancy"

struct membw_read_format {
	__u64 value;         /* The value of the event */
	__u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	__u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	__u64 id;            /* if PERF_FORMAT_ID */
};

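/* Perf config and the most recent reading for one iMC counter direction. */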
struct imc_counter_config {
	__u32 type;
	__u64 event;
	__u64 umask;
	struct perf_event_attr pe;
	struct membw_read_format return_value;
	int fd;
};

static char mbm_total_path[1024];
static int imcs;
static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
static const struct resctrl_test *current_test;

void membw_initialize_perf_event_attr(int i, int j)
{
	memset(&imc_counters_config[i][j].pe, 0,
	       sizeof(struct perf_event_attr));
	imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type;
	imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr);
	imc_counters_config[i][j].pe.disabled = 1;
	imc_counters_config[i][j].pe.inherit = 1;
	imc_counters_config[i][j].pe.exclude_guest = 0;
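	/*
	 * Compose the raw config as (umask << 8) | event, matching the
	 * typical iMC event format (event: config bits 0-7, umask: bits 8-15).
	 */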
	imc_counters_config[i][j].pe.config =
		imc_counters_config[i][j].umask << 8 |
		imc_counters_config[i][j].event;
	imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER;
	imc_counters_config[i][j].pe.read_format =
		PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
}

void membw_ioctl_perf_event_ioc_reset_enable(int i, int j)
{
	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0);
}

void membw_ioctl_perf_event_ioc_disable(int i, int j)
{
	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0);
}

/*
 * get_event_and_umask:	Parse config into event and umask
 * @cas_count_cfg:	Config
 * @count:		iMC number
 * @op:			Operation (read/write)
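 *
 * The config read from sysfs is a comma separated string, e.g.
 * "event=0x04,umask=0x03" (exact values vary by CPU model).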
 */
void get_event_and_umask(char *cas_count_cfg, int count, bool op)
{
	char *token[MAX_TOKENS];
	int i = 0;

	strcat(cas_count_cfg, ",");
	token[0] = strtok(cas_count_cfg, "=,");

	for (i = 1; i < MAX_TOKENS; i++)
		token[i] = strtok(NULL, "=,");

	for (i = 0; i < MAX_TOKENS; i++) {
		if (!token[i])
			break;
		if (strcmp(token[i], "event") == 0) {
			if (op == READ)
				imc_counters_config[count][READ].event =
				strtol(token[i + 1], NULL, 16);
			else
				imc_counters_config[count][WRITE].event =
				strtol(token[i + 1], NULL, 16);
		}
		if (strcmp(token[i], "umask") == 0) {
			if (op == READ)
				imc_counters_config[count][READ].umask =
				strtol(token[i + 1], NULL, 16);
			else
				imc_counters_config[count][WRITE].umask =
				strtol(token[i + 1], NULL, 16);
		}
	}
}

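/* Open one iMC perf event on @cpu_no; 'i' selects the iMC, 'j' READ or WRITE. */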
static int open_perf_event(int i, int cpu_no, int j)
{
	imc_counters_config[i][j].fd =
		perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1,
				PERF_FLAG_FD_CLOEXEC);

	if (imc_counters_config[i][j].fd == -1) {
		fprintf(stderr, "Error opening leader %llx\n",
			imc_counters_config[i][j].pe.config);

		return -1;
	}

	return 0;
}

/* Get type and config (read and write) of an iMC counter */
static int read_from_imc_dir(char *imc_dir, int count)
{
	char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
	FILE *fp;

	/* Get type of iMC counter */
	sprintf(imc_counter_type, "%s%s", imc_dir, "type");
	fp = fopen(imc_counter_type, "r");
	if (!fp) {
		ksft_perror("Failed to open iMC counter type file");

		return -1;
	}
	if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
		ksft_perror("Could not get iMC type");
		fclose(fp);

		return -1;
	}
	fclose(fp);

	imc_counters_config[count][WRITE].type =
				imc_counters_config[count][READ].type;

	/* Get read config */
	sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
	fp = fopen(imc_counter_cfg, "r");
	if (!fp) {
		ksft_perror("Failed to open iMC config file");

		return -1;
	}
	if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
		ksft_perror("Could not get iMC cas count read");
		fclose(fp);

		return -1;
	}
	fclose(fp);

	get_event_and_umask(cas_count_cfg, count, READ);

	/* Get write config */
	sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
	fp = fopen(imc_counter_cfg, "r");
	if (!fp) {
		ksft_perror("Failed to open iMC config file");

		return -1;
	}
	if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
		ksft_perror("Could not get iMC cas count write");
		fclose(fp);

		return -1;
	}
	fclose(fp);

	get_event_and_umask(cas_count_cfg, count, WRITE);

	return 0;
}

/*
 * A system can have 'n' number of iMC (Integrated Memory Controller)
 * counters, get that 'n'. For each iMC counter get its type and config.
 * Also, each counter has two configs, one for read and the other for write.
 * A config again has two parts, event and umask.
 * Enumerate all these details into an array of structures.
 *
 * Return: > 0 on success. < 0 on failure.
 */
static int num_of_imcs(void)
{
	char imc_dir[512], *temp;
	unsigned int count = 0;
	struct dirent *ep;
	int ret;
	DIR *dp;

	dp = opendir(DYN_PMU_PATH);
	if (dp) {
		while ((ep = readdir(dp))) {
			temp = strstr(ep->d_name, UNCORE_IMC);
			if (!temp)
				continue;

			/*
			 * iMC counters are named "uncore_imc_<n>", so advance
			 * the pointer to point at <n>. Note that
			 * sizeof(UNCORE_IMC) includes the terminating null
			 * character, which also accounts for the '_' that
			 * follows "uncore_imc".
			 */
			temp = temp + sizeof(UNCORE_IMC);

			/*
			 * Some directories under "DYN_PMU_PATH" could have
			 * names like "uncore_imc_free_running", hence, check if
			 * first character is a numerical digit or not.
			 */
			if (temp[0] >= '0' && temp[0] <= '9') {
				sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
					ep->d_name);
				ret = read_from_imc_dir(imc_dir, count);
				if (ret) {
					closedir(dp);

					return ret;
				}
				count++;
			}
		}
		closedir(dp);
		if (count == 0) {
			ksft_print_msg("Unable to find iMC counters\n");

			return -1;
		}
	} else {
		ksft_perror("Unable to open PMU directory");

		return -1;
	}

	return count;
}

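/* Discover the iMC counters and set up a perf_event_attr for each direction. */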
static int initialize_mem_bw_imc(void)
{
	int imc, j;

	imcs = num_of_imcs();
	if (imcs <= 0)
		return imcs;

	/* Initialize perf_event_attr structures for all iMC's */
	for (imc = 0; imc < imcs; imc++) {
		for (j = 0; j < 2; j++)
			membw_initialize_perf_event_attr(imc, j);
	}

	return 0;
}

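/* Close any iMC perf fds that were opened by perf_open_imc_mem_bw(). */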
static void perf_close_imc_mem_bw(void)
{
	int mc;

	for (mc = 0; mc < imcs; mc++) {
		if (imc_counters_config[mc][READ].fd != -1)
			close(imc_counters_config[mc][READ].fd);
		if (imc_counters_config[mc][WRITE].fd != -1)
			close(imc_counters_config[mc][WRITE].fd);
	}
}

/*
 * perf_open_imc_mem_bw - Open perf fds for IMCs
 * @cpu_no: CPU number that the benchmark PID is bound to
 *
 * Return: = 0 on success. < 0 on failure.
 */
static int perf_open_imc_mem_bw(int cpu_no)
{
	int imc, ret;

	for (imc = 0; imc < imcs; imc++) {
		imc_counters_config[imc][READ].fd = -1;
		imc_counters_config[imc][WRITE].fd = -1;
	}

	for (imc = 0; imc < imcs; imc++) {
		ret = open_perf_event(imc, cpu_no, READ);
		if (ret)
			goto close_fds;
		ret = open_perf_event(imc, cpu_no, WRITE);
		if (ret)
			goto close_fds;
	}

	return 0;

close_fds:
	perf_close_imc_mem_bw();
	return -1;
}

/*
 * do_imc_mem_bw_test - Perform memory bandwidth test
 *
 * Runs the memory bandwidth test over a one second period. Also handles
 * starting and stopping of the iMC perf counters around the test.
 */
static void do_imc_mem_bw_test(void)
{
	int imc;

	for (imc = 0; imc < imcs; imc++) {
		membw_ioctl_perf_event_ioc_reset_enable(imc, READ);
		membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE);
	}

	sleep(1);

	/* Stop counters after a second to get results (both read and write) */
	for (imc = 0; imc < imcs; imc++) {
		membw_ioctl_perf_event_ioc_disable(imc, READ);
		membw_ioctl_perf_event_ioc_disable(imc, WRITE);
	}
}

/*
 * get_mem_bw_imc - Memory bandwidth as reported by iMC counters
 * @bw_report: Bandwidth report type (reads, writes; anything else reports
 *             reads + writes)
 * @bw_imc:    Pointer where the computed bandwidth is returned
 *
 * Memory bandwidth utilized by a process on a socket can be calculated
 * using iMC counters. Perf events are used to read these counters.
 *
 * Return: = 0 on success. < 0 on failure.
 */
static int get_mem_bw_imc(char *bw_report, float *bw_imc)
{
	float reads, writes, of_mul_read, of_mul_write;
	int imc;

	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;

	/*
	 * Get results which are stored in struct type imc_counter_config.
	 * Scale each count by time_enabled / time_running to compensate for
	 * counter multiplexing before calculating total bandwidth.
	 */
	for (imc = 0; imc < imcs; imc++) {
		struct imc_counter_config *r =
			&imc_counters_config[imc][READ];
		struct imc_counter_config *w =
			&imc_counters_config[imc][WRITE];

		if (read(r->fd, &r->return_value,
			 sizeof(struct membw_read_format)) == -1) {
			ksft_perror("Couldn't get read bandwidth through iMC");
			return -1;
		}

		if (read(w->fd, &w->return_value,
			 sizeof(struct membw_read_format)) == -1) {
			ksft_perror("Couldn't get write bandwidth through iMC");
			return -1;
		}

		__u64 r_time_enabled = r->return_value.time_enabled;
		__u64 r_time_running = r->return_value.time_running;

		if (r_time_enabled != r_time_running)
			of_mul_read = (float)r_time_enabled /
					(float)r_time_running;

		__u64 w_time_enabled = w->return_value.time_enabled;
		__u64 w_time_running = w->return_value.time_running;

		if (w_time_enabled != w_time_running)
			of_mul_write = (float)w_time_enabled /
					(float)w_time_running;
		reads += r->return_value.value * of_mul_read * SCALE;
		writes += w->return_value.value * of_mul_write * SCALE;
	}

	if (strcmp(bw_report, "reads") == 0) {
		*bw_imc = reads;
		return 0;
	}

	if (strcmp(bw_report, "writes") == 0) {
		*bw_imc = writes;
		return 0;
	}

	*bw_imc = reads + writes;
	return 0;
}

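/* Build the mbm_local_bytes path for the given ctrl/mon group combination. */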
void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id)
{
	if (ctrlgrp && mongrp)
		sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
			RESCTRL_PATH, ctrlgrp, mongrp, domain_id);
	else if (!ctrlgrp && mongrp)
		sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
			mongrp, domain_id);
	else if (ctrlgrp && !mongrp)
		sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
			ctrlgrp, domain_id);
	else if (!ctrlgrp && !mongrp)
		sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
			domain_id);
}

/*
 * initialize_mem_bw_resctrl:	Appropriately populate "mbm_total_path"
 * @ctrlgrp:			Name of the control monitor group (con_mon grp)
 * @mongrp:			Name of the monitor group (mon grp)
 * @domain_id:			Domain ID (cache ID; for MB, L3 cache ID)
 * @resctrl_val:		Resctrl feature (Eg: mbm, mba.. etc)
 */
static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
				      int domain_id, char *resctrl_val)
{
	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
		set_mbm_path(ctrlgrp, mongrp, domain_id);

	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
		if (ctrlgrp)
			sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
				RESCTRL_PATH, ctrlgrp, domain_id);
		else
			sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
				RESCTRL_PATH, domain_id);
	}
}

/*
 * Get MBM Local bytes as reported by resctrl FS
 * For MBM,
 * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp
 * 2. If only con_mon grp is given, then read from con_mon grp
 * 3. If both are not given, then read from root con_mon grp
 * For MBA,
 * 1. If con_mon grp is given, then read from it
 * 2. If con_mon grp is not given, then read from root con_mon grp
 */
static FILE *open_mem_bw_resctrl(const char *mbm_bw_file)
{
	FILE *fp;

	fp = fopen(mbm_bw_file, "r");
	if (!fp)
		ksft_perror("Failed to open total memory bandwidth file");

	return fp;
}

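/* Read one mbm_local_bytes sample (in bytes) from an already open file. */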
static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total)
{
	if (fscanf(fp, "%lu\n", mbm_total) <= 0) {
		ksft_perror("Could not get MBM local bytes");
		return -1;
	}
	return 0;
}

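/* PIDs of the forked benchmark child and of the parent (test) process. */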
static pid_t bm_pid, ppid;

void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
{
	/* Only kill child after bm_pid is set after fork() */
	if (bm_pid)
		kill(bm_pid, SIGKILL);
	umount_resctrlfs();
	if (current_test && current_test->cleanup)
		current_test->cleanup();
	ksft_print_msg("Ending\n\n");

	exit(EXIT_SUCCESS);
}

/*
 * Register CTRL-C handler for parent, as it has to kill
 * child process before exiting.
 */
int signal_handler_register(const struct resctrl_test *test)
{
	struct sigaction sigact = {};
	int ret = 0;

	bm_pid = 0;

	current_test = test;
	sigact.sa_sigaction = ctrlc_handler;
	sigemptyset(&sigact.sa_mask);
	sigact.sa_flags = SA_SIGINFO;
	if (sigaction(SIGINT, &sigact, NULL) ||
	    sigaction(SIGTERM, &sigact, NULL) ||
	    sigaction(SIGHUP, &sigact, NULL)) {
		ksft_perror("sigaction");
		ret = -1;
	}
	return ret;
}

/*
 * Reset signal handler to SIG_DFL.
 * Returns void because the caller should keep the error code from
 * other paths even if sigaction fails.
 */
void signal_handler_unregister(void)
{
	struct sigaction sigact = {};

	current_test = NULL;
	sigact.sa_handler = SIG_DFL;
	sigemptyset(&sigact.sa_mask);
	if (sigaction(SIGINT, &sigact, NULL) ||
	    sigaction(SIGTERM, &sigact, NULL) ||
	    sigaction(SIGHUP, &sigact, NULL)) {
		ksft_perror("sigaction");
	}
}

static void parent_exit(pid_t ppid)
{
	kill(ppid, SIGKILL);
	umount_resctrlfs();
	exit(EXIT_FAILURE);
}

/*
 * print_results_bw:	Print the memory bandwidth results to stdio or append
 *			them to a results file
 * @filename:		file that stores the results ("stdio"/"stderr" prints
 *			to the console instead)
 * @bm_pid:		child pid that runs benchmark
 * @bw_imc:		memory bandwidth as measured by the iMC perf counters
 * @bw_resc:		memory bandwidth as reported by resctrl
 *
 * Return:		0 on success, < 0 on error.
 */
static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc,
			    unsigned long bw_resc)
{
	unsigned long diff = fabs(bw_imc - bw_resc);
	FILE *fp;

	if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
		printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc);
		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);
	} else {
		fp = fopen(filename, "a");
		if (!fp) {
			ksft_perror("Cannot open results file");

			return -1;
		}
		if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
			    (int)bm_pid, bw_imc, bw_resc, diff) <= 0) {
			ksft_print_msg("Could not log results\n");
			fclose(fp);

			return -1;
		}
		fclose(fp);
	}

	return 0;
}

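/* Build the llc_occupancy path for the given ctrl/mon group combination. */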
static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
{
	if (strlen(ctrlgrp) && strlen(mongrp))
		sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH,
			ctrlgrp, mongrp, sock_num);
	else if (!strlen(ctrlgrp) && strlen(mongrp))
		sprintf(llc_occup_path, MON_LCC_OCCUP_PATH, RESCTRL_PATH,
			mongrp, sock_num);
	else if (strlen(ctrlgrp) && !strlen(mongrp))
		sprintf(llc_occup_path, CON_LCC_OCCUP_PATH, RESCTRL_PATH,
			ctrlgrp, sock_num);
	else if (!strlen(ctrlgrp) && !strlen(mongrp))
		sprintf(llc_occup_path, LCC_OCCUP_PATH, RESCTRL_PATH, sock_num);
}

/*
 * initialize_llc_occu_resctrl:	Appropriately populate "llc_occup_path"
 * @ctrlgrp:			Name of the control monitor group (con_mon grp)
 * @mongrp:			Name of the monitor group (mon grp)
 * @domain_id:			Domain ID (cache ID; for MB, L3 cache ID)
 * @resctrl_val:		Resctrl feature (Eg: cat, cmt.. etc)
 */
static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
					int domain_id, char *resctrl_val)
{
	if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
		set_cmt_path(ctrlgrp, mongrp, domain_id);
}

/*
 * measure_mem_bw - Measures memory bandwidth numbers while benchmark runs
 * @uparams:		User supplied parameters
 * @param:		Parameters passed to resctrl_val()
 * @bm_pid:		PID that runs the benchmark
 *
 * Measure memory bandwidth from resctrl and from another source, which here
 * is the perf iMC value but could be something else if the perf iMC event is
 * not available. Compare the two values to validate the resctrl value. The
 * measurement takes one second.
 *
 * Return: 0 on success, < 0 on error.
 */
static int measure_mem_bw(const struct user_params *uparams,
			  struct resctrl_val_param *param, pid_t bm_pid)
{
	unsigned long bw_resc, bw_resc_start, bw_resc_end;
	FILE *mem_bw_fp;
	float bw_imc;
	int ret;

	mem_bw_fp = open_mem_bw_resctrl(mbm_total_path);
	if (!mem_bw_fp)
		return -1;

	ret = perf_open_imc_mem_bw(uparams->cpu);
	if (ret < 0)
		goto close_fp;

	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start);
	if (ret < 0)
		goto close_imc;

	rewind(mem_bw_fp);

	do_imc_mem_bw_test();

	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end);
	if (ret < 0)
		goto close_imc;

	ret = get_mem_bw_imc(param->bw_report, &bw_imc);
	if (ret < 0)
		goto close_imc;

	perf_close_imc_mem_bw();
	fclose(mem_bw_fp);

	bw_resc = (bw_resc_end - bw_resc_start) / MB;

	return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);

close_imc:
	perf_close_imc_mem_bw();
close_fp:
	fclose(mem_bw_fp);
	return ret;
}

/*
 * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
 *		   when the registered signal is delivered. Direct benchmark
 *		   stdout to /dev/null.
 * @signum:	signal number
 * @info:	signal info
 * @ucontext:	user context in signal handling
 */
static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
{
	int operation, ret, memflush;
	char **benchmark_cmd;
	size_t span;
	bool once;
	FILE *fp;

	benchmark_cmd = info->si_ptr;

	/*
	 * Direct stdout of child to /dev/null, so that only parent writes to
	 * stdio (console)
	 */
	fp = freopen("/dev/null", "w", stdout);
	if (!fp) {
		ksft_perror("Unable to direct benchmark status to /dev/null");
		parent_exit(ppid);
	}

	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
		/* Execute default fill_buf benchmark */
		span = strtoul(benchmark_cmd[1], NULL, 10);
		memflush = atoi(benchmark_cmd[2]);
		operation = atoi(benchmark_cmd[3]);
		if (!strcmp(benchmark_cmd[4], "true")) {
			once = true;
		} else if (!strcmp(benchmark_cmd[4], "false")) {
			once = false;
		} else {
			ksft_print_msg("Invalid once parameter\n");
			parent_exit(ppid);
		}

		if (run_fill_buf(span, memflush, operation, once))
			fprintf(stderr, "Error in running fill buffer\n");
	} else {
		/* Execute specified benchmark */
		ret = execvp(benchmark_cmd[0], benchmark_cmd);
		if (ret)
			ksft_perror("execvp");
	}

	fclose(stdout);
	ksft_print_msg("Unable to run specified benchmark\n");
	parent_exit(ppid);
}

/*
 * resctrl_val:	execute benchmark and measure memory bandwidth (or LLC
 *			occupancy) while the benchmark runs
 * @test:		test information structure
 * @uparams:		user supplied parameters
 * @benchmark_cmd:	benchmark command and its arguments
 * @param:		parameters passed to resctrl_val()
 *
 * Return:		0 when the test was run, < 0 on error.
 */
int resctrl_val(const struct resctrl_test *test,
		const struct user_params *uparams,
		const char * const *benchmark_cmd,
		struct resctrl_val_param *param)
{
	char *resctrl_val = param->resctrl_val;
	struct sigaction sigact;
	int ret = 0, pipefd[2];
	char pipe_message = 0;
	union sigval value;
	int domain_id;

	if (strcmp(param->filename, "") == 0)
		sprintf(param->filename, "stdio");

	ret = get_domain_id(test->resource, uparams->cpu, &domain_id);
	if (ret < 0) {
		ksft_print_msg("Could not get domain ID\n");
		return ret;
	}

	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
		ret = validate_bw_report_request(param->bw_report);
		if (ret)
			return ret;
	}

	/*
	 * If benchmark wasn't successfully started by child, then child should
	 * kill parent, so save parent's pid
	 */
	ppid = getpid();

	if (pipe(pipefd)) {
		ksft_perror("Unable to create pipe");

		return -1;
	}

	/*
	 * Fork to start benchmark, save child's pid so that it can be killed
	 * when needed
	 */
	fflush(stdout);
	bm_pid = fork();
	if (bm_pid == -1) {
		ksft_perror("Unable to fork");

		return -1;
	}

	if (bm_pid == 0) {
		/*
		 * Mask all signals except SIGUSR1, parent uses SIGUSR1 to
		 * start benchmark
		 */
		sigfillset(&sigact.sa_mask);
		sigdelset(&sigact.sa_mask, SIGUSR1);

		sigact.sa_sigaction = run_benchmark;
		sigact.sa_flags = SA_SIGINFO;

		/* Register for "SIGUSR1" signal from parent */
		if (sigaction(SIGUSR1, &sigact, NULL)) {
			ksft_perror("Can't register child for signal");
			parent_exit(ppid);
		}

		/* Tell parent that child is ready */
		close(pipefd[0]);
		pipe_message = 1;
		if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
		    sizeof(pipe_message)) {
			ksft_perror("Failed signaling parent process");
			close(pipefd[1]);
			return -1;
		}
		close(pipefd[1]);

		/* Suspend child until delivery of "SIGUSR1" from parent */
		sigsuspend(&sigact.sa_mask);

		ksft_perror("Child is done");
		parent_exit(ppid);
	}

	ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid);

	/*
	 * The cast removes constness but nothing mutates benchmark_cmd within
	 * the context of this process. At the receiving process, it becomes
	 * argv, which is mutable, on exec() but that's after fork() so it
	 * doesn't matter for the process running the tests.
	 */
	value.sival_ptr = (void *)benchmark_cmd;

	/* Taskset benchmark to specified cpu */
	ret = taskset_benchmark(bm_pid, uparams->cpu, NULL);
	if (ret)
		goto out;

	/* Write benchmark to specified control&monitoring grp in resctrl FS */
	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
				      resctrl_val);
	if (ret)
		goto out;

	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
	    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
		ret = initialize_mem_bw_imc();
		if (ret)
			goto out;

		initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
					  domain_id, resctrl_val);
	} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
		initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
					    domain_id, resctrl_val);

	/* Parent waits for child to be ready. */
	close(pipefd[1]);
	while (pipe_message != 1) {
		if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
		    sizeof(pipe_message)) {
			ksft_perror("Failed reading message from child process");
			close(pipefd[0]);
			goto out;
		}
	}
	close(pipefd[0]);

	/* Signal child to start benchmark */
	if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
		ksft_perror("sigqueue SIGUSR1 to child");
		ret = -1;
		goto out;
	}

	/* Give benchmark enough time to fully run */
	sleep(1);

	/* Test runs until the callback setup() tells the test to stop. */
	while (1) {
		ret = param->setup(test, uparams, param);
		if (ret == END_OF_TESTS) {
			ret = 0;
			break;
		}
		if (ret < 0)
			break;

		if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
		    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
			ret = measure_mem_bw(uparams, param, bm_pid);
			if (ret)
				break;
		} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
			sleep(1);
			ret = measure_llc_resctrl(param->filename, bm_pid);
			if (ret)
				break;
		}
	}

out:
	kill(bm_pid, SIGKILL);

	return ret;
}