xref: /dpdk/lib/power/power_pstate_cpufreq.c (revision 30a1de10)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <fcntl.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <limits.h>
10 #include <errno.h>
11 #include <inttypes.h>
12 
13 #include <rte_memcpy.h>
14 
15 #include "power_pstate_cpufreq.h"
16 #include "power_common.h"
17 
/*
 * Rounding helpers: a frequency is rounded to the nearest multiple of
 * ROUND_FREQ_TO_N_100000 by adding FREQ_ROUNDING_DELTA (half of it) before
 * the truncating integer division.
 */
#define FREQ_ROUNDING_DELTA 50000
#define ROUND_FREQ_TO_N_100000 100000

/*
 * Step between two adjacent frequency buckets; also the factor used to
 * convert between sysfs frequency values and ratios.
 */
#define BUS_FREQ     100000

/* Governor requested while this library manages an lcore. */
#define POWER_GOVERNOR_PERF "performance"

/* Per-cpu sysfs files; %u is replaced by the lcore id. */
#define POWER_SYSFILE_MAX_FREQ \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq"
#define POWER_SYSFILE_MIN_FREQ  \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_min_freq"
#define POWER_SYSFILE_CUR_FREQ  \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq"
#define POWER_SYSFILE_BASE_MAX_FREQ \
		"/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_max_freq"
#define POWER_SYSFILE_BASE_MIN_FREQ  \
		"/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq"
/* May be missing on some kernels; callers tolerate its absence. */
#define POWER_SYSFILE_BASE_FREQ  \
		"/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency"

/* Scaling driver name this backend is checked against. */
#define POWER_PSTATE_DRIVER "intel_pstate"
/* Per-cpu MSR device node; %u is replaced by the lcore id. */
#define POWER_MSR_PATH  "/dev/cpu/%u/msr"

/*
 * MSR related.
 * PLATFORM_INFO is the register offset that is read; bits 15:8 of its value
 * (NON_TURBO_MASK >> NON_TURBO_OFFSET) are used as the maximum non-turbo
 * ratio.
 */
#define PLATFORM_INFO     0x0CE
#define NON_TURBO_MASK    0xFF00
#define NON_TURBO_OFFSET  0x8
46 
47 
/*
 * Per-lcore guard state, stored in pstate_power_info.state.
 *
 * init moves POWER_IDLE -> POWER_ONGOING -> POWER_USED, exit moves
 * POWER_USED -> POWER_ONGOING -> POWER_IDLE; either one leaves the lcore in
 * POWER_UNKNOWN when it fails part-way.
 */
enum power_state {
	POWER_IDLE = 0,   /* not managed; init may claim the lcore */
	POWER_ONGOING,    /* an init or exit is currently in progress */
	POWER_USED,       /* initialised; exit may release the lcore */
	POWER_UNKNOWN     /* a previous init or exit failed */
};
54 
55 struct pstate_power_info {
56 	unsigned int lcore_id;               /**< Logical core id */
57 	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
58 	uint32_t nb_freqs;                   /**< number of available freqs */
59 	FILE *f_cur_min;                     /**< FD of scaling_min */
60 	FILE *f_cur_max;                     /**< FD of scaling_max */
61 	char governor_ori[32];               /**< Original governor name */
62 	uint32_t curr_idx;                   /**< Freq index in freqs array */
63 	uint32_t non_turbo_max_ratio;        /**< Non Turbo Max ratio  */
64 	uint32_t sys_max_freq;               /**< system wide max freq  */
65 	uint32_t core_base_freq;             /**< core base freq  */
66 	uint32_t state;                      /**< Power in use state */
67 	uint16_t turbo_available;            /**< Turbo Boost available */
68 	uint16_t turbo_enable;               /**< Turbo Boost enable/disable */
69 	uint16_t priority_core;              /**< High Performance core */
70 } __rte_cache_aligned;
71 
72 
73 static struct pstate_power_info lcore_power_info[RTE_MAX_LCORE];
74 
75 /**
76  * It is to read the specific MSR.
77  */
78 
79 static int32_t
power_rdmsr(int msr,uint64_t * val,unsigned int lcore_id)80 power_rdmsr(int msr, uint64_t *val, unsigned int lcore_id)
81 {
82 	int fd, ret;
83 	char fullpath[PATH_MAX];
84 
85 	snprintf(fullpath, sizeof(fullpath), POWER_MSR_PATH, lcore_id);
86 
87 	fd = open(fullpath, O_RDONLY);
88 
89 	if (fd < 0) {
90 		RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", fullpath,
91 				 strerror(errno));
92 		return fd;
93 	}
94 
95 	ret = pread(fd, val, sizeof(uint64_t), msr);
96 
97 	if (ret < 0) {
98 		RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", fullpath,
99 				 strerror(errno));
100 		goto out;
101 	}
102 
103 	POWER_DEBUG_TRACE("MSR Path %s, offset 0x%X for lcore %u\n",
104 			fullpath, msr, lcore_id);
105 
106 	POWER_DEBUG_TRACE("Ret value %d, content is 0x%"PRIx64"\n", ret, *val);
107 
108 out:	close(fd);
109 	return ret;
110 }
111 
112 /**
113  * It is to fopen the sys file for the future setting the lcore frequency.
114  */
115 static int
power_init_for_setting_freq(struct pstate_power_info * pi)116 power_init_for_setting_freq(struct pstate_power_info *pi)
117 {
118 	FILE *f_base = NULL, *f_base_max = NULL, *f_min = NULL, *f_max = NULL;
119 	uint32_t base_ratio, base_max_ratio;
120 	uint64_t max_non_turbo;
121 	int ret;
122 
123 	/* open all files we expect to have open */
124 	open_core_sysfs_file(&f_base_max, "r", POWER_SYSFILE_BASE_MAX_FREQ,
125 			pi->lcore_id);
126 	if (f_base_max == NULL) {
127 		RTE_LOG(ERR, POWER, "failed to open %s\n",
128 				POWER_SYSFILE_BASE_MAX_FREQ);
129 		goto err;
130 	}
131 
132 	open_core_sysfs_file(&f_min, "rw+", POWER_SYSFILE_MIN_FREQ,
133 			pi->lcore_id);
134 	if (f_min == NULL) {
135 		RTE_LOG(ERR, POWER, "failed to open %s\n",
136 				POWER_SYSFILE_MIN_FREQ);
137 		goto err;
138 	}
139 
140 	open_core_sysfs_file(&f_max, "rw+", POWER_SYSFILE_MAX_FREQ,
141 			pi->lcore_id);
142 	if (f_max == NULL) {
143 		RTE_LOG(ERR, POWER, "failed to open %s\n",
144 				POWER_SYSFILE_MAX_FREQ);
145 		goto err;
146 	}
147 
148 	open_core_sysfs_file(&f_base, "r", POWER_SYSFILE_BASE_FREQ,
149 			pi->lcore_id);
150 	/* base ratio file may not exist in some kernels, so no error check */
151 
152 	/* read base max ratio */
153 	ret = read_core_sysfs_u32(f_base_max, &base_max_ratio);
154 	if (ret < 0) {
155 		RTE_LOG(ERR, POWER, "Failed to read %s\n",
156 				POWER_SYSFILE_BASE_MAX_FREQ);
157 		goto err;
158 	}
159 
160 	/* base ratio may not exist */
161 	if (f_base != NULL) {
162 		ret = read_core_sysfs_u32(f_base, &base_ratio);
163 		if (ret < 0) {
164 			RTE_LOG(ERR, POWER, "Failed to read %s\n",
165 					POWER_SYSFILE_BASE_FREQ);
166 			goto err;
167 		}
168 	} else {
169 		base_ratio = 0;
170 	}
171 
172 	/* Add MSR read to detect turbo status */
173 	if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0)
174 		goto err;
175 	/* no errors after this point */
176 
177 	/* convert ratios to bins */
178 	base_max_ratio /= BUS_FREQ;
179 	base_ratio /= BUS_FREQ;
180 
181 	/* assign file handles */
182 	pi->f_cur_min = f_min;
183 	pi->f_cur_max = f_max;
184 
185 	max_non_turbo = (max_non_turbo&NON_TURBO_MASK)>>NON_TURBO_OFFSET;
186 
187 	POWER_DEBUG_TRACE("no turbo perf %"PRIu64"\n", max_non_turbo);
188 
189 	pi->non_turbo_max_ratio = (uint32_t)max_non_turbo;
190 
191 	/*
192 	 * If base_frequency is reported as greater than the maximum
193 	 * turbo frequency, that's a known issue with some kernels.
194 	 * Set base_frequency to max_non_turbo as a workaround.
195 	 */
196 	if (base_ratio > base_max_ratio) {
197 		/* base_ratio is greater than max turbo. Kernel bug. */
198 		pi->priority_core = 0;
199 		goto out;
200 	}
201 
202 	/*
203 	 * If base_frequency is reported as greater than the maximum
204 	 * non-turbo frequency, then mark it as a high priority core.
205 	 */
206 	if (base_ratio > max_non_turbo)
207 		pi->priority_core = 1;
208 	else
209 		pi->priority_core = 0;
210 	pi->core_base_freq = base_ratio * BUS_FREQ;
211 
212 out:
213 	if (f_base != NULL)
214 		fclose(f_base);
215 	fclose(f_base_max);
216 	/* f_min and f_max are stored, no need to close */
217 	return 0;
218 
219 err:
220 	if (f_base != NULL)
221 		fclose(f_base);
222 	if (f_base_max != NULL)
223 		fclose(f_base_max);
224 	if (f_min != NULL)
225 		fclose(f_min);
226 	if (f_max != NULL)
227 		fclose(f_max);
228 	return -1;
229 }
230 
231 static int
set_freq_internal(struct pstate_power_info * pi,uint32_t idx)232 set_freq_internal(struct pstate_power_info *pi, uint32_t idx)
233 {
234 	uint32_t target_freq = 0;
235 
236 	if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
237 		RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
238 				"should be less than %u\n", idx, pi->nb_freqs);
239 		return -1;
240 	}
241 
242 	/* Check if it is the same as current */
243 	if (idx == pi->curr_idx)
244 		return 0;
245 
246 	/* Because Intel Pstate Driver only allow user change min/max hint
247 	 * User need change the min/max as same value.
248 	 */
249 	if (fseek(pi->f_cur_min, 0, SEEK_SET) < 0) {
250 		RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
251 				"for setting frequency for lcore %u\n",
252 				pi->lcore_id);
253 		return -1;
254 	}
255 
256 	if (fseek(pi->f_cur_max, 0, SEEK_SET) < 0) {
257 		RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
258 				"for setting frequency for lcore %u\n",
259 				pi->lcore_id);
260 		return -1;
261 	}
262 
263 	/* Turbo is available and enabled, first freq bucket is sys max freq */
264 	if (pi->turbo_available && idx == 0) {
265 		if (pi->turbo_enable)
266 			target_freq = pi->sys_max_freq;
267 		else {
268 			RTE_LOG(ERR, POWER, "Turbo is off, frequency can't be scaled up more %u\n",
269 					pi->lcore_id);
270 			return -1;
271 		}
272 	} else
273 		target_freq = pi->freqs[idx];
274 
275 	/* Decrease freq, the min freq should be updated first */
276 	if (idx  >  pi->curr_idx) {
277 
278 		if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) {
279 			RTE_LOG(ERR, POWER, "Fail to write new frequency for "
280 					"lcore %u\n", pi->lcore_id);
281 			return -1;
282 		}
283 
284 		if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) {
285 			RTE_LOG(ERR, POWER, "Fail to write new frequency for "
286 					"lcore %u\n", pi->lcore_id);
287 			return -1;
288 		}
289 
290 		POWER_DEBUG_TRACE("Frequency '%u' to be set for lcore %u\n",
291 				  target_freq, pi->lcore_id);
292 
293 		fflush(pi->f_cur_min);
294 		fflush(pi->f_cur_max);
295 
296 	}
297 
298 	/* Increase freq, the max freq should be updated first */
299 	if (idx  <  pi->curr_idx) {
300 
301 		if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) {
302 			RTE_LOG(ERR, POWER, "Fail to write new frequency for "
303 					"lcore %u\n", pi->lcore_id);
304 			return -1;
305 		}
306 
307 		if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) {
308 			RTE_LOG(ERR, POWER, "Fail to write new frequency for "
309 					"lcore %u\n", pi->lcore_id);
310 			return -1;
311 		}
312 
313 		POWER_DEBUG_TRACE("Frequency '%u' to be set for lcore %u\n",
314 				  target_freq, pi->lcore_id);
315 
316 		fflush(pi->f_cur_max);
317 		fflush(pi->f_cur_min);
318 	}
319 
320 	pi->curr_idx = idx;
321 
322 	return 1;
323 }
324 
325 /**
326  * It is to check the current scaling governor by reading sys file, and then
327  * set it into 'performance' if it is not by writing the sys file. The original
328  * governor will be saved for rolling back.
329  */
330 static int
power_set_governor_performance(struct pstate_power_info * pi)331 power_set_governor_performance(struct pstate_power_info *pi)
332 {
333 	return power_set_governor(pi->lcore_id, POWER_GOVERNOR_PERF,
334 			pi->governor_ori, sizeof(pi->governor_ori));
335 }
336 
337 /**
338  * It is to check the governor and then set the original governor back if
339  * needed by writing the sys file.
340  */
341 static int
power_set_governor_original(struct pstate_power_info * pi)342 power_set_governor_original(struct pstate_power_info *pi)
343 {
344 	return power_set_governor(pi->lcore_id, pi->governor_ori, NULL, 0);
345 }
346 
347 /**
348  * It is to get the available frequencies of the specific lcore by reading the
349  * sys file.
350  */
351 static int
power_get_available_freqs(struct pstate_power_info * pi)352 power_get_available_freqs(struct pstate_power_info *pi)
353 {
354 	FILE *f_min = NULL, *f_max = NULL;
355 	int ret = -1;
356 	uint32_t sys_min_freq = 0, sys_max_freq = 0, base_max_freq = 0;
357 	uint32_t i, num_freqs = 0;
358 
359 	/* open all files */
360 	open_core_sysfs_file(&f_max, "r", POWER_SYSFILE_BASE_MAX_FREQ,
361 			pi->lcore_id);
362 	if (f_max == NULL) {
363 		RTE_LOG(ERR, POWER, "failed to open %s\n",
364 				POWER_SYSFILE_BASE_MAX_FREQ);
365 		goto out;
366 	}
367 
368 	open_core_sysfs_file(&f_min, "r", POWER_SYSFILE_BASE_MIN_FREQ,
369 			pi->lcore_id);
370 	if (f_min == NULL) {
371 		RTE_LOG(ERR, POWER, "failed to open %s\n",
372 				POWER_SYSFILE_BASE_MIN_FREQ);
373 		goto out;
374 	}
375 
376 	/* read base ratios */
377 	ret = read_core_sysfs_u32(f_max, &sys_max_freq);
378 	if (ret < 0) {
379 		RTE_LOG(ERR, POWER, "Failed to read %s\n",
380 				POWER_SYSFILE_BASE_MAX_FREQ);
381 		goto out;
382 	}
383 
384 	ret = read_core_sysfs_u32(f_min, &sys_min_freq);
385 	if (ret < 0) {
386 		RTE_LOG(ERR, POWER, "Failed to read %s\n",
387 				POWER_SYSFILE_BASE_MIN_FREQ);
388 		goto out;
389 	}
390 
391 	if (sys_max_freq < sys_min_freq)
392 		goto out;
393 
394 	pi->sys_max_freq = sys_max_freq;
395 
396 	if (pi->priority_core == 1)
397 		base_max_freq = pi->core_base_freq;
398 	else
399 		base_max_freq = pi->non_turbo_max_ratio * BUS_FREQ;
400 
401 	POWER_DEBUG_TRACE("sys min %u, sys max %u, base_max %u\n",
402 			sys_min_freq,
403 			sys_max_freq,
404 			base_max_freq);
405 
406 	if (base_max_freq < sys_max_freq)
407 		pi->turbo_available = 1;
408 	else
409 		pi->turbo_available = 0;
410 
411 	/* If turbo is available then there is one extra freq bucket
412 	 * to store the sys max freq which value is base_max +1
413 	 */
414 	num_freqs = (base_max_freq - sys_min_freq) / BUS_FREQ + 1 +
415 		pi->turbo_available;
416 	if (num_freqs >= RTE_MAX_LCORE_FREQS) {
417 		RTE_LOG(ERR, POWER, "Too many available frequencies: %d\n",
418 				num_freqs);
419 		goto out;
420 	}
421 
422 	/* Generate the freq bucket array.
423 	 * If turbo is available the freq bucket[0] value is base_max +1
424 	 * the bucket[1] is base_max, bucket[2] is base_max - BUS_FREQ
425 	 * and so on.
426 	 * If turbo is not available bucket[0] is base_max and so on
427 	 */
428 	for (i = 0, pi->nb_freqs = 0; i < num_freqs; i++) {
429 		if ((i == 0) && pi->turbo_available)
430 			pi->freqs[pi->nb_freqs++] = base_max_freq + 1;
431 		else
432 			pi->freqs[pi->nb_freqs++] =
433 			base_max_freq - (i - pi->turbo_available) * BUS_FREQ;
434 	}
435 
436 	ret = 0;
437 
438 	POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
439 			num_freqs, pi->lcore_id);
440 
441 out:
442 	if (f_min != NULL)
443 		fclose(f_min);
444 	if (f_max != NULL)
445 		fclose(f_max);
446 
447 	return ret;
448 }
449 
450 static int
power_get_cur_idx(struct pstate_power_info * pi)451 power_get_cur_idx(struct pstate_power_info *pi)
452 {
453 	FILE *f_cur;
454 	int ret = -1;
455 	uint32_t sys_cur_freq = 0;
456 	unsigned int i;
457 
458 	open_core_sysfs_file(&f_cur, "r", POWER_SYSFILE_CUR_FREQ,
459 			pi->lcore_id);
460 	if (f_cur == NULL) {
461 		RTE_LOG(ERR, POWER, "failed to open %s\n",
462 				POWER_SYSFILE_CUR_FREQ);
463 		goto fail;
464 	}
465 
466 	ret = read_core_sysfs_u32(f_cur, &sys_cur_freq);
467 	if (ret < 0) {
468 		RTE_LOG(ERR, POWER, "Failed to read %s\n",
469 				POWER_SYSFILE_CUR_FREQ);
470 		goto fail;
471 	}
472 
473 	/* convert the frequency to nearest 100000 value
474 	 * Ex: if sys_cur_freq=1396789 then freq_conv=1400000
475 	 * Ex: if sys_cur_freq=800030 then freq_conv=800000
476 	 * Ex: if sys_cur_freq=800030 then freq_conv=800000
477 	 */
478 	unsigned int freq_conv = 0;
479 	freq_conv = (sys_cur_freq + FREQ_ROUNDING_DELTA)
480 				/ ROUND_FREQ_TO_N_100000;
481 	freq_conv = freq_conv * ROUND_FREQ_TO_N_100000;
482 
483 	for (i = 0; i < pi->nb_freqs; i++) {
484 		if (freq_conv == pi->freqs[i]) {
485 			pi->curr_idx = i;
486 			break;
487 		}
488 	}
489 
490 	ret = 0;
491 fail:
492 	if (f_cur != NULL)
493 		fclose(f_cur);
494 	return ret;
495 }
496 
497 int
power_pstate_cpufreq_check_supported(void)498 power_pstate_cpufreq_check_supported(void)
499 {
500 	return cpufreq_check_scaling_driver(POWER_PSTATE_DRIVER);
501 }
502 
503 int
power_pstate_cpufreq_init(unsigned int lcore_id)504 power_pstate_cpufreq_init(unsigned int lcore_id)
505 {
506 	struct pstate_power_info *pi;
507 	uint32_t exp_state;
508 
509 	if (lcore_id >= RTE_MAX_LCORE) {
510 		RTE_LOG(ERR, POWER, "Lcore id %u can not exceed %u\n",
511 				lcore_id, RTE_MAX_LCORE - 1U);
512 		return -1;
513 	}
514 
515 	pi = &lcore_power_info[lcore_id];
516 	exp_state = POWER_IDLE;
517 	/* The power in use state works as a guard variable between
518 	 * the CPU frequency control initialization and exit process.
519 	 * The ACQUIRE memory ordering here pairs with the RELEASE
520 	 * ordering below as lock to make sure the frequency operations
521 	 * in the critical section are done under the correct state.
522 	 */
523 	if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
524 					POWER_ONGOING, 0,
525 					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
526 		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
527 				"in use\n", lcore_id);
528 		return -1;
529 	}
530 
531 	pi->lcore_id = lcore_id;
532 	/* Check and set the governor */
533 	if (power_set_governor_performance(pi) < 0) {
534 		RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
535 				"performance\n", lcore_id);
536 		goto fail;
537 	}
538 	/* Init for setting lcore frequency */
539 	if (power_init_for_setting_freq(pi) < 0) {
540 		RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
541 				"lcore %u\n", lcore_id);
542 		goto fail;
543 	}
544 
545 	/* Get the available frequencies */
546 	if (power_get_available_freqs(pi) < 0) {
547 		RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
548 				"lcore %u\n", lcore_id);
549 		goto fail;
550 	}
551 
552 	if (power_get_cur_idx(pi) < 0) {
553 		RTE_LOG(ERR, POWER, "Cannot get current frequency "
554 				"index of lcore %u\n", lcore_id);
555 		goto fail;
556 	}
557 
558 	/* Set freq to max by default */
559 	if (power_pstate_cpufreq_freq_max(lcore_id) < 0) {
560 		RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
561 				"to max\n", lcore_id);
562 		goto fail;
563 	}
564 
565 	RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
566 			"power management\n", lcore_id);
567 	exp_state = POWER_ONGOING;
568 	__atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_USED,
569 				    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
570 
571 	return 0;
572 
573 fail:
574 	exp_state = POWER_ONGOING;
575 	__atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
576 				    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
577 
578 	return -1;
579 }
580 
581 int
power_pstate_cpufreq_exit(unsigned int lcore_id)582 power_pstate_cpufreq_exit(unsigned int lcore_id)
583 {
584 	struct pstate_power_info *pi;
585 	uint32_t exp_state;
586 
587 	if (lcore_id >= RTE_MAX_LCORE) {
588 		RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
589 				lcore_id, RTE_MAX_LCORE - 1U);
590 		return -1;
591 	}
592 	pi = &lcore_power_info[lcore_id];
593 
594 	exp_state = POWER_USED;
595 	/* The power in use state works as a guard variable between
596 	 * the CPU frequency control initialization and exit process.
597 	 * The ACQUIRE memory ordering here pairs with the RELEASE
598 	 * ordering below as lock to make sure the frequency operations
599 	 * in the critical section are under done the correct state.
600 	 */
601 	if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
602 					POWER_ONGOING, 0,
603 					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
604 		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
605 				"not used\n", lcore_id);
606 		return -1;
607 	}
608 
609 	/* Close FD of setting freq */
610 	fclose(pi->f_cur_min);
611 	fclose(pi->f_cur_max);
612 	pi->f_cur_min = NULL;
613 	pi->f_cur_max = NULL;
614 
615 	/* Set the governor back to the original */
616 	if (power_set_governor_original(pi) < 0) {
617 		RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
618 				"to the original\n", lcore_id);
619 		goto fail;
620 	}
621 
622 	RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
623 			"'performance' mode and been set back to the "
624 			"original\n", lcore_id);
625 	exp_state = POWER_ONGOING;
626 	__atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_IDLE,
627 				    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
628 
629 	return 0;
630 
631 fail:
632 	exp_state = POWER_ONGOING;
633 	__atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
634 				    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
635 
636 	return -1;
637 }
638 
639 
640 uint32_t
power_pstate_cpufreq_freqs(unsigned int lcore_id,uint32_t * freqs,uint32_t num)641 power_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
642 {
643 	struct pstate_power_info *pi;
644 
645 	if (lcore_id >= RTE_MAX_LCORE) {
646 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
647 		return 0;
648 	}
649 
650 	if (freqs == NULL) {
651 		RTE_LOG(ERR, POWER, "NULL buffer supplied\n");
652 		return 0;
653 	}
654 
655 	pi = &lcore_power_info[lcore_id];
656 	if (num < pi->nb_freqs) {
657 		RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
658 		return 0;
659 	}
660 	rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
661 
662 	return pi->nb_freqs;
663 }
664 
665 uint32_t
power_pstate_cpufreq_get_freq(unsigned int lcore_id)666 power_pstate_cpufreq_get_freq(unsigned int lcore_id)
667 {
668 	if (lcore_id >= RTE_MAX_LCORE) {
669 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
670 		return RTE_POWER_INVALID_FREQ_INDEX;
671 	}
672 
673 	return lcore_power_info[lcore_id].curr_idx;
674 }
675 
676 
677 int
power_pstate_cpufreq_set_freq(unsigned int lcore_id,uint32_t index)678 power_pstate_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
679 {
680 	if (lcore_id >= RTE_MAX_LCORE) {
681 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
682 		return -1;
683 	}
684 
685 	return set_freq_internal(&(lcore_power_info[lcore_id]), index);
686 }
687 
688 int
power_pstate_cpufreq_freq_up(unsigned int lcore_id)689 power_pstate_cpufreq_freq_up(unsigned int lcore_id)
690 {
691 	struct pstate_power_info *pi;
692 
693 	if (lcore_id >= RTE_MAX_LCORE) {
694 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
695 		return -1;
696 	}
697 
698 	pi = &lcore_power_info[lcore_id];
699 	if (pi->curr_idx == 0 ||
700 	    (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
701 		return 0;
702 
703 	/* Frequencies in the array are from high to low. */
704 	return set_freq_internal(pi, pi->curr_idx - 1);
705 }
706 
707 int
power_pstate_cpufreq_freq_down(unsigned int lcore_id)708 power_pstate_cpufreq_freq_down(unsigned int lcore_id)
709 {
710 	struct pstate_power_info *pi;
711 
712 	if (lcore_id >= RTE_MAX_LCORE) {
713 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
714 		return -1;
715 	}
716 
717 	pi = &lcore_power_info[lcore_id];
718 	if (pi->curr_idx + 1 == pi->nb_freqs)
719 		return 0;
720 
721 	/* Frequencies in the array are from high to low. */
722 	return set_freq_internal(pi, pi->curr_idx + 1);
723 }
724 
725 int
power_pstate_cpufreq_freq_max(unsigned int lcore_id)726 power_pstate_cpufreq_freq_max(unsigned int lcore_id)
727 {
728 	if (lcore_id >= RTE_MAX_LCORE) {
729 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
730 		return -1;
731 	}
732 
733 	/* Frequencies in the array are from high to low. */
734 	if (lcore_power_info[lcore_id].turbo_available) {
735 		if (lcore_power_info[lcore_id].turbo_enable)
736 			/* Set to Turbo */
737 			return set_freq_internal(
738 					&lcore_power_info[lcore_id], 0);
739 		else
740 			/* Set to max non-turbo */
741 			return set_freq_internal(
742 					&lcore_power_info[lcore_id], 1);
743 	} else
744 		return set_freq_internal(&lcore_power_info[lcore_id], 0);
745 }
746 
747 
748 int
power_pstate_cpufreq_freq_min(unsigned int lcore_id)749 power_pstate_cpufreq_freq_min(unsigned int lcore_id)
750 {
751 	struct pstate_power_info *pi;
752 
753 	if (lcore_id >= RTE_MAX_LCORE) {
754 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
755 		return -1;
756 	}
757 
758 	pi = &lcore_power_info[lcore_id];
759 
760 	/* Frequencies in the array are from high to low. */
761 	return set_freq_internal(pi, pi->nb_freqs - 1);
762 }
763 
764 
765 int
power_pstate_turbo_status(unsigned int lcore_id)766 power_pstate_turbo_status(unsigned int lcore_id)
767 {
768 	struct pstate_power_info *pi;
769 
770 	if (lcore_id >= RTE_MAX_LCORE) {
771 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
772 		return -1;
773 	}
774 
775 	pi = &lcore_power_info[lcore_id];
776 
777 	return pi->turbo_enable;
778 }
779 
780 int
power_pstate_enable_turbo(unsigned int lcore_id)781 power_pstate_enable_turbo(unsigned int lcore_id)
782 {
783 	struct pstate_power_info *pi;
784 
785 	if (lcore_id >= RTE_MAX_LCORE) {
786 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
787 		return -1;
788 	}
789 
790 	pi = &lcore_power_info[lcore_id];
791 
792 	if (pi->turbo_available)
793 		pi->turbo_enable = 1;
794 	else {
795 		pi->turbo_enable = 0;
796 		RTE_LOG(ERR, POWER,
797 			"Failed to enable turbo on lcore %u\n",
798 			lcore_id);
799 			return -1;
800 	}
801 
802 	return 0;
803 }
804 
805 
806 int
power_pstate_disable_turbo(unsigned int lcore_id)807 power_pstate_disable_turbo(unsigned int lcore_id)
808 {
809 	struct pstate_power_info *pi;
810 
811 	if (lcore_id >= RTE_MAX_LCORE) {
812 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
813 		return -1;
814 	}
815 
816 	pi = &lcore_power_info[lcore_id];
817 
818 	pi->turbo_enable = 0;
819 
820 	if (pi->turbo_available && pi->curr_idx <= 1) {
821 		/* Try to set freq to max by default coming out of turbo */
822 		if (power_pstate_cpufreq_freq_max(lcore_id) < 0) {
823 			RTE_LOG(ERR, POWER,
824 				"Failed to set frequency of lcore %u to max\n",
825 				lcore_id);
826 			return -1;
827 		}
828 	}
829 
830 	return 0;
831 }
832 
833 
power_pstate_get_capabilities(unsigned int lcore_id,struct rte_power_core_capabilities * caps)834 int power_pstate_get_capabilities(unsigned int lcore_id,
835 		struct rte_power_core_capabilities *caps)
836 {
837 	struct pstate_power_info *pi;
838 
839 	if (lcore_id >= RTE_MAX_LCORE) {
840 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
841 		return -1;
842 	}
843 	if (caps == NULL) {
844 		RTE_LOG(ERR, POWER, "Invalid argument\n");
845 		return -1;
846 	}
847 
848 	pi = &lcore_power_info[lcore_id];
849 	caps->capabilities = 0;
850 	caps->turbo = !!(pi->turbo_available);
851 	caps->priority = pi->priority_core;
852 
853 	return 0;
854 }
855