xref: /dpdk/lib/power/power_acpi_cpufreq.c (revision 30a1de10)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <fcntl.h>
7 #include <stdlib.h>
8 
9 #include <rte_memcpy.h>
10 #include <rte_string_fns.h>
11 
12 #include "power_acpi_cpufreq.h"
13 #include "power_common.h"
14 
/* NOTE(review): STR_SIZE is not referenced in the visible part of this file. */
#define STR_SIZE     1024
/* Numeric base handed to strtoul() when parsing values read from sysfs. */
#define POWER_CONVERT_TO_DECIMAL 10

/* Governor requested for an lcore while this library manages it. */
#define POWER_GOVERNOR_USERSPACE "userspace"
/* Per-lcore sysfs path templates; %u is filled in with the lcore id. */
#define POWER_SYSFILE_AVAIL_FREQ \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
#define POWER_SYSFILE_SETSPEED   \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
/* Scaling driver name checked by power_acpi_cpufreq_check_supported(). */
#define POWER_ACPI_DRIVER "acpi-cpufreq"

/*
 * MSR related
 *
 * NOTE(review): none of the constants below are referenced in the visible
 * part of this file - confirm whether they are still needed.
 */
#define PLATFORM_INFO     0x0CE
#define TURBO_RATIO_LIMIT 0x1AD
#define IA32_PERF_CTL     0x199
#define CORE_TURBO_DISABLE_BIT ((uint64_t)1<<32)
32 
/*
 * Per-lcore life-cycle state, used as a guard between
 * power_acpi_cpufreq_init() and power_acpi_cpufreq_exit().
 */
enum power_state {
	POWER_IDLE = 0,  /* not initialised yet, or exit completed successfully */
	POWER_ONGOING,   /* an init or exit call is currently in progress */
	POWER_USED,      /* init completed successfully */
	POWER_UNKNOWN    /* an init or exit call failed */
};
39 
/**
 * Power info per lcore.
 *
 * One instance exists for every possible lcore; it is filled in by
 * power_acpi_cpufreq_init() and consulted by the frequency/turbo helpers.
 */
struct acpi_power_info {
	unsigned int lcore_id;               /**< Logical core id */
	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Available frequencies, ordered high to low */
	uint32_t nb_freqs;                   /**< Number of valid entries in freqs */
	FILE *f;                             /**< stdio stream on scaling_setspeed; NULL after exit */
	char governor_ori[32];               /**< Original governor name, restored on exit */
	uint32_t curr_idx;                   /**< Index in freqs of the current frequency */
	uint32_t state;                      /**< enum power_state value, updated atomically */
	uint16_t turbo_available;            /**< Non-zero if Turbo Boost was detected */
	uint16_t turbo_enable;               /**< Non-zero if Turbo Boost may be used */
} __rte_cache_aligned;

/* Power context table, indexed by lcore id. */
static struct acpi_power_info lcore_power_info[RTE_MAX_LCORE];
56 
57 /**
58  * It is to set specific freq for specific logical core, according to the index
59  * of supported frequencies.
60  */
61 static int
set_freq_internal(struct acpi_power_info * pi,uint32_t idx)62 set_freq_internal(struct acpi_power_info *pi, uint32_t idx)
63 {
64 	if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
65 		RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
66 				"should be less than %u\n", idx, pi->nb_freqs);
67 		return -1;
68 	}
69 
70 	/* Check if it is the same as current */
71 	if (idx == pi->curr_idx)
72 		return 0;
73 
74 	POWER_DEBUG_TRACE("Frequency[%u] %u to be set for lcore %u\n",
75 			idx, pi->freqs[idx], pi->lcore_id);
76 	if (fseek(pi->f, 0, SEEK_SET) < 0) {
77 		RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
78 				"for setting frequency for lcore %u\n", pi->lcore_id);
79 		return -1;
80 	}
81 	if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
82 		RTE_LOG(ERR, POWER, "Fail to write new frequency for "
83 				"lcore %u\n", pi->lcore_id);
84 		return -1;
85 	}
86 	fflush(pi->f);
87 	pi->curr_idx = idx;
88 
89 	return 1;
90 }
91 
92 /**
93  * It is to check the current scaling governor by reading sys file, and then
94  * set it into 'userspace' if it is not by writing the sys file. The original
95  * governor will be saved for rolling back.
96  */
97 static int
power_set_governor_userspace(struct acpi_power_info * pi)98 power_set_governor_userspace(struct acpi_power_info *pi)
99 {
100 	return power_set_governor(pi->lcore_id, POWER_GOVERNOR_USERSPACE,
101 			pi->governor_ori, sizeof(pi->governor_ori));
102 }
103 
104 /**
105  * It is to check the governor and then set the original governor back if
106  * needed by writing the sys file.
107  */
108 static int
power_set_governor_original(struct acpi_power_info * pi)109 power_set_governor_original(struct acpi_power_info *pi)
110 {
111 	return power_set_governor(pi->lcore_id, pi->governor_ori, NULL, 0);
112 }
113 
114 /**
115  * It is to get the available frequencies of the specific lcore by reading the
116  * sys file.
117  */
118 static int
power_get_available_freqs(struct acpi_power_info * pi)119 power_get_available_freqs(struct acpi_power_info *pi)
120 {
121 	FILE *f;
122 	int ret = -1, i, count;
123 	char *p;
124 	char buf[BUFSIZ];
125 	char *freqs[RTE_MAX_LCORE_FREQS];
126 
127 	open_core_sysfs_file(&f, "r", POWER_SYSFILE_AVAIL_FREQ, pi->lcore_id);
128 	if (f == NULL) {
129 		RTE_LOG(ERR, POWER, "failed to open %s\n",
130 				POWER_SYSFILE_AVAIL_FREQ);
131 		goto out;
132 	}
133 
134 	ret = read_core_sysfs_s(f, buf, sizeof(buf));
135 	if ((ret) < 0) {
136 		RTE_LOG(ERR, POWER, "Failed to read %s\n",
137 				POWER_SYSFILE_AVAIL_FREQ);
138 		goto out;
139 	}
140 
141 	/* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
142 	count = rte_strsplit(buf, sizeof(buf), freqs,
143 			RTE_MAX_LCORE_FREQS, ' ');
144 	if (count <= 0) {
145 		RTE_LOG(ERR, POWER, "No available frequency in "
146 				""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
147 		goto out;
148 	}
149 	if (count >= RTE_MAX_LCORE_FREQS) {
150 		RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
151 				count);
152 		goto out;
153 	}
154 
155 	/* Store the available frequencies into power context */
156 	for (i = 0, pi->nb_freqs = 0; i < count; i++) {
157 		POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
158 				i, freqs[i]);
159 		pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
160 				POWER_CONVERT_TO_DECIMAL);
161 	}
162 
163 	if ((pi->freqs[0]-1000) == pi->freqs[1]) {
164 		pi->turbo_available = 1;
165 		pi->turbo_enable = 1;
166 		POWER_DEBUG_TRACE("Lcore %u Can do Turbo Boost\n",
167 				pi->lcore_id);
168 	} else {
169 		pi->turbo_available = 0;
170 		pi->turbo_enable = 0;
171 		POWER_DEBUG_TRACE("Turbo Boost not available on Lcore %u\n",
172 				pi->lcore_id);
173 	}
174 
175 	ret = 0;
176 	POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
177 			count, pi->lcore_id);
178 out:
179 	if (f != NULL)
180 		fclose(f);
181 
182 	return ret;
183 }
184 
185 /**
186  * It is to fopen the sys file for the future setting the lcore frequency.
187  */
188 static int
power_init_for_setting_freq(struct acpi_power_info * pi)189 power_init_for_setting_freq(struct acpi_power_info *pi)
190 {
191 	FILE *f;
192 	char buf[BUFSIZ];
193 	uint32_t i, freq;
194 	int ret;
195 
196 	open_core_sysfs_file(&f, "rw+", POWER_SYSFILE_SETSPEED, pi->lcore_id);
197 	if (f == NULL) {
198 		RTE_LOG(ERR, POWER, "Failed to open %s\n",
199 				POWER_SYSFILE_SETSPEED);
200 		goto err;
201 	}
202 
203 	ret = read_core_sysfs_s(f, buf, sizeof(buf));
204 	if ((ret) < 0) {
205 		RTE_LOG(ERR, POWER, "Failed to read %s\n",
206 				POWER_SYSFILE_SETSPEED);
207 		goto err;
208 	}
209 
210 	freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
211 	for (i = 0; i < pi->nb_freqs; i++) {
212 		if (freq == pi->freqs[i]) {
213 			pi->curr_idx = i;
214 			pi->f = f;
215 			return 0;
216 		}
217 	}
218 
219 err:
220 	if (f != NULL)
221 		fclose(f);
222 
223 	return -1;
224 }
225 
226 int
power_acpi_cpufreq_check_supported(void)227 power_acpi_cpufreq_check_supported(void)
228 {
229 	return cpufreq_check_scaling_driver(POWER_ACPI_DRIVER);
230 }
231 
232 int
power_acpi_cpufreq_init(unsigned int lcore_id)233 power_acpi_cpufreq_init(unsigned int lcore_id)
234 {
235 	struct acpi_power_info *pi;
236 	uint32_t exp_state;
237 
238 	if (lcore_id >= RTE_MAX_LCORE) {
239 		RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
240 				lcore_id, RTE_MAX_LCORE - 1U);
241 		return -1;
242 	}
243 
244 	pi = &lcore_power_info[lcore_id];
245 	exp_state = POWER_IDLE;
246 	/* The power in use state works as a guard variable between
247 	 * the CPU frequency control initialization and exit process.
248 	 * The ACQUIRE memory ordering here pairs with the RELEASE
249 	 * ordering below as lock to make sure the frequency operations
250 	 * in the critical section are done under the correct state.
251 	 */
252 	if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
253 					POWER_ONGOING, 0,
254 					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
255 		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
256 				"in use\n", lcore_id);
257 		return -1;
258 	}
259 
260 	pi->lcore_id = lcore_id;
261 	/* Check and set the governor */
262 	if (power_set_governor_userspace(pi) < 0) {
263 		RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
264 				"userspace\n", lcore_id);
265 		goto fail;
266 	}
267 
268 	/* Get the available frequencies */
269 	if (power_get_available_freqs(pi) < 0) {
270 		RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
271 				"lcore %u\n", lcore_id);
272 		goto fail;
273 	}
274 
275 	/* Init for setting lcore frequency */
276 	if (power_init_for_setting_freq(pi) < 0) {
277 		RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
278 				"lcore %u\n", lcore_id);
279 		goto fail;
280 	}
281 
282 	/* Set freq to max by default */
283 	if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
284 		RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
285 				"to max\n", lcore_id);
286 		goto fail;
287 	}
288 
289 	RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
290 			"power management\n", lcore_id);
291 	exp_state = POWER_ONGOING;
292 	__atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_USED,
293 				    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
294 
295 	return 0;
296 
297 fail:
298 	exp_state = POWER_ONGOING;
299 	__atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
300 				    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
301 
302 	return -1;
303 }
304 
305 int
power_acpi_cpufreq_exit(unsigned int lcore_id)306 power_acpi_cpufreq_exit(unsigned int lcore_id)
307 {
308 	struct acpi_power_info *pi;
309 	uint32_t exp_state;
310 
311 	if (lcore_id >= RTE_MAX_LCORE) {
312 		RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
313 				lcore_id, RTE_MAX_LCORE - 1U);
314 		return -1;
315 	}
316 	pi = &lcore_power_info[lcore_id];
317 	exp_state = POWER_USED;
318 	/* The power in use state works as a guard variable between
319 	 * the CPU frequency control initialization and exit process.
320 	 * The ACQUIRE memory ordering here pairs with the RELEASE
321 	 * ordering below as lock to make sure the frequency operations
322 	 * in the critical section are done under the correct state.
323 	 */
324 	if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
325 					POWER_ONGOING, 0,
326 					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
327 		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
328 				"not used\n", lcore_id);
329 		return -1;
330 	}
331 
332 	/* Close FD of setting freq */
333 	fclose(pi->f);
334 	pi->f = NULL;
335 
336 	/* Set the governor back to the original */
337 	if (power_set_governor_original(pi) < 0) {
338 		RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
339 				"to the original\n", lcore_id);
340 		goto fail;
341 	}
342 
343 	RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
344 			"'userspace' mode and been set back to the "
345 			"original\n", lcore_id);
346 	exp_state = POWER_ONGOING;
347 	__atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_IDLE,
348 				    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
349 
350 	return 0;
351 
352 fail:
353 	exp_state = POWER_ONGOING;
354 	__atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
355 				    0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
356 
357 	return -1;
358 }
359 
360 uint32_t
power_acpi_cpufreq_freqs(unsigned int lcore_id,uint32_t * freqs,uint32_t num)361 power_acpi_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
362 {
363 	struct acpi_power_info *pi;
364 
365 	if (lcore_id >= RTE_MAX_LCORE) {
366 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
367 		return 0;
368 	}
369 
370 	if (freqs == NULL) {
371 		RTE_LOG(ERR, POWER, "NULL buffer supplied\n");
372 		return 0;
373 	}
374 
375 	pi = &lcore_power_info[lcore_id];
376 	if (num < pi->nb_freqs) {
377 		RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
378 		return 0;
379 	}
380 	rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
381 
382 	return pi->nb_freqs;
383 }
384 
385 uint32_t
power_acpi_cpufreq_get_freq(unsigned int lcore_id)386 power_acpi_cpufreq_get_freq(unsigned int lcore_id)
387 {
388 	if (lcore_id >= RTE_MAX_LCORE) {
389 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
390 		return RTE_POWER_INVALID_FREQ_INDEX;
391 	}
392 
393 	return lcore_power_info[lcore_id].curr_idx;
394 }
395 
396 int
power_acpi_cpufreq_set_freq(unsigned int lcore_id,uint32_t index)397 power_acpi_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
398 {
399 	if (lcore_id >= RTE_MAX_LCORE) {
400 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
401 		return -1;
402 	}
403 
404 	return set_freq_internal(&(lcore_power_info[lcore_id]), index);
405 }
406 
407 int
power_acpi_cpufreq_freq_down(unsigned int lcore_id)408 power_acpi_cpufreq_freq_down(unsigned int lcore_id)
409 {
410 	struct acpi_power_info *pi;
411 
412 	if (lcore_id >= RTE_MAX_LCORE) {
413 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
414 		return -1;
415 	}
416 
417 	pi = &lcore_power_info[lcore_id];
418 	if (pi->curr_idx + 1 == pi->nb_freqs)
419 		return 0;
420 
421 	/* Frequencies in the array are from high to low. */
422 	return set_freq_internal(pi, pi->curr_idx + 1);
423 }
424 
425 int
power_acpi_cpufreq_freq_up(unsigned int lcore_id)426 power_acpi_cpufreq_freq_up(unsigned int lcore_id)
427 {
428 	struct acpi_power_info *pi;
429 
430 	if (lcore_id >= RTE_MAX_LCORE) {
431 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
432 		return -1;
433 	}
434 
435 	pi = &lcore_power_info[lcore_id];
436 	if (pi->curr_idx == 0 ||
437 	    (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
438 		return 0;
439 
440 	/* Frequencies in the array are from high to low. */
441 	return set_freq_internal(pi, pi->curr_idx - 1);
442 }
443 
444 int
power_acpi_cpufreq_freq_max(unsigned int lcore_id)445 power_acpi_cpufreq_freq_max(unsigned int lcore_id)
446 {
447 	if (lcore_id >= RTE_MAX_LCORE) {
448 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
449 		return -1;
450 	}
451 
452 	/* Frequencies in the array are from high to low. */
453 	if (lcore_power_info[lcore_id].turbo_available) {
454 		if (lcore_power_info[lcore_id].turbo_enable)
455 			/* Set to Turbo */
456 			return set_freq_internal(
457 					&lcore_power_info[lcore_id], 0);
458 		else
459 			/* Set to max non-turbo */
460 			return set_freq_internal(
461 					&lcore_power_info[lcore_id], 1);
462 	} else
463 		return set_freq_internal(&lcore_power_info[lcore_id], 0);
464 }
465 
466 int
power_acpi_cpufreq_freq_min(unsigned int lcore_id)467 power_acpi_cpufreq_freq_min(unsigned int lcore_id)
468 {
469 	struct acpi_power_info *pi;
470 
471 	if (lcore_id >= RTE_MAX_LCORE) {
472 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
473 		return -1;
474 	}
475 
476 	pi = &lcore_power_info[lcore_id];
477 
478 	/* Frequencies in the array are from high to low. */
479 	return set_freq_internal(pi, pi->nb_freqs - 1);
480 }
481 
482 
483 int
power_acpi_turbo_status(unsigned int lcore_id)484 power_acpi_turbo_status(unsigned int lcore_id)
485 {
486 	struct acpi_power_info *pi;
487 
488 	if (lcore_id >= RTE_MAX_LCORE) {
489 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
490 		return -1;
491 	}
492 
493 	pi = &lcore_power_info[lcore_id];
494 
495 	return pi->turbo_enable;
496 }
497 
498 
499 int
power_acpi_enable_turbo(unsigned int lcore_id)500 power_acpi_enable_turbo(unsigned int lcore_id)
501 {
502 	struct acpi_power_info *pi;
503 
504 	if (lcore_id >= RTE_MAX_LCORE) {
505 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
506 		return -1;
507 	}
508 
509 	pi = &lcore_power_info[lcore_id];
510 
511 	if (pi->turbo_available)
512 		pi->turbo_enable = 1;
513 	else {
514 		pi->turbo_enable = 0;
515 		RTE_LOG(ERR, POWER,
516 			"Failed to enable turbo on lcore %u\n",
517 			lcore_id);
518 			return -1;
519 	}
520 
521 	/* Max may have changed, so call to max function */
522 	if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
523 		RTE_LOG(ERR, POWER,
524 			"Failed to set frequency of lcore %u to max\n",
525 			lcore_id);
526 			return -1;
527 	}
528 
529 	return 0;
530 }
531 
532 int
power_acpi_disable_turbo(unsigned int lcore_id)533 power_acpi_disable_turbo(unsigned int lcore_id)
534 {
535 	struct acpi_power_info *pi;
536 
537 	if (lcore_id >= RTE_MAX_LCORE) {
538 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
539 		return -1;
540 	}
541 
542 	pi = &lcore_power_info[lcore_id];
543 
544 	 pi->turbo_enable = 0;
545 
546 	if ((pi->turbo_available) && (pi->curr_idx <= 1)) {
547 		/* Try to set freq to max by default coming out of turbo */
548 		if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
549 			RTE_LOG(ERR, POWER,
550 				"Failed to set frequency of lcore %u to max\n",
551 				lcore_id);
552 			return -1;
553 		}
554 	}
555 
556 	return 0;
557 }
558 
power_acpi_get_capabilities(unsigned int lcore_id,struct rte_power_core_capabilities * caps)559 int power_acpi_get_capabilities(unsigned int lcore_id,
560 		struct rte_power_core_capabilities *caps)
561 {
562 	struct acpi_power_info *pi;
563 
564 	if (lcore_id >= RTE_MAX_LCORE) {
565 		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
566 		return -1;
567 	}
568 	if (caps == NULL) {
569 		RTE_LOG(ERR, POWER, "Invalid argument\n");
570 		return -1;
571 	}
572 
573 	pi = &lcore_power_info[lcore_id];
574 	caps->capabilities = 0;
575 	caps->turbo = !!(pi->turbo_available);
576 
577 	return 0;
578 }
579