/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

#include <string.h>

#include <rte_lcore.h>
#include <rte_malloc.h>
#include <inttypes.h>

#include "rte_power.h"
#include "rte_power_empty_poll.h"

#define INTERVALS_PER_SECOND 100     /* one interval = 10ms */
#define SECONDS_TO_TRAIN_FOR 2
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
#define DEFAULT_CYCLES_PER_PACKET 800

static struct ep_params *ep_params;
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;

static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];

static uint32_t total_avail_freqs[RTE_MAX_LCORE];

static uint32_t freq_index[NUM_FREQ];

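/*
 * Map a logical frequency level (LOW/MED/HGH) to an index into the
 * per-lcore frequency table filled in by rte_power_freqs(), where lower
 * indexes correspond to higher frequencies (the built-in defaults chosen
 * in rte_power_empty_poll_stat_init() assume this layout).
 */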
static uint32_t
get_freq_index(enum freq_val index)
{
	return freq_index[index];
}

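/*
 * Scale an lcore to the requested level. When specific_freq is false,
 * "freq" is a logical LOW/MED/HGH value translated through freq_index[];
 * otherwise it is used directly as a frequency-table index.
 */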
static int
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
{
	int err = 0;
	uint32_t power_freq_index;

	if (!specific_freq)
		power_freq_index = get_freq_index(freq);
	else
		power_freq_index = freq;

	err = rte_power_set_freq(lcore_id, power_freq_index);

	return err;
}

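/*
 * State entry/exit hooks for the per-lcore queue-state machine. Each
 * enter_*_state() helper resets the iteration counters and, for MED and
 * HGH, immediately scales the lcore to the matching frequency level.
 */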
static __rte_always_inline void
exit_training_state(struct priority_worker *poll_stats)
{
	RTE_SET_USED(poll_stats);
}

static __rte_always_inline void
enter_training_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->cur_freq = LOW;
	poll_stats->queue_state = TRAINING;
}

static __rte_always_inline void
enter_normal_state(struct priority_worker *poll_stats)
{
	/* Clear the edpi averages array and its element counter */
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;

	poll_stats->cur_freq = MED;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = MED_NORMAL;
	RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
	set_power_freq(poll_stats->lcore_id, MED, false);

	poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
	poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
}

static __rte_always_inline void
enter_busy_state(struct priority_worker *poll_stats)
{
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;

	poll_stats->cur_freq = HGH;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = HGH_BUSY;
	set_power_freq(poll_stats->lcore_id, HGH, false);
}

static __rte_always_inline void
enter_purge_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->queue_state = LOW_PURGE;
}

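/*
 * Transition a worker to a new queue state, running the exit hook for the
 * old state and the entry hook for the new one. A transition to the same
 * state is a no-op.
 */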
static __rte_always_inline void
set_state(struct priority_worker *poll_stats,
		enum queue_state new_state)
{
	enum queue_state old_state = poll_stats->queue_state;

	if (old_state != new_state) {

		/* Call any old state exit functions */
		if (old_state == TRAINING)
			exit_training_state(poll_stats);

		/* Call any new state entry functions */
		if (new_state == TRAINING)
			enter_training_state(poll_stats);
		if (new_state == MED_NORMAL)
			enter_normal_state(poll_stats);
		if (new_state == HGH_BUSY)
			enter_busy_state(poll_stats);
		if (new_state == LOW_PURGE)
			enter_purge_state(poll_stats);
	}
}

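/*
 * Apply a user-supplied policy. If the policy requests TRAINING, the
 * baselines are measured at run time; otherwise the caller's pre-trained
 * med/hgh base_edpi values are installed directly. Note that thresh[] is
 * indexed by enum freq_val, so using MED_NORMAL and HGH_BUSY here relies
 * on those queue_state values being numerically equal to MED and HGH as
 * defined in rte_power_empty_poll.h.
 */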
static __rte_always_inline void
set_policy(struct priority_worker *poll_stats,
		struct ep_policy *policy)
{
	set_state(poll_stats, policy->state);

	if (policy->state == TRAINING)
		return;

	poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
	poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;

	poll_stats->thresh[MED_NORMAL].trained = true;
	poll_stats->thresh[HGH_BUSY].trained = true;
}

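/*
 * Training for one frequency level: on the first interval at that level,
 * scale the core and snapshot the empty-dequeue counter; for the next
 * max_train_iter intervals, accumulate the per-interval empty-dequeue
 * deltas; finally, average them into base_edpi, the expected number of
 * empty polls per interval on an idle core at this frequency.
 */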
static void
update_training_stats(struct priority_worker *poll_stats,
		uint32_t freq,
		bool specific_freq,
		uint32_t max_train_iter)
{
	uint64_t p0_empty_deq;

	if (poll_stats->cur_freq == freq &&
			poll_stats->thresh[freq].trained == false) {
		if (poll_stats->thresh[freq].cur_train_iter == 0) {

			set_power_freq(poll_stats->lcore_id,
					freq, specific_freq);

			poll_stats->empty_dequeues_prev =
				poll_stats->empty_dequeues;

			poll_stats->thresh[freq].cur_train_iter++;

			return;
		} else if (poll_stats->thresh[freq].cur_train_iter
				<= max_train_iter) {

			p0_empty_deq = poll_stats->empty_dequeues -
				poll_stats->empty_dequeues_prev;

			poll_stats->empty_dequeues_prev =
				poll_stats->empty_dequeues;

			poll_stats->thresh[freq].base_edpi += p0_empty_deq;
			poll_stats->thresh[freq].cur_train_iter++;

		} else {
			poll_stats->thresh[freq].base_edpi =
				poll_stats->thresh[freq].base_edpi /
				max_train_iter;

			/* Add a 0.05% margin; this should remove any
			 * false negatives when the system is 0% busy
			 */
			poll_stats->thresh[freq].base_edpi +=
				poll_stats->thresh[freq].base_edpi / 2000;

			poll_stats->thresh[freq].trained = true;
			poll_stats->cur_freq++;
		}
	}
}

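/*
 * Derive the current busyness percentage for a worker:
 *
 *	percent = 100 - 100 * avg_edpi / base_edpi
 *
 * where avg_edpi is the mean empty-dequeue delta over the last BINS_AV
 * intervals and base_edpi is the trained idle baseline for the current
 * frequency. Returns 1000 as an out-of-range sentinel when the measured
 * delta exceeds the baseline.
 */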
static __rte_always_inline uint32_t
update_stats(struct priority_worker *poll_stats)
{
	uint64_t tot_edpi = 0;
	uint32_t j, percent;

	struct priority_worker *s = poll_stats;

	uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;

	s->empty_dequeues_prev = s->empty_dequeues;

	if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {

		/* edpi means empty poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
				"cur edpi %"PRIu64" "
				"base edpi %"PRIu64"\n",
				cur_edpi,
				s->thresh[s->cur_freq].base_edpi);
		/* Out-of-range sentinel: callers treat values > 100
		 * as an invalid interval.
		 */
		return 1000UL;
	}

	s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;

	for (j = 0; j < BINS_AV; j++)
		tot_edpi += s->edpi_av[j];

	tot_edpi = tot_edpi / BINS_AV;

	percent = 100 - (uint32_t)(((float)tot_edpi /
			(float)s->thresh[s->cur_freq].base_edpi) * 100);

	return percent;
}

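/*
 * Steady-state monitoring with hysteresis: move MED -> HGH only after the
 * busyness percentage stays above the MED threshold for
 * INTERVALS_PER_SECOND consecutive intervals (about one second), and
 * HGH -> MED only after it stays below the HGH threshold for the same
 * span; any interval that breaks the streak resets the counter.
 */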
static __rte_always_inline void
update_stats_normal(struct priority_worker *poll_stats)
{
	uint32_t percent;

	if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {

		enum freq_val cur_freq = poll_stats->cur_freq;

		/* edpi means empty poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur freq is %d, edpi is %"PRIu64"\n",
				cur_freq,
				poll_stats->thresh[cur_freq].base_edpi);
		return;
	}

	percent = update_stats(poll_stats);

	if (percent > 100) {
		/* edpi means empty poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
		return;
	}

	if (poll_stats->cur_freq == LOW)
		RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
	else if (poll_stats->cur_freq == MED) {

		if (percent > poll_stats->thresh[MED].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, HGH_BUSY);
				RTE_LOG(INFO, POWER, "MOVE to HGH\n");
			}

		} else {
			/* reset the streak counter */
			poll_stats->threshold_ctr = 0;
		}

	} else if (poll_stats->cur_freq == HGH) {

		if (percent < poll_stats->thresh[HGH].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, MED_NORMAL);
				RTE_LOG(INFO, POWER, "MOVE to MED\n");
			}
		} else {
			/* reset the streak counter */
			poll_stats->threshold_ctr = 0;
		}
	}
}

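/*
 * Training dispatcher: after a one-second warm-up it runs the LOW, MED
 * and HGH trainers in turn (cur_freq advances as each level completes);
 * once all three baselines are trained, the worker drops into MED_NORMAL
 * and the measured thresholds are logged.
 */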
static int
empty_poll_training(struct priority_worker *poll_stats,
		uint32_t max_train_iter)
{
	if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
		poll_stats->iter_counter++;
		return 0;
	}

	update_training_stats(poll_stats,
			LOW,
			false,
			max_train_iter);

	update_training_stats(poll_stats,
			MED,
			false,
			max_train_iter);

	update_training_stats(poll_stats,
			HGH,
			false,
			max_train_iter);

	if (poll_stats->thresh[LOW].trained == true
			&& poll_stats->thresh[MED].trained == true
			&& poll_stats->thresh[HGH].trained == true) {

		set_state(poll_stats, MED_NORMAL);

		RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
				poll_stats->thresh[LOW].base_edpi);

		RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
				poll_stats->thresh[MED].base_edpi);

		RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
				poll_stats->thresh[HGH].base_edpi);

		RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
				poll_stats->lcore_id);
	}

	return 0;
}

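/*
 * Timer callback: walk every tracked worker and advance its state machine
 * (training or steady-state monitoring). The application is expected to
 * fire this once per 10ms interval. A minimal wiring sketch, assuming an
 * application that services rte_timer_manage() on the main lcore (the
 * variable names are illustrative, not part of this library):
 *
 *	static struct rte_timer ep_timer;
 *
 *	rte_timer_subsystem_init();
 *	rte_timer_init(&ep_timer);
 *	rte_timer_reset(&ep_timer,
 *			rte_get_timer_hz() / INTERVALS_PER_SECOND,
 *			PERIODICAL, rte_get_main_lcore(),
 *			rte_empty_poll_detection, NULL);
 */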
void
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
{
	uint32_t i;

	struct priority_worker *poll_stats;

	RTE_SET_USED(tim);
	RTE_SET_USED(arg);

	for (i = 0; i < NUM_NODES; i++) {

		poll_stats = &(ep_params->wrk_data.wrk_stats[i]);

		if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
			continue;

		switch (poll_stats->queue_state) {
		case TRAINING:
			empty_poll_training(poll_stats,
					ep_params->max_train_iter);
			break;

		case HGH_BUSY:
		case MED_NORMAL:
			update_stats_normal(poll_stats);
			break;

		case LOW_PURGE:
			break;
		default:
			break;
		}
	}
}

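/*
 * Set up the empty-poll state: allocate ep_params, choose the
 * frequency-table indexes for LOW/MED/HGH (caller-supplied via freq_tlb,
 * or built-in defaults), and apply the policy to every enabled worker
 * lcore except the main lcore.
 */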
int
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
		struct ep_policy *policy)
{
	uint32_t i;

	/* Allocate the ep_params structure */
	ep_params = rte_zmalloc_socket(NULL,
			sizeof(struct ep_params),
			0,
			rte_socket_id());

	if (!ep_params)
		return -1;

	if (freq_tlb == NULL) {
		freq_index[LOW] = 14;
		freq_index[MED] = 9;
		freq_index[HGH] = 1;
	} else {
		freq_index[LOW] = freq_tlb[LOW];
		freq_index[MED] = freq_tlb[MED];
		freq_index[HGH] = freq_tlb[HGH];
	}

	RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");

	/* Train for a pre-defined period */
	ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;

	struct stats_data *w = &ep_params->wrk_data;

	*eptr = ep_params;

	/* Initialize all wrk_stats state */
	for (i = 0; i < NUM_NODES; i++) {

		if (rte_lcore_is_enabled(i) == 0)
			continue;

		/* Init the freqs table */
		total_avail_freqs[i] = rte_power_freqs(i,
				avail_freqs[i],
				NUM_FREQS);

		RTE_LOG(INFO, POWER, "total avail freqs is %d, lcore id %d\n",
				total_avail_freqs[i],
				i);

		if (get_freq_index(LOW) > total_avail_freqs[i])
			return -1;

		if (rte_get_main_lcore() != i) {
			w->wrk_stats[i].lcore_id = i;
			set_policy(&w->wrk_stats[i], policy);
		}
	}

	return 0;
}

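/* Release the state allocated by rte_power_empty_poll_stat_init(). */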
void
rte_power_empty_poll_stat_free(void)
{
	RTE_LOG(INFO, POWER, "Close the Empty Poll\n");

	rte_free(ep_params);
}

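/*
 * Per-poll accounting hooks, to be called from the application's receive
 * loop: the first counts an empty dequeue, the second counts received
 * packets. A minimal sketch of the intended call pattern, modelled on the
 * DPDK l3fwd-power sample (port/queue/pkts/MAX_PKT_BURST are illustrative
 * names, not part of this library):
 *
 *	nb_rx = rte_eth_rx_burst(port, queue, pkts, MAX_PKT_BURST);
 *	if (nb_rx == 0)
 *		rte_power_empty_poll_stat_update(lcore_id);
 *	else
 *		rte_power_poll_stat_update(lcore_id, nb_rx);
 */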
int
rte_power_empty_poll_stat_update(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->empty_dequeues++;

	return 0;
}

int
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->num_dequeue_pkts += nb_pkt;

	return 0;
}

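/*
 * Read-only accessors for the per-lcore counters. Note the error path:
 * returning -1 from a uint64_t function yields UINT64_MAX, so callers
 * should validate lcore_id before treating the result as a count.
 */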
uint64_t
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1; /* wraps to UINT64_MAX */

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->empty_dequeues;
}

uint64_t
rte_power_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1; /* wraps to UINT64_MAX */

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->num_dequeue_pkts;
}