/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

#include <string.h>

#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_malloc.h>
#include <inttypes.h>

#include "rte_power.h"
#include "rte_power_empty_poll.h"

#define INTERVALS_PER_SECOND 100     /* (10ms) */
#define SECONDS_TO_TRAIN_FOR 2
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
#define DEFAULT_CYCLES_PER_PACKET 800
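
/*
 * Training collects INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR = 200
 * sampling intervals of 10 ms each, i.e. about two seconds per frequency
 * level.
 */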

static struct ep_params *ep_params;
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;

static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];

static uint32_t total_avail_freqs[RTE_MAX_LCORE];

static uint32_t freq_index[NUM_FREQ];

static uint32_t
get_freq_index(enum freq_val index)
{
	return freq_index[index];
}

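/*
 * Apply a frequency to an lcore.  When specific_freq is false, freq is one
 * of the LOW/MED/HGH enum values and is mapped through freq_index[] to a
 * position in the lcore's frequency table; when true, freq is already a raw
 * index into that table.
 */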
static int
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
{
	int err = 0;
	uint32_t power_freq_index;

	if (!specific_freq)
		power_freq_index = get_freq_index(freq);
	else
		power_freq_index = freq;

	err = rte_power_set_freq(lcore_id, power_freq_index);

	return err;
}

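/*
 * State entry/exit helpers for the per-lcore queue state machine.  A worker
 * starts in TRAINING, moves to MED_NORMAL once all three frequency levels
 * are trained, and then oscillates between MED_NORMAL and HGH_BUSY based on
 * the measured busyness percentage.  LOW_PURGE is a placeholder; purge mode
 * is not currently supported.
 */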
static __rte_always_inline void
exit_training_state(struct priority_worker *poll_stats)
{
	RTE_SET_USED(poll_stats);
}

static __rte_always_inline void
enter_training_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->cur_freq = LOW;
	poll_stats->queue_state = TRAINING;
}

static __rte_always_inline void
enter_normal_state(struct priority_worker *poll_stats)
{
	/* Clear the averages arrays and counters */
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;
	memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
	poll_stats->pc = 0;

	poll_stats->cur_freq = MED;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = MED_NORMAL;
	RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
	set_power_freq(poll_stats->lcore_id, MED, false);

	poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
	poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
}

static __rte_always_inline void
enter_busy_state(struct priority_worker *poll_stats)
{
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;
	memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
	poll_stats->pc = 0;

	poll_stats->cur_freq = HGH;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = HGH_BUSY;
	set_power_freq(poll_stats->lcore_id, HGH, false);
}

static __rte_always_inline void
enter_purge_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->queue_state = LOW_PURGE;
}

static __rte_always_inline void
set_state(struct priority_worker *poll_stats,
		enum queue_state new_state)
{
	enum queue_state old_state = poll_stats->queue_state;

	if (old_state != new_state) {

		/* Call any old state exit functions */
		if (old_state == TRAINING)
			exit_training_state(poll_stats);

		/* Call any new state entry functions */
		if (new_state == TRAINING)
			enter_training_state(poll_stats);
		if (new_state == MED_NORMAL)
			enter_normal_state(poll_stats);
		if (new_state == HGH_BUSY)
			enter_busy_state(poll_stats);
		if (new_state == LOW_PURGE)
			enter_purge_state(poll_stats);
	}
}

static __rte_always_inline void
set_policy(struct priority_worker *poll_stats,
		struct ep_policy *policy)
{
	set_state(poll_stats, policy->state);

	if (policy->state == TRAINING)
		return;

	poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
	poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;

	poll_stats->thresh[MED_NORMAL].trained = true;
	poll_stats->thresh[HGH_BUSY].trained = true;
}

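/*
 * Accumulate training samples for one frequency level.  On the first
 * iteration the lcore is pinned to the level and the empty-dequeue counter
 * is snapshotted; each following iteration adds the per-interval delta to
 * base_edpi.  After max_train_iter samples, base_edpi is averaged and
 * padded by 0.05%, and training advances to the next level (cur_freq++).
 */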
static void
update_training_stats(struct priority_worker *poll_stats,
		uint32_t freq,
		bool specific_freq,
		uint32_t max_train_iter)
{
	RTE_SET_USED(specific_freq);

	uint64_t p0_empty_deq;

	if (poll_stats->cur_freq == freq &&
			poll_stats->thresh[freq].trained == false) {
		if (poll_stats->thresh[freq].cur_train_iter == 0) {

			set_power_freq(poll_stats->lcore_id,
					freq, specific_freq);

			poll_stats->empty_dequeues_prev =
				poll_stats->empty_dequeues;

			poll_stats->thresh[freq].cur_train_iter++;

			return;
		} else if (poll_stats->thresh[freq].cur_train_iter
				<= max_train_iter) {

			p0_empty_deq = poll_stats->empty_dequeues -
				poll_stats->empty_dequeues_prev;

			poll_stats->empty_dequeues_prev =
				poll_stats->empty_dequeues;

			poll_stats->thresh[freq].base_edpi += p0_empty_deq;
			poll_stats->thresh[freq].cur_train_iter++;

		} else {
			if (poll_stats->thresh[freq].trained == false) {
				poll_stats->thresh[freq].base_edpi =
					poll_stats->thresh[freq].base_edpi /
					max_train_iter;

				/* Add a margin of 0.05%; this should
				 * remove any false negatives when the
				 * system is 0% busy.
				 */
				poll_stats->thresh[freq].base_edpi +=
				poll_stats->thresh[freq].base_edpi / 2000;

				poll_stats->thresh[freq].trained = true;
				poll_stats->cur_freq++;
			}
		}
	}
}

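/*
 * Convert the latest per-interval counters into a busyness percentage:
 * percent = 100 - (rolling average edpi / trained base_edpi) * 100.  A
 * fully idle core polls empty at close to base_edpi, yielding ~0%; a core
 * that never polls empty yields ~100%.  Returns 1000 as an out-of-range
 * sentinel when the measured edpi exceeds the trained baseline.
 */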
static __rte_always_inline uint32_t
update_stats(struct priority_worker *poll_stats)
{
	uint64_t tot_edpi = 0, tot_ppi = 0;
	uint32_t j, percent;

	struct priority_worker *s = poll_stats;

	uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;

	s->empty_dequeues_prev = s->empty_dequeues;

	uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;

	s->num_dequeue_pkts_prev = s->num_dequeue_pkts;

	if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {

		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
				"cur edpi %"PRIu64" "
				"base edpi %"PRIu64"\n",
				cur_edpi,
				s->thresh[s->cur_freq].base_edpi);
		/* Out-of-range value so the caller discards this interval */
		return 1000UL;
	}

	s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
	s->ppi_av[s->pc++ % BINS_AV] = ppi;

	for (j = 0; j < BINS_AV; j++) {
		tot_edpi += s->edpi_av[j];
		tot_ppi += s->ppi_av[j];
	}

	tot_edpi = tot_edpi / BINS_AV;

	percent = 100 - (uint32_t)(((float)tot_edpi /
			(float)s->thresh[s->cur_freq].base_edpi) * 100);

	return percent;
}

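/*
 * Steady-state tick: re-evaluate the busyness percentage and apply
 * hysteresis.  A threshold has to be exceeded for INTERVALS_PER_SECOND
 * consecutive intervals (one full second) before the state machine moves
 * between MED_NORMAL and HGH_BUSY, so short bursts do not cause frequency
 * flapping.
 */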
static __rte_always_inline void
update_stats_normal(struct priority_worker *poll_stats)
{
	uint32_t percent;

	if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {

		enum freq_val cur_freq = poll_stats->cur_freq;

		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur freq is %d, edpi is %"PRIu64"\n",
				cur_freq,
				poll_stats->thresh[cur_freq].base_edpi);
		return;
	}

	percent = update_stats(poll_stats);

	if (percent > 100) {
		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
		return;
	}

	if (poll_stats->cur_freq == LOW)
		RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
	else if (poll_stats->cur_freq == MED) {

		if (percent >
			poll_stats->thresh[MED].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, HGH_BUSY);
				RTE_LOG(INFO, POWER, "MOVE to HGH\n");
			}

		} else {
			/* reset */
			poll_stats->threshold_ctr = 0;
		}

	} else if (poll_stats->cur_freq == HGH) {

		if (percent <
				poll_stats->thresh[HGH].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, MED_NORMAL);
				RTE_LOG(INFO, POWER, "MOVE to MED\n");
			}
		} else {
			/* reset */
			poll_stats->threshold_ctr = 0;
		}
	}
}

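/*
 * Training tick: the first INTERVALS_PER_SECOND calls are a one-second
 * warm-up, after which LOW, MED and HGH are trained in turn (cur_freq is
 * advanced by update_training_stats as each level completes).  Once all
 * three baselines are trained, the worker transitions to MED_NORMAL.
 */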
static int
empty_poll_training(struct priority_worker *poll_stats,
		uint32_t max_train_iter)
{
	if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
		poll_stats->iter_counter++;
		return 0;
	}

	update_training_stats(poll_stats,
			LOW,
			false,
			max_train_iter);

	update_training_stats(poll_stats,
			MED,
			false,
			max_train_iter);

	update_training_stats(poll_stats,
			HGH,
			false,
			max_train_iter);

	if (poll_stats->thresh[LOW].trained == true
			&& poll_stats->thresh[MED].trained == true
			&& poll_stats->thresh[HGH].trained == true) {

		set_state(poll_stats, MED_NORMAL);

		RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
				poll_stats->thresh[LOW].base_edpi);

		RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
				poll_stats->thresh[MED].base_edpi);

		RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
				poll_stats->thresh[HGH].base_edpi);

		RTE_LOG(INFO, POWER, "Training is Complete for lcore %d\n",
				poll_stats->lcore_id);
	}

	return 0;
}

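/*
 * Periodic empty-poll detection callback.  It must fire once per sampling
 * interval (10 ms, i.e. INTERVALS_PER_SECOND times per second) for the
 * training and hysteresis arithmetic above to hold.  A minimal setup
 * sketch, assuming an application-owned rte_timer serviced on the main
 * lcore:
 *
 *	struct rte_timer ep_timer;
 *
 *	rte_timer_init(&ep_timer);
 *	rte_timer_reset(&ep_timer,
 *			rte_get_timer_hz() / INTERVALS_PER_SECOND,
 *			PERIODICAL, rte_get_main_lcore(),
 *			rte_empty_poll_detection, NULL);
 */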
void
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
{
	uint32_t i;

	struct priority_worker *poll_stats;

	RTE_SET_USED(tim);

	RTE_SET_USED(arg);

	for (i = 0; i < NUM_NODES; i++) {

		poll_stats = &(ep_params->wrk_data.wrk_stats[i]);

		if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
			continue;

		switch (poll_stats->queue_state) {
		case(TRAINING):
			empty_poll_training(poll_stats,
					ep_params->max_train_iter);
			break;

		case(HGH_BUSY):
		case(MED_NORMAL):
			update_stats_normal(poll_stats);
			break;

		case(LOW_PURGE):
			break;
		default:
			break;
		}
	}
}

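/*
 * Initialize the empty-poll bookkeeping.  When no freq_tlb is supplied,
 * the defaults below (14/9/1) assume a CPU frequency table with at least
 * 15 entries, ordered from highest to lowest; pass an explicit table on
 * parts with fewer P-states.  A minimal call sketch, starting every worker
 * in training mode:
 *
 *	struct ep_params *eptr;
 *	struct ep_policy policy = { .state = TRAINING };
 *
 *	if (rte_power_empty_poll_stat_init(&eptr, NULL, &policy) < 0)
 *		rte_exit(EXIT_FAILURE, "empty poll init failed\n");
 */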
int
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
		struct ep_policy *policy)
{
	uint32_t i;

	/* Allocate the ep_params structure */
	ep_params = rte_zmalloc_socket(NULL,
			sizeof(struct ep_params),
			0,
			rte_socket_id());

	if (!ep_params)
		return -1;

	if (freq_tlb == NULL) {
		freq_index[LOW] = 14;
		freq_index[MED] = 9;
		freq_index[HGH] = 1;
	} else {
		freq_index[LOW] = freq_tlb[LOW];
		freq_index[MED] = freq_tlb[MED];
		freq_index[HGH] = freq_tlb[HGH];
	}

	RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");

	/* Train for a pre-defined period */
	ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;

	struct stats_data *w = &ep_params->wrk_data;

	*eptr = ep_params;

	/* Initialize all wrk_stats state */
	for (i = 0; i < NUM_NODES; i++) {

		if (rte_lcore_is_enabled(i) == 0)
			continue;

		/* Init the freqs table */
		total_avail_freqs[i] = rte_power_freqs(i,
				avail_freqs[i],
				NUM_FREQS);

		RTE_LOG(INFO, POWER, "total avail freq is %d, lcore id is %d\n",
				total_avail_freqs[i],
				i);

		if (get_freq_index(LOW) > total_avail_freqs[i])
			return -1;

		if (rte_get_main_lcore() != i) {
			w->wrk_stats[i].lcore_id = i;
			set_policy(&w->wrk_stats[i], policy);
		}
	}

	return 0;
}

void
rte_power_empty_poll_stat_free(void)
{
	RTE_LOG(INFO, POWER, "Close the Empty Poll\n");

	/* rte_free() is a documented no-op on NULL */
	rte_free(ep_params);
}

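/*
 * Datapath counter hooks.  The application is expected to call one of the
 * two update functions after every RX poll: the empty variant when the
 * burst returned nothing, the packet variant otherwise.  A minimal sketch
 * of a worker loop (port, queue, bufs and BURST_SIZE are the application's
 * own):
 *
 *	uint16_t nb_rx = rte_eth_rx_burst(port, queue, bufs, BURST_SIZE);
 *
 *	if (nb_rx == 0)
 *		rte_power_empty_poll_stat_update(rte_lcore_id());
 *	else
 *		rte_power_poll_stat_update(rte_lcore_id(), nb_rx);
 */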
int
rte_power_empty_poll_stat_update(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->empty_dequeues++;

	return 0;
}

int
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->num_dequeue_pkts += nb_pkt;

	return 0;
}

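/*
 * Raw counter accessors.  Note that both fetch functions return the
 * cumulative counter, not a per-interval delta, and signal an out-of-range
 * lcore_id by returning (uint64_t)-1.
 */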
uint64_t
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->empty_dequeues;
}

uint64_t
rte_power_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->num_dequeue_pkts;
}