1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2018 Intel Corporation
3 */
4
5 #include <string.h>
6
7 #include <rte_lcore.h>
8 #include <rte_malloc.h>
9 #include <inttypes.h>
10
11 #include "rte_power.h"
12 #include "rte_power_empty_poll.h"
13
14 #define INTERVALS_PER_SECOND 100 /* (10ms) */
15 #define SECONDS_TO_TRAIN_FOR 2
16 #define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
17 #define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
18 #define DEFAULT_CYCLES_PER_PACKET 800
19
20 static struct ep_params *ep_params;
21 static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
22 static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;
23
24 static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];
25
26 static uint32_t total_avail_freqs[RTE_MAX_LCORE];
27
28 static uint32_t freq_index[NUM_FREQ];
29
30 static uint32_t
get_freq_index(enum freq_val index)31 get_freq_index(enum freq_val index)
32 {
33 return freq_index[index];
34 }
35
36
37 static int
set_power_freq(int lcore_id,enum freq_val freq,bool specific_freq)38 set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
39 {
40 int err = 0;
41 uint32_t power_freq_index;
42 if (!specific_freq)
43 power_freq_index = get_freq_index(freq);
44 else
45 power_freq_index = freq;
46
47 err = rte_power_set_freq(lcore_id, power_freq_index);
48
49 return err;
50 }
51
52
53 static __rte_always_inline void
exit_training_state(struct priority_worker * poll_stats)54 exit_training_state(struct priority_worker *poll_stats)
55 {
56 RTE_SET_USED(poll_stats);
57 }
58
59 static __rte_always_inline void
enter_training_state(struct priority_worker * poll_stats)60 enter_training_state(struct priority_worker *poll_stats)
61 {
62 poll_stats->iter_counter = 0;
63 poll_stats->cur_freq = LOW;
64 poll_stats->queue_state = TRAINING;
65 }
66
67 static __rte_always_inline void
enter_normal_state(struct priority_worker * poll_stats)68 enter_normal_state(struct priority_worker *poll_stats)
69 {
70 /* Clear the averages arrays and strs */
71 memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
72 poll_stats->ec = 0;
73
74 poll_stats->cur_freq = MED;
75 poll_stats->iter_counter = 0;
76 poll_stats->threshold_ctr = 0;
77 poll_stats->queue_state = MED_NORMAL;
78 RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
79 set_power_freq(poll_stats->lcore_id, MED, false);
80
81 poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
82 poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
83 }
84
85 static __rte_always_inline void
enter_busy_state(struct priority_worker * poll_stats)86 enter_busy_state(struct priority_worker *poll_stats)
87 {
88 memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
89 poll_stats->ec = 0;
90
91 poll_stats->cur_freq = HGH;
92 poll_stats->iter_counter = 0;
93 poll_stats->threshold_ctr = 0;
94 poll_stats->queue_state = HGH_BUSY;
95 set_power_freq(poll_stats->lcore_id, HGH, false);
96 }
97
98 static __rte_always_inline void
enter_purge_state(struct priority_worker * poll_stats)99 enter_purge_state(struct priority_worker *poll_stats)
100 {
101 poll_stats->iter_counter = 0;
102 poll_stats->queue_state = LOW_PURGE;
103 }
104
105 static __rte_always_inline void
set_state(struct priority_worker * poll_stats,enum queue_state new_state)106 set_state(struct priority_worker *poll_stats,
107 enum queue_state new_state)
108 {
109 enum queue_state old_state = poll_stats->queue_state;
110 if (old_state != new_state) {
111
112 /* Call any old state exit functions */
113 if (old_state == TRAINING)
114 exit_training_state(poll_stats);
115
116 /* Call any new state entry functions */
117 if (new_state == TRAINING)
118 enter_training_state(poll_stats);
119 if (new_state == MED_NORMAL)
120 enter_normal_state(poll_stats);
121 if (new_state == HGH_BUSY)
122 enter_busy_state(poll_stats);
123 if (new_state == LOW_PURGE)
124 enter_purge_state(poll_stats);
125 }
126 }
127
128 static __rte_always_inline void
set_policy(struct priority_worker * poll_stats,struct ep_policy * policy)129 set_policy(struct priority_worker *poll_stats,
130 struct ep_policy *policy)
131 {
132 set_state(poll_stats, policy->state);
133
134 if (policy->state == TRAINING)
135 return;
136
137 poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
138 poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;
139
140 poll_stats->thresh[MED_NORMAL].trained = true;
141 poll_stats->thresh[HGH_BUSY].trained = true;
142
143 }
144
/* Advance the training measurement for one frequency level.
 *
 * Only acts when the worker is currently sitting at "freq" and that level
 * is not yet trained. Training for a level proceeds in three phases,
 * driven by cur_train_iter:
 *   1. iter 0: switch the lcore to "freq" and snapshot the empty-dequeue
 *      counter as the baseline for the next interval;
 *   2. iters 1..max_train_iter: accumulate the per-interval empty-dequeue
 *      delta into base_edpi;
 *   3. past max_train_iter: average the accumulated total, pad it
 *      slightly, mark the level trained and advance cur_freq to the
 *      next level so the next call trains it.
 */
static void
update_training_stats(struct priority_worker *poll_stats,
		uint32_t freq,
		bool specific_freq,
		uint32_t max_train_iter)
{
	RTE_SET_USED(specific_freq);

	uint64_t p0_empty_deq;

	if (poll_stats->cur_freq == freq &&
			poll_stats->thresh[freq].trained == false) {
		if (poll_stats->thresh[freq].cur_train_iter == 0) {

			/* First iteration: apply the frequency and take a
			 * baseline snapshot; no delta can be computed yet.
			 */
			set_power_freq(poll_stats->lcore_id,
					freq, specific_freq);

			poll_stats->empty_dequeues_prev =
					poll_stats->empty_dequeues;

			poll_stats->thresh[freq].cur_train_iter++;

			return;
		} else if (poll_stats->thresh[freq].cur_train_iter
				<= max_train_iter) {

			/* Accumulate this interval's empty-dequeue delta */
			p0_empty_deq = poll_stats->empty_dequeues -
					poll_stats->empty_dequeues_prev;

			poll_stats->empty_dequeues_prev =
					poll_stats->empty_dequeues;

			poll_stats->thresh[freq].base_edpi += p0_empty_deq;
			poll_stats->thresh[freq].cur_train_iter++;

		} else {
			if (poll_stats->thresh[freq].trained == false) {
				/* Average the accumulated total over the
				 * training iterations.
				 */
				poll_stats->thresh[freq].base_edpi =
					poll_stats->thresh[freq].base_edpi /
					max_train_iter;

				/* Add on a factor of 0.05%
				 * this should remove any
				 * false negatives when the system is 0% busy
				 */
				poll_stats->thresh[freq].base_edpi +=
					poll_stats->thresh[freq].base_edpi / 2000;

				poll_stats->thresh[freq].trained = true;
				/* Move on to training the next level */
				poll_stats->cur_freq++;

			}
		}
	}
}
200
201 static __rte_always_inline uint32_t
update_stats(struct priority_worker * poll_stats)202 update_stats(struct priority_worker *poll_stats)
203 {
204 uint64_t tot_edpi = 0;
205 uint32_t j, percent;
206
207 struct priority_worker *s = poll_stats;
208
209 uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;
210
211 s->empty_dequeues_prev = s->empty_dequeues;
212
213 if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {
214
215 /* edpi mean empty poll counter difference per interval */
216 RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
217 "cur edpi %"PRId64" "
218 "base edpi %"PRId64"\n",
219 cur_edpi,
220 s->thresh[s->cur_freq].base_edpi);
221 /* Value to make us fail need debug log*/
222 return 1000UL;
223 }
224
225 s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
226
227 for (j = 0; j < BINS_AV; j++) {
228 tot_edpi += s->edpi_av[j];
229 }
230
231 tot_edpi = tot_edpi / BINS_AV;
232
233 percent = 100 - (uint32_t)(((float)tot_edpi /
234 (float)s->thresh[s->cur_freq].base_edpi) * 100);
235
236 return (uint32_t)percent;
237 }
238
239
240 static __rte_always_inline void
update_stats_normal(struct priority_worker * poll_stats)241 update_stats_normal(struct priority_worker *poll_stats)
242 {
243 uint32_t percent;
244
245 if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {
246
247 enum freq_val cur_freq = poll_stats->cur_freq;
248
249 /* edpi mean empty poll counter difference per interval */
250 RTE_LOG(DEBUG, POWER, "cure freq is %d, edpi is %"PRIu64"\n",
251 cur_freq,
252 poll_stats->thresh[cur_freq].base_edpi);
253 return;
254 }
255
256 percent = update_stats(poll_stats);
257
258 if (percent > 100) {
259 /* edpi mean empty poll counter difference per interval */
260 RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
261 return;
262 }
263
264 if (poll_stats->cur_freq == LOW)
265 RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
266 else if (poll_stats->cur_freq == MED) {
267
268 if (percent >
269 poll_stats->thresh[MED].threshold_percent) {
270
271 if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
272 poll_stats->threshold_ctr++;
273 else {
274 set_state(poll_stats, HGH_BUSY);
275 RTE_LOG(INFO, POWER, "MOVE to HGH\n");
276 }
277
278 } else {
279 /* reset */
280 poll_stats->threshold_ctr = 0;
281 }
282
283 } else if (poll_stats->cur_freq == HGH) {
284
285 if (percent <
286 poll_stats->thresh[HGH].threshold_percent) {
287
288 if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
289 poll_stats->threshold_ctr++;
290 else {
291 set_state(poll_stats, MED_NORMAL);
292 RTE_LOG(INFO, POWER, "MOVE to MED\n");
293 }
294 } else {
295 /* reset */
296 poll_stats->threshold_ctr = 0;
297 }
298
299 }
300 }
301
302 static int
empty_poll_training(struct priority_worker * poll_stats,uint32_t max_train_iter)303 empty_poll_training(struct priority_worker *poll_stats,
304 uint32_t max_train_iter)
305 {
306
307 if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
308 poll_stats->iter_counter++;
309 return 0;
310 }
311
312
313 update_training_stats(poll_stats,
314 LOW,
315 false,
316 max_train_iter);
317
318 update_training_stats(poll_stats,
319 MED,
320 false,
321 max_train_iter);
322
323 update_training_stats(poll_stats,
324 HGH,
325 false,
326 max_train_iter);
327
328
329 if (poll_stats->thresh[LOW].trained == true
330 && poll_stats->thresh[MED].trained == true
331 && poll_stats->thresh[HGH].trained == true) {
332
333 set_state(poll_stats, MED_NORMAL);
334
335 RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
336 poll_stats->thresh[LOW].base_edpi);
337
338 RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
339 poll_stats->thresh[MED].base_edpi);
340
341
342 RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
343 poll_stats->thresh[HGH].base_edpi);
344
345 RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
346 poll_stats->lcore_id);
347 }
348
349 return 0;
350 }
351
352 void
rte_empty_poll_detection(struct rte_timer * tim,void * arg)353 rte_empty_poll_detection(struct rte_timer *tim, void *arg)
354 {
355
356 uint32_t i;
357
358 struct priority_worker *poll_stats;
359
360 RTE_SET_USED(tim);
361
362 RTE_SET_USED(arg);
363
364 for (i = 0; i < NUM_NODES; i++) {
365
366 poll_stats = &(ep_params->wrk_data.wrk_stats[i]);
367
368 if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
369 continue;
370
371 switch (poll_stats->queue_state) {
372 case(TRAINING):
373 empty_poll_training(poll_stats,
374 ep_params->max_train_iter);
375 break;
376
377 case(HGH_BUSY):
378 case(MED_NORMAL):
379 update_stats_normal(poll_stats);
380 break;
381
382 case(LOW_PURGE):
383 break;
384 default:
385 break;
386
387 }
388
389 }
390
391 }
392
393 int
rte_power_empty_poll_stat_init(struct ep_params ** eptr,uint8_t * freq_tlb,struct ep_policy * policy)394 rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
395 struct ep_policy *policy)
396 {
397 uint32_t i;
398 /* Allocate the ep_params structure */
399 ep_params = rte_zmalloc_socket(NULL,
400 sizeof(struct ep_params),
401 0,
402 rte_socket_id());
403
404 if (!ep_params)
405 return -1;
406
407 if (freq_tlb == NULL) {
408 freq_index[LOW] = 14;
409 freq_index[MED] = 9;
410 freq_index[HGH] = 1;
411 } else {
412 freq_index[LOW] = freq_tlb[LOW];
413 freq_index[MED] = freq_tlb[MED];
414 freq_index[HGH] = freq_tlb[HGH];
415 }
416
417 RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");
418
419 /* Train for pre-defined period */
420 ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;
421
422 struct stats_data *w = &ep_params->wrk_data;
423
424 *eptr = ep_params;
425
426 /* initialize all wrk_stats state */
427 for (i = 0; i < NUM_NODES; i++) {
428
429 if (rte_lcore_is_enabled(i) == 0)
430 continue;
431 /*init the freqs table */
432 total_avail_freqs[i] = rte_power_freqs(i,
433 avail_freqs[i],
434 NUM_FREQS);
435
436 RTE_LOG(INFO, POWER, "total avail freq is %d , lcoreid %d\n",
437 total_avail_freqs[i],
438 i);
439
440 if (get_freq_index(LOW) > total_avail_freqs[i])
441 return -1;
442
443 if (rte_get_main_lcore() != i) {
444 w->wrk_stats[i].lcore_id = i;
445 set_policy(&w->wrk_stats[i], policy);
446 }
447 }
448
449 return 0;
450 }
451
452 void
rte_power_empty_poll_stat_free(void)453 rte_power_empty_poll_stat_free(void)
454 {
455
456 RTE_LOG(INFO, POWER, "Close the Empty Poll\n");
457
458 rte_free(ep_params);
459 }
460
461 int
rte_power_empty_poll_stat_update(unsigned int lcore_id)462 rte_power_empty_poll_stat_update(unsigned int lcore_id)
463 {
464 struct priority_worker *poll_stats;
465
466 if (lcore_id >= NUM_NODES)
467 return -1;
468
469 poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
470
471 if (poll_stats->lcore_id == 0)
472 poll_stats->lcore_id = lcore_id;
473
474 poll_stats->empty_dequeues++;
475
476 return 0;
477 }
478
479 int
rte_power_poll_stat_update(unsigned int lcore_id,uint8_t nb_pkt)480 rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
481 {
482
483 struct priority_worker *poll_stats;
484
485 if (lcore_id >= NUM_NODES)
486 return -1;
487
488 poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
489
490 if (poll_stats->lcore_id == 0)
491 poll_stats->lcore_id = lcore_id;
492
493 poll_stats->num_dequeue_pkts += nb_pkt;
494
495 return 0;
496 }
497
498
499 uint64_t
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)500 rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
501 {
502 struct priority_worker *poll_stats;
503
504 if (lcore_id >= NUM_NODES)
505 return -1;
506
507 poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
508
509 if (poll_stats->lcore_id == 0)
510 poll_stats->lcore_id = lcore_id;
511
512 return poll_stats->empty_dequeues;
513 }
514
515 uint64_t
rte_power_poll_stat_fetch(unsigned int lcore_id)516 rte_power_poll_stat_fetch(unsigned int lcore_id)
517 {
518 struct priority_worker *poll_stats;
519
520 if (lcore_id >= NUM_NODES)
521 return -1;
522
523 poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
524
525 if (poll_stats->lcore_id == 0)
526 poll_stats->lcore_id = lcore_id;
527
528 return poll_stats->num_dequeue_pkts;
529 }
530