/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

#include <inttypes.h>
#include <string.h>

#include <rte_lcore.h>
#include <rte_malloc.h>

#include "rte_power.h"
#include "rte_power_empty_poll.h"

#define INTERVALS_PER_SECOND 100 /* (10ms) */
#define SECONDS_TO_TRAIN_FOR 2
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
#define DEFAULT_CYCLES_PER_PACKET 800

static struct ep_params *ep_params;
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;

static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];

static uint32_t total_avail_freqs[RTE_MAX_LCORE];

static uint32_t freq_index[NUM_FREQ];

static uint32_t
get_freq_index(enum freq_val index)
{
	return freq_index[index];
}


static int
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
{
	int err = 0;
	uint32_t power_freq_index;

	if (!specific_freq)
		power_freq_index = get_freq_index(freq);
	else
		power_freq_index = freq;

	err = rte_power_set_freq(lcore_id, power_freq_index);

	return err;
}


static __rte_always_inline void
exit_training_state(struct priority_worker *poll_stats)
{
	RTE_SET_USED(poll_stats);
}

static __rte_always_inline void
enter_training_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->cur_freq = LOW;
	poll_stats->queue_state = TRAINING;
}

static __rte_always_inline void
enter_normal_state(struct priority_worker *poll_stats)
{
	/* Clear the averages array and its counter */
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;

	poll_stats->cur_freq = MED;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = MED_NORMAL;
	RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
	set_power_freq(poll_stats->lcore_id, MED, false);

	poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
	poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
}

static __rte_always_inline void
enter_busy_state(struct priority_worker *poll_stats)
{
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;

	poll_stats->cur_freq = HGH;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = HGH_BUSY;
	set_power_freq(poll_stats->lcore_id, HGH, false);
}

static __rte_always_inline void
enter_purge_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->queue_state = LOW_PURGE;
}

static __rte_always_inline void
set_state(struct priority_worker *poll_stats,
		enum queue_state new_state)
{
	enum queue_state old_state = poll_stats->queue_state;

	if (old_state != new_state) {

		/* Call any old state exit functions */
		if (old_state == TRAINING)
			exit_training_state(poll_stats);

		/* Call any new state entry functions */
		if (new_state == TRAINING)
			enter_training_state(poll_stats);
		if (new_state == MED_NORMAL)
			enter_normal_state(poll_stats);
		if (new_state == HGH_BUSY)
			enter_busy_state(poll_stats);
		if (new_state == LOW_PURGE)
			enter_purge_state(poll_stats);
	}
}
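/*
 * Transition sketch (illustrative, not part of the library API):
 * set_state() runs the exit hook of the old state and then the entry
 * hook of the new one, so a worker that finishes TRAINING and is moved
 * to MED_NORMAL executes both hooks in order:
 *
 *	set_state(poll_stats, MED_NORMAL);
 *	// -> exit_training_state(poll_stats)
 *	// -> enter_normal_state(poll_stats)  (resets counters, sets MED)
 */
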
static __rte_always_inline void
set_policy(struct priority_worker *poll_stats,
		struct ep_policy *policy)
{
	set_state(poll_stats, policy->state);

	if (policy->state == TRAINING)
		return;

	poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
	poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;

	poll_stats->thresh[MED_NORMAL].trained = true;
	poll_stats->thresh[HGH_BUSY].trained = true;
}

static void
update_training_stats(struct priority_worker *poll_stats,
		uint32_t freq,
		bool specific_freq,
		uint32_t max_train_iter)
{
	RTE_SET_USED(specific_freq);

	uint64_t p0_empty_deq;

	if (poll_stats->cur_freq == freq &&
			poll_stats->thresh[freq].trained == false) {
		if (poll_stats->thresh[freq].cur_train_iter == 0) {

			set_power_freq(poll_stats->lcore_id,
					freq, specific_freq);

			poll_stats->empty_dequeues_prev =
					poll_stats->empty_dequeues;

			poll_stats->thresh[freq].cur_train_iter++;

			return;
		} else if (poll_stats->thresh[freq].cur_train_iter
				<= max_train_iter) {

			p0_empty_deq = poll_stats->empty_dequeues -
					poll_stats->empty_dequeues_prev;

			poll_stats->empty_dequeues_prev =
					poll_stats->empty_dequeues;

			poll_stats->thresh[freq].base_edpi += p0_empty_deq;
			poll_stats->thresh[freq].cur_train_iter++;

		} else {
			if (poll_stats->thresh[freq].trained == false) {
				poll_stats->thresh[freq].base_edpi =
					poll_stats->thresh[freq].base_edpi /
					max_train_iter;

				/* Add on a factor of 0.05%: this should
				 * remove any false negatives when the
				 * system is 0% busy.
				 */
				poll_stats->thresh[freq].base_edpi +=
					poll_stats->thresh[freq].base_edpi /
					2000;

				poll_stats->thresh[freq].trained = true;
				poll_stats->cur_freq++;
			}
		}
	}
}

static __rte_always_inline uint32_t
update_stats(struct priority_worker *poll_stats)
{
	uint64_t tot_edpi = 0;
	uint32_t j, percent;

	struct priority_worker *s = poll_stats;

	uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;

	s->empty_dequeues_prev = s->empty_dequeues;

	if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {

		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
				"cur edpi %"PRIu64" "
				"base edpi %"PRIu64"\n",
				cur_edpi,
				s->thresh[s->cur_freq].base_edpi);
		/* Return an out-of-range value so the caller skips this
		 * interval; the debug log above records why.
		 */
		return 1000UL;
	}

	s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;

	for (j = 0; j < BINS_AV; j++) {
		tot_edpi += s->edpi_av[j];
	}

	tot_edpi = tot_edpi / BINS_AV;

	percent = 100 - (uint32_t)(((float)tot_edpi /
			(float)s->thresh[s->cur_freq].base_edpi) * 100);

	return (uint32_t)percent;
}
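/*
 * Worked example (illustrative figures, not measured values): with a
 * trained base_edpi of 1000000 empty polls per 10 ms interval and a
 * BINS_AV-averaged tot_edpi of 300000, update_stats() returns
 * 100 - (300000 / 1000000) * 100 = 70, i.e. the lcore is treated as
 * roughly 70% busy by the threshold checks in update_stats_normal().
 */
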
static __rte_always_inline void
update_stats_normal(struct priority_worker *poll_stats)
{
	uint32_t percent;

	if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {

		enum freq_val cur_freq = poll_stats->cur_freq;

		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur freq is %d, edpi is %"PRIu64"\n",
				cur_freq,
				poll_stats->thresh[cur_freq].base_edpi);
		return;
	}

	percent = update_stats(poll_stats);

	if (percent > 100) {
		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
		return;
	}

	if (poll_stats->cur_freq == LOW)
		RTE_LOG(INFO, POWER,
				"Purge Mode is not currently supported\n");
	else if (poll_stats->cur_freq == MED) {

		if (percent >
				poll_stats->thresh[MED].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, HGH_BUSY);
				RTE_LOG(INFO, POWER, "MOVE to HGH\n");
			}

		} else {
			/* reset */
			poll_stats->threshold_ctr = 0;
		}

	} else if (poll_stats->cur_freq == HGH) {

		if (percent <
				poll_stats->thresh[HGH].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, MED_NORMAL);
				RTE_LOG(INFO, POWER, "MOVE to MED\n");
			}
		} else {
			/* reset */
			poll_stats->threshold_ctr = 0;
		}
	}
}

static int
empty_poll_training(struct priority_worker *poll_stats,
		uint32_t max_train_iter)
{

	if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
		poll_stats->iter_counter++;
		return 0;
	}

	update_training_stats(poll_stats,
			LOW,
			false,
			max_train_iter);

	update_training_stats(poll_stats,
			MED,
			false,
			max_train_iter);

	update_training_stats(poll_stats,
			HGH,
			false,
			max_train_iter);

	if (poll_stats->thresh[LOW].trained == true
			&& poll_stats->thresh[MED].trained == true
			&& poll_stats->thresh[HGH].trained == true) {

		set_state(poll_stats, MED_NORMAL);

		RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
				poll_stats->thresh[LOW].base_edpi);

		RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
				poll_stats->thresh[MED].base_edpi);

		RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
				poll_stats->thresh[HGH].base_edpi);

		RTE_LOG(INFO, POWER, "Training is complete for lcore %d\n",
				poll_stats->lcore_id);
	}

	return 0;
}

void
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
{

	uint32_t i;

	struct priority_worker *poll_stats;

	RTE_SET_USED(tim);

	RTE_SET_USED(arg);

	for (i = 0; i < NUM_NODES; i++) {

		poll_stats = &(ep_params->wrk_data.wrk_stats[i]);

		if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
			continue;

		switch (poll_stats->queue_state) {
		case(TRAINING):
			empty_poll_training(poll_stats,
					ep_params->max_train_iter);
			break;

		case(HGH_BUSY):
		case(MED_NORMAL):
			update_stats_normal(poll_stats);
			break;

		case(LOW_PURGE):
			break;
		default:
			break;

		}
	}
}
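/*
 * Usage sketch (illustrative; loosely follows the l3fwd-power sample,
 * and the "policy"/"eptr" names plus the timer0 wiring are this
 * example's assumptions): an application initialises the library and
 * then drives rte_empty_poll_detection() from a periodic rte_timer at
 * the 10 ms cadence that INTERVALS_PER_SECOND implies:
 *
 *	struct ep_params *eptr;
 *	struct ep_policy policy = { .state = TRAINING };
 *	uint64_t hz = rte_get_timer_hz();
 *
 *	if (rte_power_empty_poll_stat_init(&eptr, NULL, &policy) < 0)
 *		rte_exit(EXIT_FAILURE, "empty poll init failed\n");
 *
 *	rte_timer_init(&eptr->timer0);
 *	rte_timer_reset(&eptr->timer0, hz / INTERVALS_PER_SECOND,
 *			PERIODICAL, rte_lcore_id(),
 *			rte_empty_poll_detection, NULL);
 */
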
int
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
		struct ep_policy *policy)
{
	uint32_t i;

	/* Allocate the ep_params structure */
	ep_params = rte_zmalloc_socket(NULL,
			sizeof(struct ep_params),
			0,
			rte_socket_id());

	if (!ep_params)
		return -1;

	if (freq_tlb == NULL) {
		freq_index[LOW] = 14;
		freq_index[MED] = 9;
		freq_index[HGH] = 1;
	} else {
		freq_index[LOW] = freq_tlb[LOW];
		freq_index[MED] = freq_tlb[MED];
		freq_index[HGH] = freq_tlb[HGH];
	}

	RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");

	/* Train for a pre-defined period */
	ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;

	struct stats_data *w = &ep_params->wrk_data;

	*eptr = ep_params;

	/* Initialize all wrk_stats state */
	for (i = 0; i < NUM_NODES; i++) {

		if (rte_lcore_is_enabled(i) == 0)
			continue;

		/* Init the freqs table */
		total_avail_freqs[i] = rte_power_freqs(i,
				avail_freqs[i],
				NUM_FREQS);

		RTE_LOG(INFO, POWER, "total avail freq is %d, lcore id %d\n",
				total_avail_freqs[i],
				i);

		if (get_freq_index(LOW) > total_avail_freqs[i])
			return -1;

		if (rte_get_main_lcore() != i) {
			w->wrk_stats[i].lcore_id = i;
			set_policy(&w->wrk_stats[i], policy);
		}
	}

	return 0;
}

void
rte_power_empty_poll_stat_free(void)
{

	RTE_LOG(INFO, POWER, "Close the Empty Poll\n");

	rte_free(ep_params);
}

int
rte_power_empty_poll_stat_update(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->empty_dequeues++;

	return 0;
}

int
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
{

	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->num_dequeue_pkts += nb_pkt;

	return 0;
}


uint64_t
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->empty_dequeues;
}

uint64_t
rte_power_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->num_dequeue_pkts;
}
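/*
 * Datapath sketch (illustrative; "port", "queue", "pkts" and
 * MAX_PKT_BURST are this example's placeholders): each worker lcore
 * reports every poll result so the periodic rte_empty_poll_detection()
 * callback can classify it as idle or busy:
 *
 *	nb_rx = rte_eth_rx_burst(port, queue, pkts, MAX_PKT_BURST);
 *	if (nb_rx == 0)
 *		rte_power_empty_poll_stat_update(rte_lcore_id());
 *	else
 *		rte_power_poll_stat_update(rte_lcore_id(), nb_rx);
 */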