/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>

#include <rte_atomic.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>

#include "rte_timer.h"

LIST_HEAD(rte_timer_list, rte_timer);

struct priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;            /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

/** per-lcore private info for timers */
static struct priv_timer priv_timer[RTE_MAX_LCORE];

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(name, n) do {				\
		unsigned __lcore_id = rte_lcore_id();		\
		if (__lcore_id < RTE_MAX_LCORE)			\
			priv_timer[__lcore_id].stats.name += (n); \
	} while (0)
#else
#define __TIMER_STAT_ADD(name, n) do {} while (0)
#endif

/* Init the timer library. */
void
rte_timer_subsystem_init(void)
{
	unsigned lcore_id;

	/* since priv_timer is static, it's zeroed by default, so only init some
	 * fields.
	 */
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		rte_spinlock_init(&priv_timer[lcore_id].list_lock);
		priv_timer[lcore_id].prev_lcore = lcore_id;
	}
}

/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	tim->status.u32 = status.u32;
}
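/*
 * Illustrative sketch (not part of the library): a typical application
 * initializes the subsystem once at startup, initializes each timer handle,
 * and then arms timers with rte_timer_reset() (defined further below).  The
 * lcore that owns a timer must call rte_timer_manage() periodically for the
 * callback to fire.  my_tim and my_cb are hypothetical names for this sketch.
 *
 *	static struct rte_timer my_tim;
 *
 *	static void my_cb(struct rte_timer *tim, void *arg)
 *	{
 *		printf("timer %p expired (arg=%p)\n", (void *)tim, arg);
 *	}
 *
 *	// after rte_eal_init():
 *	rte_timer_subsystem_init();
 *	rte_timer_init(&my_tim);
 *	// arm it for ~1 second on the calling lcore, single shot:
 *	rte_timer_reset(&my_tim, rte_get_timer_hz(), SINGLE,
 *			rte_lcore_id(), my_cb, NULL);
 */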
/*
 * if timer is pending or stopped (or running on the same core as
 * us), mark timer as configuring, and on success return the previous
 * status of the timer
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in the correct status before updating it,
	 * and mark it as being configured */
	while (success == 0) {
		prev_status.u32 = tim->status.u32;

		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		success = rte_atomic32_cmpset(&tim->status.u32,
					      prev_status.u32,
					      status.u32);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}

/*
 * if timer is pending, mark timer as running
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in the correct status before updating it,
	 * and mark it as running */
	while (success == 0) {
		prev_status.u32 = tim->status.u32;

		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* here, we know that timer is pending,
		 * mark it atomically as running */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		success = rte_atomic32_cmpset(&tim->status.u32,
					      prev_status.u32,
					      status.u32);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ?
			MAX_SKIPLIST_DEPTH : (rte_bsf32(rand) - 1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH - 1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}
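/*
 * Worked example for the level computation above (descriptive note added for
 * clarity, not upstream documentation): bit 0 of the random value is cleared,
 * so rte_bsf32(rand) returns the index b >= 1 of the lowest set bit, and
 * b = k with probability 2^-k.  Level (b - 1) / 2 folds two bit positions
 * into each level:
 *
 *	b = 1 or 2  ->  level 0   (probability 3/4)
 *	b = 3 or 4  ->  level 1   (probability 3/16)
 *	b = 5 or 6  ->  level 2   (probability 3/64)
 *
 * i.e. P(level >= n+1 | level >= n) = 1/4, matching the p=1/4 stated in the
 * function comment.  For instance, rand = 0x28 (lowest set bit at index 3)
 * yields level (3 - 1) / 2 = 1, before the curr_depth clamp is applied.
 */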
/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl + 1];
		while (prev[lvl]->sl_next[lvl] &&
		       prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev)
{
	int i;
	/* to get a specific entry in the list, look for the entries just lower
	 * than its expiry time, and then advance on each level individually if
	 * necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
		       prev[i]->sl_next[i] != tim &&
		       prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/*
 * add in list, lock if needed
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];

	/* if timer needs to be scheduled on another core, we need to
	 * lock the list; if it is on local core, we need to lock if
	 * we are not called from rte_timer_manage() */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire =
			priv_timer[tim_lcore].pending_head.sl_next[0]->expire;

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
}
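/*
 * Descriptive note (added for clarity, not upstream documentation): each
 * lcore keeps its pending timers in a skiplist ordered by the expire value,
 * headed by the dummy pending_head entry.  A timer linked at level n is also
 * linked at every level below n, so higher levels act as express lanes for
 * the search in timer_get_prev_entries().  For example, with three timers
 * expiring at 10, 20 and 30, the per-lcore list might look like:
 *
 *	level 1: head ------------> 20
 *	level 0: head -> 10 -> 20 -> 30
 *
 * pending_head.expire mirrors the earliest expiry (10 here) so that
 * rte_timer_manage() can often skip taking the list lock.
 */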
/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];

	/* if timer is pending on another core, we need to lock the
	 * list; if it is on local core, we need to lock if we are not
	 * called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
			((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
				priv_timer[lcore_id].prev_lcore,
				0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in the correct status before updating it,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked);
		__TIMER_STAT_ADD(pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	__TIMER_STAT_ADD(pending, 1);
	timer_add(tim, tim_lcore, local_is_locked);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	rte_wmb();
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	tim->status.u32 = status.u32;

	return 0;
}
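/*
 * Descriptive note on the status word (added for clarity; a summary of the
 * code above rather than upstream documentation): tim->status packs a state
 * and an owner lcore into one 32-bit word so that ownership transitions can
 * be claimed with a single rte_atomic32_cmpset():
 *
 *	STOP/PENDING --cmpset--> CONFIG   (timer_set_config_state: reset/stop)
 *	CONFIG       ----------> PENDING  (__rte_timer_reset, after timer_add)
 *	CONFIG       ----------> STOP     (rte_timer_stop)
 *	PENDING      --cmpset--> RUNNING  (timer_set_running_state, called
 *	                                   from rte_timer_manage() below)
 *	RUNNING      ----------> STOP or PENDING, once the callback returns
 *
 * While a timer is in CONFIG, or RUNNING on another core, other callers back
 * off with -1, which is why the plain (non-cmpset) stores back to
 * PENDING/STOP above are safe.
 */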
/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;

	if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) &&
			!(rte_lcore_is_enabled(tim_lcore) ||
			  rte_lcore_has_role(tim_lcore, ROLE_SERVICE))))
		return -1;

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
				 fct, arg, 0);
}

/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;

	/* wait until the timer is in the correct status before updating it,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, 0);
		__TIMER_STAT_ADD(pending, -1);
	}

	/* mark timer as stopped */
	rte_wmb();
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	tim->status.u32 = status.u32;

	return 0;
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}

/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return tim->status.state == RTE_TIMER_PENDING;
}
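/*
 * Illustrative sketch (not part of the library): arming and stopping timers.
 * ticks are expressed in units of rte_get_timer_cycles() (typically TSC
 * cycles), so applications usually scale a time in seconds by
 * rte_get_timer_hz().  tim_single, tim_periodic and my_cb are hypothetical
 * names for this sketch.
 *
 *	uint64_t hz = rte_get_timer_hz();
 *
 *	// single shot: fires once on lcore 1, ~500 ms from now
 *	rte_timer_reset(&tim_single, hz / 2, SINGLE, 1, my_cb, NULL);
 *
 *	// periodic: fires every second on whichever lcore the round robin
 *	// picks (LCORE_ID_ANY); that lcore must call rte_timer_manage()
 *	rte_timer_reset(&tim_periodic, hz, PERIODICAL, LCORE_ID_ANY,
 *			my_cb, NULL);
 *
 *	// later, from another lcore: loop until the timer can be stopped;
 *	// rte_timer_stop() fails while the callback runs elsewhere
 *	rte_timer_stop_sync(&tim_periodic);
 */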
/* must be called periodically, run all timers that expired */
void rte_timer_manage(void)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
			prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
		(priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
			priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			rte_wmb();
			tim->status.u32 = status.u32;
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(pending, 1);
			status.owner = (int16_t)lcore_id;
			rte_wmb();
			tim->status.u32 = status.u32;
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}
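/*
 * Illustrative sketch (not part of the library): rte_timer_manage() must be
 * called by every lcore that owns timers, and callbacks run in the context
 * of that call.  A common pattern is to rate-limit it so the skiplist is not
 * scanned on every iteration of the packet-processing loop; the resolution
 * below is an application-chosen value, not something this library defines.
 *
 *	static int
 *	lcore_main_loop(__rte_unused void *arg)
 *	{
 *		const uint64_t timer_resolution_cycles = rte_get_timer_hz() / 1000;
 *		uint64_t prev_tsc = 0;
 *
 *		for (;;) {
 *			uint64_t cur_tsc = rte_get_timer_cycles();
 *
 *			// ... packet processing work here ...
 *
 *			if (cur_tsc - prev_tsc > timer_resolution_cycles) {
 *				rte_timer_manage();
 *				prev_tsc = cur_tsc;
 *			}
 *		}
 *		return 0;
 *	}
 */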
/* dump statistics about timers */
void rte_timer_dump_stats(FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}
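/*
 * Illustrative sketch (not part of the library): the per-lcore counters are
 * only collected when the library is built with RTE_LIBRTE_TIMER_DEBUG, so a
 * debug build can periodically print them, e.g.:
 *
 *	rte_timer_dump_stats(stdout);
 */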