/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_memory.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>

#include "rte_timer.h"

/**
 * Per-lcore info for timers.
 */
struct priv_timer {
	struct rte_timer pending_head; /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;      /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since the last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore; /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

#define FL_ALLOCATED (1 << 0)
struct rte_timer_data {
	struct priv_timer priv_timer[RTE_MAX_LCORE];
	uint8_t internal_flags;
};

#define RTE_MAX_DATA_ELS 64
static const struct rte_memzone *rte_timer_data_mz;
static int *volatile rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {			\
		unsigned __lcore_id = rte_lcore_id();			\
		if (__lcore_id < RTE_MAX_LCORE)				\
			priv_timer[__lcore_id].stats.name += (n);	\
	} while(0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif

static inline int
timer_data_valid(uint32_t id)
{
	return rte_timer_data_arr &&
		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
}

/* validate ID and retrieve timer data pointer, or return error value */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
	if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
		return retval;						\
	timer_data = &rte_timer_data_arr[id];				\
} while (0)

int
rte_timer_data_alloc(uint32_t *id_ptr)
{
	int i;
	struct rte_timer_data *data;

	if (!rte_timer_subsystem_initialized)
		return -ENOMEM;

	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
		data = &rte_timer_data_arr[i];
		if (!(data->internal_flags & FL_ALLOCATED)) {
			data->internal_flags |= FL_ALLOCATED;

			if (id_ptr)
				*id_ptr = i;

			return 0;
		}
	}

	return -ENOSPC;
}

int
rte_timer_data_dealloc(uint32_t id)
{
	struct rte_timer_data *timer_data;
	TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);

	timer_data->internal_flags &= ~(FL_ALLOCATED);

	return 0;
}
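
/*
 * Example usage (illustrative sketch, not part of this file): a library
 * that needs its own timer lists, separate from the default instance used
 * by rte_timer_reset()/rte_timer_manage(), allocates a private instance
 * and passes the returned id to the rte_timer_alt_*() calls.  'my_cb' is
 * a hypothetical callback.
 *
 *	uint32_t tim_data_id;
 *
 *	if (rte_timer_data_alloc(&tim_data_id) < 0)
 *		return -1;
 *	rte_timer_alt_reset(tim_data_id, &tim, ticks, SINGLE,
 *			    rte_lcore_id(), my_cb, NULL);
 *	...
 *	rte_timer_data_dealloc(tim_data_id);
 */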

/* Init the timer library. Allocate an array of timer data structs in shared
 * memory, and allocate the zeroth entry for use with original timer
 * APIs. Since the intersection of the sets of lcore ids in primary and
 * secondary processes should be empty, the zeroth entry can be shared by
 * multiple processes.
 */
int
rte_timer_subsystem_init(void)
{
	const struct rte_memzone *mz;
	struct rte_timer_data *data;
	int i, lcore_id;
	static const char *mz_name = "rte_timer_mz";
	const size_t data_arr_size =
			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
	const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
	bool do_full_init = true;

	rte_mcfg_timer_lock();

	if (rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return -EALREADY;
	}

	mz = rte_memzone_lookup(mz_name);
	if (mz == NULL) {
		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
				SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
		if (mz == NULL) {
			rte_mcfg_timer_unlock();
			return -ENOMEM;
		}
		do_full_init = true;
	} else
		do_full_init = false;

	rte_timer_data_mz = mz;
	rte_timer_data_arr = mz->addr;
	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);

	if (do_full_init) {
		for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
			data = &rte_timer_data_arr[i];

			for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
			     lcore_id++) {
				rte_spinlock_init(
					&data->priv_timer[lcore_id].list_lock);
				data->priv_timer[lcore_id].prev_lcore =
					lcore_id;
			}
		}
	}

	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
	(*rte_timer_mz_refcnt)++;

	rte_timer_subsystem_initialized = 1;

	rte_mcfg_timer_unlock();

	return 0;
}
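
/*
 * Example usage (illustrative sketch, not part of this file): an
 * application typically initializes the subsystem once after
 * rte_eal_init() and tears it down before exiting.
 *
 *	if (rte_timer_subsystem_init() < 0)
 *		rte_exit(EXIT_FAILURE, "Cannot init timer subsystem\n");
 *	...
 *	rte_timer_subsystem_finalize();
 */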

void
rte_timer_subsystem_finalize(void)
{
	rte_mcfg_timer_lock();

	if (!rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return;
	}

	if (--(*rte_timer_mz_refcnt) == 0)
		rte_memzone_free(rte_timer_data_mz);

	rte_timer_subsystem_initialized = 0;

	rte_mcfg_timer_unlock();
}

/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELAXED);
}
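
/*
 * Example (illustrative sketch): a timer handle must be initialized once
 * before its first rte_timer_reset()/rte_timer_stop() call.
 *
 *	static struct rte_timer tim;
 *
 *	rte_timer_init(&tim);
 */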

/*
 * if timer is pending or stopped (or running on the same core as
 * us), mark timer as configuring, and on success return the previous
 * status of the timer
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status,
		       struct priv_timer *priv_timer)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in a correct status before updating,
	 * and mark it as being configured */
	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

	while (success == 0) {
		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		/* CONFIG states are acting as locked states. If the
		 * timer is in CONFIG state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = __atomic_compare_exchange_n(&tim->status.u32,
					      &prev_status.u32,
					      status.u32, 0,
					      __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}

/*
 * if timer is pending, mark timer as running
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in a correct status before updating,
	 * and mark it as running */
	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

	while (success == 0) {
		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* we know that the timer is pending at this point;
		 * mark it atomically as being running
		 */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		/* RUNNING states are acting as locked states. If the
		 * timer is in RUNNING state, the state cannot be changed
		 * by other threads. So, we should use ACQUIRE here.
		 */
		success = __atomic_compare_exchange_n(&tim->status.u32,
					      &prev_status.u32,
					      status.u32, 0,
					      __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
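	/* Worked example (assuming rte_rand() output is uniform): bit 0 of
	 * the random value is cleared, so rte_bsf32() returns b >= 1 and the
	 * candidate level is (b - 1) / 2.  With P(b = k) = 2^-k, this gives
	 * P(level >= n) = 4^-n: roughly 3/4 of entries stay at level 0,
	 * 3/16 reach level 1, 3/64 reach level 2, and so on.
	 */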
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count ++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}

/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev, struct priv_timer *priv_timer)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while(lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
		       prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev,
				struct priv_timer *priv_timer)
{
	int i;

	/* to get a specific entry in the list, look for entries just lower
	 * than the time value, and then increment on each level individually
	 * if necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
		       prev[i]->sl_next[i] != tim &&
		       prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/* call with lock held as necessary
 * add in list
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
	  struct priv_timer *priv_timer)
{
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
			pending_head.sl_next[0]->expire;
}

/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked, struct priv_timer *priv_timer)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock the
	 * list; if it is on the local core, we need to lock if we are not
	 * called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
			((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth --;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked,
		  struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
				priv_timer[lcore_id].prev_lcore,
				0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in a correct status before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	/* if timer needs to be scheduled on another core, we need to
	 * lock the destination list; if it is on local core, we need to lock if
	 * we are not called from rte_timer_manage()
	 */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	__TIMER_STAT_ADD(priv_timer, pending, 1);
	timer_add(tim, tim_lcore, priv_timer);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

	return 0;
}

/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned int tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	return rte_timer_alt_reset(default_data_id, tim, ticks, type,
				   tim_lcore, fct, arg);
}
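
/*
 * Example usage (illustrative sketch, assuming the subsystem and the timer
 * handle have already been initialized; 'my_timer_cb' is a hypothetical
 * callback): arm a periodical timer that fires every second on the
 * calling lcore.
 *
 *	uint64_t hz = rte_get_timer_hz();
 *
 *	if (rte_timer_reset(&tim, hz, PERIODICAL, rte_lcore_id(),
 *			    my_timer_cb, NULL) < 0)
 *		return -1;	// timer is running or being configured elsewhere
 */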

int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
		    uint64_t ticks, enum rte_timer_type type,
		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
				 fct, arg, 0, timer_data);
}

/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}

static int
__rte_timer_stop(struct rte_timer *tim, int local_is_locked,
		 struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* wait until the timer is in a correct status before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	/* mark timer as stopped */
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	/* The "RELEASE" ordering guarantees the memory operations above
	 * the status update are observed before the update by all threads
	 */
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

	return 0;
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	return rte_timer_alt_stop(default_data_id, tim);
}

int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	return __rte_timer_stop(tim, 0, timer_data);
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}
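
/*
 * Example (illustrative sketch): a common teardown pattern is to stop a
 * timer synchronously before releasing the memory that holds it, so that
 * no other lcore still references the handle.
 *
 *	rte_timer_stop_sync(&ctx->tim);
 *	rte_free(ctx);		// 'ctx' is a hypothetical container owning 'tim'
 */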

/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return __atomic_load_n(&tim->status.state,
				__ATOMIC_RELAXED) == RTE_TIMER_PENDING;
}

/* must be called periodically, run all timers that have expired */
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(priv_timer, manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers to the 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
	for (i = priv_timer[lcore_id].curr_skiplist_depth -1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
			prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
		(priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
			priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(priv_timer, pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
		}
		else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(priv_timer, pending, 1);
			status.owner = (int16_t)lcore_id;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1,
				timer_data);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}

int
rte_timer_manage(void)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	__rte_timer_manage(timer_data);

	return 0;
}
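
/*
 * Example usage (illustrative sketch, modeled on DPDK's timer sample
 * application): each EAL worker calls rte_timer_manage() from its main
 * loop, throttled here to roughly one call every 10 us (an assumption;
 * tune to the application's required resolution).
 *
 *	static int
 *	lcore_mainloop(__rte_unused void *arg)
 *	{
 *		uint64_t prev_tsc = 0, cur_tsc, diff_tsc;
 *		const uint64_t resolution_cycles = rte_get_timer_hz() / 100000;
 *
 *		while (1) {
 *			cur_tsc = rte_rdtsc();
 *			diff_tsc = cur_tsc - prev_tsc;
 *			if (diff_tsc > resolution_cycles) {
 *				rte_timer_manage();
 *				prev_tsc = cur_tsc;
 *			}
 *			// other per-lcore work goes here
 *		}
 *		return 0;
 *	}
 */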

int
rte_timer_alt_manage(uint32_t timer_data_id,
		     unsigned int *poll_lcores,
		     int nb_poll_lcores,
		     rte_timer_alt_manage_cb_t f)
{
	unsigned int default_poll_lcores[] = {rte_lcore_id()};
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim, **pprev;
	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
	unsigned int this_lcore = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, j, ret;
	int nb_runlists = 0;
	struct rte_timer_data *data;
	struct priv_timer *privp;
	uint32_t poll_lcore;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(this_lcore < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(data->priv_timer, manage, 1);

	if (poll_lcores == NULL) {
		poll_lcores = default_poll_lcores;
		nb_poll_lcores = RTE_DIM(default_poll_lcores);
	}

	for (i = 0; i < nb_poll_lcores; i++) {
		poll_lcore = poll_lcores[i];
		privp = &data->priv_timer[poll_lcore];

		/* optimize for the case where per-cpu list is empty */
		if (privp->pending_head.sl_next[0] == NULL)
			continue;
		cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
		/* on 64-bit the value cached in pending_head.expire will
		 * be updated atomically, so we can consult that for a quick
		 * check here outside the lock
		 */
		if (likely(privp->pending_head.expire > cur_time))
			continue;
#endif

		/* browse ordered list, add expired timers to the
		 * 'expired' list
		 */
		rte_spinlock_lock(&privp->list_lock);

		/* if nothing to do just unlock and continue */
		if (privp->pending_head.sl_next[0] == NULL ||
		    privp->pending_head.sl_next[0]->expire > cur_time) {
			rte_spinlock_unlock(&privp->list_lock);
			continue;
		}

		/* save start of list of expired timers */
		tim = privp->pending_head.sl_next[0];

		/* break the existing list at current time point */
		timer_get_prev_entries(cur_time, poll_lcore, prev,
				       data->priv_timer);
		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
			if (prev[j] == &privp->pending_head)
				continue;
			privp->pending_head.sl_next[j] =
				prev[j]->sl_next[j];
			if (prev[j]->sl_next[j] == NULL)
				privp->curr_skiplist_depth--;

			prev[j]->sl_next[j] = NULL;
		}

		/* transition run-list from PENDING to RUNNING */
		run_first_tims[nb_runlists] = tim;
		pprev = &run_first_tims[nb_runlists];
		nb_runlists++;

		for ( ; tim != NULL; tim = next_tim) {
			next_tim = tim->sl_next[0];

			ret = timer_set_running_state(tim);
			if (likely(ret == 0)) {
				pprev = &tim->sl_next[0];
			} else {
				/* another core is trying to re-config this one,
				 * remove it from local expired list
				 */
				*pprev = next_tim;
			}
		}

		/* update the next to expire timer value */
		privp->pending_head.expire =
			(privp->pending_head.sl_next[0] == NULL) ? 0 :
				privp->pending_head.sl_next[0]->expire;

		rte_spinlock_unlock(&privp->list_lock);
	}

	/* Now process the run lists */
	while (1) {
		bool done = true;
		uint64_t min_expire = UINT64_MAX;
		int min_idx = 0;

		/* Find the next oldest timer to process */
		for (i = 0; i < nb_runlists; i++) {
			tim = run_first_tims[i];

			if (tim != NULL && tim->expire < min_expire) {
				min_expire = tim->expire;
				min_idx = i;
				done = false;
			}
		}

		if (done)
			break;

		tim = run_first_tims[min_idx];

		/* Move down the runlist from which we picked a timer to
		 * execute
		 */
		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

		data->priv_timer[this_lcore].updated = 0;
		data->priv_timer[this_lcore].running_tim = tim;

		/* Call the provided callback function */
		f(tim);

		__TIMER_STAT_ADD(data->priv_timer, pending, -1);

		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here
		 */
		if (data->priv_timer[this_lcore].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(
				&data->priv_timer[this_lcore].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
			status.owner = (int16_t)this_lcore;
			/* The "RELEASE" ordering guarantees the memory
			 * operations above the status update are observed
			 * before the update by all threads
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, this_lcore, tim->f, tim->arg, 1,
				data);
			rte_spinlock_unlock(
				&data->priv_timer[this_lcore].list_lock);
		}

		data->priv_timer[this_lcore].running_tim = NULL;
	}

	return 0;
}

/* Walk pending lists, stopping timers and calling user-specified function */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
		   int nb_walk_lcores,
		   rte_timer_stop_all_cb_t f, void *f_arg)
{
	int i;
	struct priv_timer *priv_timer;
	uint32_t walk_lcore;
	struct rte_timer *tim, *next_tim;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	for (i = 0; i < nb_walk_lcores; i++) {
		walk_lcore = walk_lcores[i];
		priv_timer = &timer_data->priv_timer[walk_lcore];

		rte_spinlock_lock(&priv_timer->list_lock);

		for (tim = priv_timer->pending_head.sl_next[0];
		     tim != NULL;
		     tim = next_tim) {
			next_tim = tim->sl_next[0];

			/* Call timer_stop with lock held */
			__rte_timer_stop(tim, 1, timer_data);

			if (f)
				f(tim, f_arg);
		}

		rte_spinlock_unlock(&priv_timer->list_lock);
	}

	return 0;
}

int64_t
rte_timer_next_ticks(void)
{
	unsigned int lcore_id = rte_lcore_id();
	struct rte_timer_data *timer_data;
	struct priv_timer *priv_timer;
	const struct rte_timer *tm;
	uint64_t cur_time;
	int64_t left = -ENOENT;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	priv_timer = timer_data->priv_timer;
	cur_time = rte_get_timer_cycles();

	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
	tm = priv_timer[lcore_id].pending_head.sl_next[0];
	if (tm) {
		left = tm->expire - cur_time;
		if (left < 0)
			left = 0;
	}
	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	return left;
}
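
/*
 * Example (illustrative sketch): a power-aware poll loop can use the
 * remaining cycle count to decide whether to sleep before the next
 * rte_timer_manage() call.  'idle_wait' is a hypothetical application
 * helper that sleeps for at most the given number of TSC cycles.
 *
 *	int64_t ticks = rte_timer_next_ticks();
 *
 *	if (ticks > 0)
 *		idle_wait(ticks);
 *	rte_timer_manage();
 */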

/* dump statistics about timers */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, " reset = %"PRIu64"\n", sum.reset);
	fprintf(f, " stop = %"PRIu64"\n", sum.stop);
	fprintf(f, " manage = %"PRIu64"\n", sum.manage);
	fprintf(f, " pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}

int
rte_timer_dump_stats(FILE *f)
{
	return rte_timer_alt_dump_stats(default_data_id, f);
}

int
rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	__rte_timer_dump_stats(timer_data, f);

	return 0;
}