/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>

#include <rte_atomic.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_errno.h>

#include "rte_timer.h"

/**
 * Per-lcore info for timers.
 */
struct priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core flag that is true if a timer was updated on this
	 *  core since the last reset of the flag */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;              /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

#define FL_ALLOCATED	(1 << 0)
struct rte_timer_data {
	struct priv_timer priv_timer[RTE_MAX_LCORE];
	uint8_t internal_flags;
};

#define RTE_MAX_DATA_ELS 64
static const struct rte_memzone *rte_timer_data_mz;
static int *volatile rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {			\
		unsigned __lcore_id = rte_lcore_id();			\
		if (__lcore_id < RTE_MAX_LCORE)				\
			priv_timer[__lcore_id].stats.name += (n);	\
	} while (0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif

static inline int
timer_data_valid(uint32_t id)
{
	return rte_timer_data_arr &&
		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
}

/* validate ID and retrieve timer data pointer, or return error value */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
	if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
		return retval;						\
	timer_data = &rte_timer_data_arr[id];				\
} while (0)

int
rte_timer_data_alloc(uint32_t *id_ptr)
{
	int i;
	struct rte_timer_data *data;

	if (!rte_timer_subsystem_initialized)
		return -ENOMEM;

	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
		data = &rte_timer_data_arr[i];
		if (!(data->internal_flags & FL_ALLOCATED)) {
			data->internal_flags |= FL_ALLOCATED;

			if (id_ptr)
				*id_ptr = i;

			return 0;
		}
	}

	return -ENOSPC;
}

int
rte_timer_data_dealloc(uint32_t id)
{
	struct rte_timer_data *timer_data;
	TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);

	timer_data->internal_flags &= ~(FL_ALLOCATED);

	return 0;
}
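
/*
 * Editor's sketch (not part of the original file): the rte_timer_data_*
 * calls above let an application keep a private timer instance and drive
 * it with the rte_timer_alt_* API instead of the default instance.  The
 * names app_data_id and app_timer_cb below are hypothetical.
 *
 *	static void
 *	app_timer_cb(struct rte_timer *tim __rte_unused, void *arg __rte_unused)
 *	{
 *		// application work on expiry
 *	}
 *
 *	uint32_t app_data_id;
 *	struct rte_timer tim;
 *
 *	if (rte_timer_data_alloc(&app_data_id) < 0)
 *		return -1;	// no free slot, or subsystem not initialized
 *	rte_timer_init(&tim);
 *	rte_timer_alt_reset(app_data_id, &tim, rte_get_timer_hz(), PERIODICAL,
 *			    rte_lcore_id(), app_timer_cb, NULL);
 *	...
 *	rte_timer_alt_stop(app_data_id, &tim);
 *	rte_timer_data_dealloc(app_data_id);
 */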

/* Init the timer library. Allocate an array of timer data structs in shared
 * memory, and allocate the zeroth entry for use with original timer
 * APIs. Since the intersection of the sets of lcore ids in primary and
 * secondary processes should be empty, the zeroth entry can be shared by
 * multiple processes.
 */
int
rte_timer_subsystem_init(void)
{
	const struct rte_memzone *mz;
	struct rte_timer_data *data;
	int i, lcore_id;
	static const char *mz_name = "rte_timer_mz";
	const size_t data_arr_size =
			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
	const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
	bool do_full_init = true;

	rte_mcfg_timer_lock();

	if (rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return -EALREADY;
	}

	mz = rte_memzone_lookup(mz_name);
	if (mz == NULL) {
		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
				SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
		if (mz == NULL) {
			rte_mcfg_timer_unlock();
			return -ENOMEM;
		}
		do_full_init = true;
	} else
		do_full_init = false;

	rte_timer_data_mz = mz;
	rte_timer_data_arr = mz->addr;
	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);

	if (do_full_init) {
		for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
			data = &rte_timer_data_arr[i];

			for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
			     lcore_id++) {
				rte_spinlock_init(
					&data->priv_timer[lcore_id].list_lock);
				data->priv_timer[lcore_id].prev_lcore =
					lcore_id;
			}
		}
	}

	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
	(*rte_timer_mz_refcnt)++;

	rte_timer_subsystem_initialized = 1;

	rte_mcfg_timer_unlock();

	return 0;
}

void
rte_timer_subsystem_finalize(void)
{
	rte_mcfg_timer_lock();

	if (!rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return;
	}

	if (--(*rte_timer_mz_refcnt) == 0)
		rte_memzone_free(rte_timer_data_mz);

	rte_timer_subsystem_initialized = 0;

	rte_mcfg_timer_unlock();
}
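
/*
 * Editor's sketch (not part of the original file): the expected
 * initialization and teardown order for the default timer instance used by
 * the classic rte_timer_* API.
 *
 *	if (rte_eal_init(argc, argv) < 0)
 *		return -1;
 *
 *	ret = rte_timer_subsystem_init();
 *	if (ret < 0 && ret != -EALREADY)
 *		return -1;	// -EALREADY just means it was already set up
 *
 *	// ... arm timers with rte_timer_reset(), poll with rte_timer_manage() ...
 *
 *	rte_timer_subsystem_finalize();
 *	rte_eal_cleanup();
 */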

/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	tim->status.u32 = status.u32;
}

/*
 * If the timer is pending or stopped (or running on the same core as
 * us), mark the timer as being configured, and on success return the
 * previous status of the timer.
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status,
		       struct priv_timer *priv_timer)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in the correct state before updating it,
	 * and mark it as being configured */
	while (success == 0) {
		prev_status.u32 = tim->status.u32;

		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that the timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		success = rte_atomic32_cmpset(&tim->status.u32,
					      prev_status.u32,
					      status.u32);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}
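
/*
 * Editor's note, summarizing the state machine driven by the helpers in this
 * file: tim->status packs a 16-bit state and a 16-bit owner lcore into one
 * 32-bit word so that both can be updated with a single compare-and-set.
 * Roughly:
 *
 *	STOP/PENDING -> CONFIG   (timer_set_config_state, from reset/stop)
 *	CONFIG       -> PENDING or STOP   (once the new configuration is written)
 *	PENDING      -> RUNNING  (timer_set_running_state, from the manage loop)
 *	RUNNING      -> STOP or PENDING   (after the callback returns)
 *
 * A timer seen in CONFIG, or RUNNING on another lcore, cannot be touched,
 * which is why timer_set_config_state() returns -1 in those cases.
 */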

/*
 * If the timer is pending, mark it as running.
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in the correct state before updating it,
	 * and mark it as running */
	while (success == 0) {
		prev_status.u32 = tim->status.u32;

		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* here, we know that the timer is pending,
		 * mark it atomically as running */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		success = rte_atomic32_cmpset(&tim->status.u32,
					      prev_status.u32,
					      status.u32);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * The level is chosen so that, with probability 1/4, an entry at level n
 * also appears at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}
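
/*
 * Editor's note, with a worked example of the level computation above: the
 * mask clears bit 0 of rand, so for rand != 0 the lowest set bit is at
 * position k >= 1 with probability 2^-k.  level = (k - 1) / 2 then gives
 * level 0 for k = 1 or 2 (probability 3/4), level 1 for k = 3 or 4
 * (probability 3/16), level 2 for k = 5 or 6 (probability 3/64), and so on,
 * i.e. each level is used a quarter as often as the one below it.  For
 * example, rand = 0x8 (lowest set bit at position 3) yields
 * level = (3 - 1) / 2 = 1.
 */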

/*
 * For a given time value, get, at each skiplist level, the last entry
 * whose expiry time is <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev, struct priv_timer *priv_timer)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
				prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}
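
/*
 * Editor's note, a small example of the search above.  With a two-level
 * skiplist holding expiry times
 *
 *	level 1:  head --------------> 30 ---------------------> 70
 *	level 0:  head -> 10 -> 20 -> 30 -> 40 -> 50 -> 60 -> 70
 *
 * timer_get_prev_entries(45, ...) walks level 1 up to the node at 30, drops
 * to level 0 and advances to 40, leaving prev[1] = node(30) and
 * prev[0] = node(40) -- exactly the predecessors of a new entry expiring
 * at 45.
 */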

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev,
				struct priv_timer *priv_timer)
{
	int i;

	/* to find a specific entry in the list, look for the entries just below
	 * its expiry time, then advance on each level individually as needed
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
				prev[i]->sl_next[i] != tim &&
				prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/* Add the timer to the pending list on lcore tim_lcore.
 * Call with the list lock held as necessary.
 * The timer must be in CONFIG state and must not already be in a list.
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
	  struct priv_timer *priv_timer)
{
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire =
			priv_timer[tim_lcore].pending_head.sl_next[0]->expire;
}

/*
 * Remove the timer from its pending list; take the list lock if needed.
 * The timer must be in CONFIG state and must currently be in a list.
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked, struct priv_timer *priv_timer)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock that
	 * core's list; if it is on the local core, we only need to lock when
	 * not called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
				((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted the last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked,
		  struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
				priv_timer[lcore_id].prev_lcore,
				0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in the correct state before updating it,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	/* if the timer needs to be scheduled on another core, we need to
	 * lock the destination list; if it is on the local core, we only need
	 * to lock when not called from rte_timer_manage()
	 */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	__TIMER_STAT_ADD(priv_timer, pending, 1);
	timer_add(tim, tim_lcore, priv_timer);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	rte_wmb();
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	tim->status.u32 = status.u32;

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

	return 0;
}

/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		      enum rte_timer_type type, unsigned int tim_lcore,
		      rte_timer_cb_t fct, void *arg)
{
	return rte_timer_alt_reset(default_data_id, tim, ticks, type,
				   tim_lcore, fct, arg);
}

int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
		    uint64_t ticks, enum rte_timer_type type,
		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
				 fct, arg, 0, timer_data);
}
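
/*
 * Editor's sketch (not part of the original file): arming a one-second
 * periodic timer and a one-shot timer on the current lcore through the
 * public wrappers above.  The callback name is hypothetical.
 *
 *	static void
 *	app_timer_cb(struct rte_timer *tim, void *arg __rte_unused)
 *	{
 *		printf("timer %p fired on lcore %u\n", (void *)tim, rte_lcore_id());
 *	}
 *
 *	struct rte_timer periodic, oneshot;
 *	uint64_t hz = rte_get_timer_hz();	// timer ticks per second
 *
 *	rte_timer_init(&periodic);
 *	rte_timer_init(&oneshot);
 *	rte_timer_reset(&periodic, hz, PERIODICAL, rte_lcore_id(),
 *			app_timer_cb, NULL);
 *	rte_timer_reset(&oneshot, hz / 2, SINGLE, rte_lcore_id(),
 *			app_timer_cb, NULL);
 */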

/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}

static int
__rte_timer_stop(struct rte_timer *tim, int local_is_locked,
		 struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* wait until the timer is in the correct state before updating it,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	/* mark timer as stopped */
	rte_wmb();
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	tim->status.u32 = status.u32;

	return 0;
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	return rte_timer_alt_stop(default_data_id, tim);
}

int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	return __rte_timer_stop(tim, 0, timer_data);
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}

/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return tim->status.state == RTE_TIMER_PENDING;
}
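
/*
 * Editor's sketch (not part of the original file): stopping a timer that may
 * currently be running on another lcore.  rte_timer_stop() can fail
 * transiently while the callback executes elsewhere, so either retry or use
 * the _sync variant; "periodic" is a hypothetical handle.
 *
 *	if (rte_timer_pending(&periodic))
 *		rte_timer_stop_sync(&periodic);	// spins until the stop succeeds
 */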

/* must be called periodically; run all timers that have expired */
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* the timer manager only runs on an EAL thread with a valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(priv_timer, manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
		    prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
	    (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
		priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(priv_timer, pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function; we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			rte_wmb();
			tim->status.u32 = status.u32;
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(priv_timer, pending, 1);
			status.owner = (int16_t)lcore_id;
			rte_wmb();
			tim->status.u32 = status.u32;
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1,
				timer_data);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}

int
rte_timer_manage(void)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	__rte_timer_manage(timer_data);

	return 0;
}
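
/*
 * Editor's sketch (not part of the original file): a polling lcore typically
 * calls rte_timer_manage() at a bounded rate rather than on every loop
 * iteration.  TIMER_RESOLUTION_CYCLES and force_quit are hypothetical
 * application-side names.
 *
 *	uint64_t prev_tsc = 0, cur_tsc;
 *
 *	while (!force_quit) {
 *		// ... packet processing ...
 *
 *		cur_tsc = rte_get_timer_cycles();
 *		if (cur_tsc - prev_tsc > TIMER_RESOLUTION_CYCLES) {
 *			rte_timer_manage();
 *			prev_tsc = cur_tsc;
 *		}
 *	}
 */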

int
rte_timer_alt_manage(uint32_t timer_data_id,
		     unsigned int *poll_lcores,
		     int nb_poll_lcores,
		     rte_timer_alt_manage_cb_t f)
{
	unsigned int default_poll_lcores[] = {rte_lcore_id()};
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim, **pprev;
	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
	unsigned int this_lcore = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, j, ret;
	int nb_runlists = 0;
	struct rte_timer_data *data;
	struct priv_timer *privp;
	uint32_t poll_lcore;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

	/* the timer manager only runs on an EAL thread with a valid lcore_id */
	assert(this_lcore < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(data->priv_timer, manage, 1);

	if (poll_lcores == NULL) {
		poll_lcores = default_poll_lcores;
		nb_poll_lcores = RTE_DIM(default_poll_lcores);
	}

	for (i = 0; i < nb_poll_lcores; i++) {
		poll_lcore = poll_lcores[i];
		privp = &data->priv_timer[poll_lcore];

		/* optimize for the case where per-cpu list is empty */
		if (privp->pending_head.sl_next[0] == NULL)
			continue;
		cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
		/* on 64-bit the value cached in pending_head.expire will
		 * be updated atomically, so we can consult that for a quick
		 * check here outside the lock
		 */
		if (likely(privp->pending_head.expire > cur_time))
			continue;
#endif

		/* browse ordered list, add expired timers in 'expired' list */
		rte_spinlock_lock(&privp->list_lock);

		/* if nothing to do just unlock and continue */
		if (privp->pending_head.sl_next[0] == NULL ||
		    privp->pending_head.sl_next[0]->expire > cur_time) {
			rte_spinlock_unlock(&privp->list_lock);
			continue;
		}

		/* save start of list of expired timers */
		tim = privp->pending_head.sl_next[0];

		/* break the existing list at current time point */
		timer_get_prev_entries(cur_time, poll_lcore, prev,
				       data->priv_timer);
		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
			if (prev[j] == &privp->pending_head)
				continue;
			privp->pending_head.sl_next[j] =
				prev[j]->sl_next[j];
			if (prev[j]->sl_next[j] == NULL)
				privp->curr_skiplist_depth--;

			prev[j]->sl_next[j] = NULL;
		}

		/* transition run-list from PENDING to RUNNING */
		run_first_tims[nb_runlists] = tim;
		pprev = &run_first_tims[nb_runlists];
		nb_runlists++;

		for ( ; tim != NULL; tim = next_tim) {
			next_tim = tim->sl_next[0];

			ret = timer_set_running_state(tim);
			if (likely(ret == 0)) {
				pprev = &tim->sl_next[0];
			} else {
				/* another core is trying to re-config this one,
				 * remove it from local expired list
				 */
				*pprev = next_tim;
			}
		}

		/* update the next to expire timer value */
		privp->pending_head.expire =
		    (privp->pending_head.sl_next[0] == NULL) ? 0 :
			privp->pending_head.sl_next[0]->expire;

		rte_spinlock_unlock(&privp->list_lock);
	}

	/* Now process the run lists */
	while (1) {
		bool done = true;
		uint64_t min_expire = UINT64_MAX;
		int min_idx = 0;

		/* Find the next oldest timer to process */
		for (i = 0; i < nb_runlists; i++) {
			tim = run_first_tims[i];

			if (tim != NULL && tim->expire < min_expire) {
				min_expire = tim->expire;
				min_idx = i;
				done = false;
			}
		}

		if (done)
			break;

		tim = run_first_tims[min_idx];

		/* Move down the runlist from which we picked a timer to
		 * execute
		 */
		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

		data->priv_timer[this_lcore].updated = 0;
		data->priv_timer[this_lcore].running_tim = tim;

		/* Call the provided callback function */
		f(tim);

		__TIMER_STAT_ADD(data->priv_timer, pending, -1);

		/* the timer was stopped or reloaded by the callback
		 * function; we have nothing to do here
		 */
		if (data->priv_timer[this_lcore].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			rte_wmb();
			tim->status.u32 = status.u32;
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(
				&data->priv_timer[this_lcore].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
			status.owner = (int16_t)this_lcore;
			rte_wmb();
			tim->status.u32 = status.u32;
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, this_lcore, tim->f, tim->arg, 1,
				data);
			rte_spinlock_unlock(
				&data->priv_timer[this_lcore].list_lock);
		}

		data->priv_timer[this_lcore].running_tim = NULL;
	}

	return 0;
}
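
/*
 * Editor's sketch (not part of the original file): draining several lcores'
 * pending lists from one service lcore with rte_timer_alt_manage().  The
 * callback receives each expired timer; app_data_id, app_expired_cb and the
 * poll_lcores contents are hypothetical.
 *
 *	static void
 *	app_expired_cb(struct rte_timer *tim)
 *	{
 *		// handle expiry; tim->f and tim->arg are still available
 *	}
 *
 *	unsigned int poll_lcores[] = {1, 2, 3};
 *
 *	rte_timer_alt_manage(app_data_id, poll_lcores, RTE_DIM(poll_lcores),
 *			     app_expired_cb);
 */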

/* Walk pending lists, stopping timers and calling user-specified function */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
		   int nb_walk_lcores,
		   rte_timer_stop_all_cb_t f, void *f_arg)
{
	int i;
	struct priv_timer *priv_timer;
	uint32_t walk_lcore;
	struct rte_timer *tim, *next_tim;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	for (i = 0; i < nb_walk_lcores; i++) {
		walk_lcore = walk_lcores[i];
		priv_timer = &timer_data->priv_timer[walk_lcore];

		rte_spinlock_lock(&priv_timer->list_lock);

		for (tim = priv_timer->pending_head.sl_next[0];
		     tim != NULL;
		     tim = next_tim) {
			next_tim = tim->sl_next[0];

			/* Call timer_stop with lock held */
			__rte_timer_stop(tim, 1, timer_data);

			if (f)
				f(tim, f_arg);
		}

		rte_spinlock_unlock(&priv_timer->list_lock);
	}

	return 0;
}
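
/*
 * Editor's sketch (not part of the original file): using rte_timer_stop_all()
 * at shutdown to stop and free dynamically allocated timers in one pass.
 * app_data_id, free_timer_cb and the walk_lcores contents are hypothetical;
 * the timers are assumed to have been allocated with rte_malloc().
 *
 *	static void
 *	free_timer_cb(struct rte_timer *tim, void *arg __rte_unused)
 *	{
 *		rte_free(tim);
 *	}
 *
 *	unsigned int walk_lcores[] = {1, 2, 3};
 *
 *	rte_timer_stop_all(app_data_id, walk_lcores, RTE_DIM(walk_lcores),
 *			   free_timer_cb, NULL);
 */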

/* dump statistics about timers */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}

int
rte_timer_dump_stats(FILE *f)
{
	return rte_timer_alt_dump_stats(default_data_id, f);
}
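
/*
 * Editor's note: as the #ifdef above shows, the counters are only collected
 * when the library is built with RTE_LIBRTE_TIMER_DEBUG defined; otherwise
 * the dump only prints a notice.  Typical use:
 *
 *	rte_timer_dump_stats(stdout);
 */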

int
rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	__rte_timer_dump_stats(timer_data, f);

	return 0;
}
1027