xref: /f-stack/dpdk/lib/librte_timer/rte_timer.c (revision aa61e4b5)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <string.h>
6 #include <stdio.h>
7 #include <stdint.h>
8 #include <stdbool.h>
9 #include <inttypes.h>
10 #include <assert.h>
11 #include <sys/queue.h>
12 
13 #include <rte_atomic.h>
14 #include <rte_common.h>
15 #include <rte_cycles.h>
16 #include <rte_eal_memconfig.h>
17 #include <rte_per_lcore.h>
18 #include <rte_memory.h>
19 #include <rte_launch.h>
20 #include <rte_eal.h>
21 #include <rte_lcore.h>
22 #include <rte_branch_prediction.h>
23 #include <rte_spinlock.h>
24 #include <rte_random.h>
25 #include <rte_pause.h>
26 #include <rte_memzone.h>
27 #include <rte_malloc.h>
28 #include <rte_errno.h>
29 
30 #include "rte_timer.h"
31 
32 /**
33  * Per-lcore info for timers.
34  */
35 struct priv_timer {
36 	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
37 	rte_spinlock_t list_lock;       /**< lock to protect list access */
38 
39 	/** per-core variable that is true if a timer was updated on this
40 	 *  core since the last reset of the variable */
41 	int updated;
42 
43 	/** track the current depth of the skiplist */
44 	unsigned curr_skiplist_depth;
45 
46 	unsigned prev_lcore;              /**< used for lcore round robin */
47 
48 	/** running timer on this lcore now */
49 	struct rte_timer *running_tim;
50 
51 #ifdef RTE_LIBRTE_TIMER_DEBUG
52 	/** per-lcore statistics */
53 	struct rte_timer_debug_stats stats;
54 #endif
55 } __rte_cache_aligned;
56 
57 #define FL_ALLOCATED	(1 << 0)
58 struct rte_timer_data {
59 	struct priv_timer priv_timer[RTE_MAX_LCORE];
60 	uint8_t internal_flags;
61 };
62 
63 #define RTE_MAX_DATA_ELS 64
64 static const struct rte_memzone *rte_timer_data_mz;
65 static int *volatile rte_timer_mz_refcnt;
66 static struct rte_timer_data *rte_timer_data_arr;
67 static const uint32_t default_data_id;
68 static uint32_t rte_timer_subsystem_initialized;
69 
70 /* when debug is enabled, store some statistics */
71 #ifdef RTE_LIBRTE_TIMER_DEBUG
72 #define __TIMER_STAT_ADD(priv_timer, name, n) do {			\
73 		unsigned __lcore_id = rte_lcore_id();			\
74 		if (__lcore_id < RTE_MAX_LCORE)				\
75 			priv_timer[__lcore_id].stats.name += (n);	\
76 	} while(0)
77 #else
78 #define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
79 #endif
80 
81 static inline int
82 timer_data_valid(uint32_t id)
83 {
84 	return rte_timer_data_arr &&
85 		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
86 }
87 
88 /* validate ID and retrieve timer data pointer, or return error value */
89 #define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
90 	if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
91 		return retval;						\
92 	timer_data = &rte_timer_data_arr[id];				\
93 } while (0)
94 
95 int
96 rte_timer_data_alloc(uint32_t *id_ptr)
97 {
98 	int i;
99 	struct rte_timer_data *data;
100 
101 	if (!rte_timer_subsystem_initialized)
102 		return -ENOMEM;
103 
104 	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
105 		data = &rte_timer_data_arr[i];
106 		if (!(data->internal_flags & FL_ALLOCATED)) {
107 			data->internal_flags |= FL_ALLOCATED;
108 
109 			if (id_ptr)
110 				*id_ptr = i;
111 
112 			return 0;
113 		}
114 	}
115 
116 	return -ENOSPC;
117 }
118 
119 int
120 rte_timer_data_dealloc(uint32_t id)
121 {
122 	struct rte_timer_data *timer_data;
123 	TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);
124 
125 	timer_data->internal_flags &= ~(FL_ALLOCATED);
126 
127 	return 0;
128 }
129 
130 /* Init the timer library. Allocate an array of timer data structs in shared
131  * memory, and allocate the zeroth entry for use with original timer
132  * APIs. Since the intersection of the sets of lcore ids in primary and
133  * secondary processes should be empty, the zeroth entry can be shared by
134  * multiple processes.
135  */
136 int
137 rte_timer_subsystem_init(void)
138 {
139 	const struct rte_memzone *mz;
140 	struct rte_timer_data *data;
141 	int i, lcore_id;
142 	static const char *mz_name = "rte_timer_mz";
143 	const size_t data_arr_size =
144 			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
145 	const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
146 	bool do_full_init = true;
147 
148 	if (rte_timer_subsystem_initialized)
149 		return -EALREADY;
150 
151 	rte_mcfg_timer_lock();
152 
153 	mz = rte_memzone_lookup(mz_name);
154 	if (mz == NULL) {
155 		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
156 				SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
157 		if (mz == NULL) {
158 			rte_mcfg_timer_unlock();
159 			return -ENOMEM;
160 		}
161 		do_full_init = true;
162 	} else
163 		do_full_init = false;
164 
165 	rte_timer_data_mz = mz;
166 	rte_timer_data_arr = mz->addr;
167 	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);
168 
169 	if (do_full_init) {
170 		for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
171 			data = &rte_timer_data_arr[i];
172 
173 			for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
174 			     lcore_id++) {
175 				rte_spinlock_init(
176 					&data->priv_timer[lcore_id].list_lock);
177 				data->priv_timer[lcore_id].prev_lcore =
178 					lcore_id;
179 			}
180 		}
181 	}
182 
183 	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
184 	(*rte_timer_mz_refcnt)++;
185 
186 	rte_mcfg_timer_unlock();
187 
188 	rte_timer_subsystem_initialized = 1;
189 
190 	return 0;
191 }
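
/*
 * Illustrative usage sketch, not part of the original file: an application
 * is expected to initialize the timer subsystem once after rte_eal_init()
 * and tear it down before exiting.  The error handling shown here is an
 * assumption, not mandated by this library.
 *
 *	if (rte_eal_init(argc, argv) < 0)
 *		rte_exit(EXIT_FAILURE, "EAL init failed\n");
 *
 *	int ret = rte_timer_subsystem_init();
 *	if (ret < 0 && ret != -EALREADY)
 *		rte_exit(EXIT_FAILURE, "cannot init timer subsystem\n");
 *
 *	... create and arm timers, run lcores ...
 *
 *	rte_timer_subsystem_finalize();
 *	rte_eal_cleanup();
 */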
192 
193 void
194 rte_timer_subsystem_finalize(void)
195 {
196 	if (!rte_timer_subsystem_initialized)
197 		return;
198 
199 	rte_mcfg_timer_lock();
200 
201 	if (--(*rte_timer_mz_refcnt) == 0)
202 		rte_memzone_free(rte_timer_data_mz);
203 
204 	rte_mcfg_timer_unlock();
205 
206 	rte_timer_subsystem_initialized = 0;
207 }
208 
209 /* Initialize the timer handle tim for use */
210 void
211 rte_timer_init(struct rte_timer *tim)
212 {
213 	union rte_timer_status status;
214 
215 	status.state = RTE_TIMER_STOP;
216 	status.owner = RTE_TIMER_NO_OWNER;
217 	tim->status.u32 = status.u32;
218 }
219 
220 /*
221  * if the timer is pending or stopped (or running on the same core as
222  * us), mark the timer as being configured; on success return the previous
223  * status of the timer
224  */
225 static int
226 timer_set_config_state(struct rte_timer *tim,
227 		       union rte_timer_status *ret_prev_status,
228 		       struct priv_timer *priv_timer)
229 {
230 	union rte_timer_status prev_status, status;
231 	int success = 0;
232 	unsigned lcore_id;
233 
234 	lcore_id = rte_lcore_id();
235 
236 	/* wait until the timer is in a correct state before updating it,
237 	 * and mark it as being configured */
238 	while (success == 0) {
239 		prev_status.u32 = tim->status.u32;
240 
241 		/* timer is running on another core
242 		 * or ready to run on local core, exit
243 		 */
244 		if (prev_status.state == RTE_TIMER_RUNNING &&
245 		    (prev_status.owner != (uint16_t)lcore_id ||
246 		     tim != priv_timer[lcore_id].running_tim))
247 			return -1;
248 
249 		/* timer is being configured on another core */
250 		if (prev_status.state == RTE_TIMER_CONFIG)
251 			return -1;
252 
253 		/* here, we know that timer is stopped or pending,
254 		 * mark it atomically as being configured */
255 		status.state = RTE_TIMER_CONFIG;
256 		status.owner = (int16_t)lcore_id;
257 		success = rte_atomic32_cmpset(&tim->status.u32,
258 					      prev_status.u32,
259 					      status.u32);
260 	}
261 
262 	ret_prev_status->u32 = prev_status.u32;
263 	return 0;
264 }
265 
266 /*
267  * if timer is pending, mark timer as running
268  */
269 static int
270 timer_set_running_state(struct rte_timer *tim)
271 {
272 	union rte_timer_status prev_status, status;
273 	unsigned lcore_id = rte_lcore_id();
274 	int success = 0;
275 
276 	/* wait until the timer is in a correct state before updating it,
277 	 * and mark it as running */
278 	while (success == 0) {
279 		prev_status.u32 = tim->status.u32;
280 
281 		/* timer is not pending anymore */
282 		if (prev_status.state != RTE_TIMER_PENDING)
283 			return -1;
284 
285 		/* here, we know that the timer is pending,
286 		 * mark it atomically as running */
287 		status.state = RTE_TIMER_RUNNING;
288 		status.owner = (int16_t)lcore_id;
289 		success = rte_atomic32_cmpset(&tim->status.u32,
290 					      prev_status.u32,
291 					      status.u32);
292 	}
293 
294 	return 0;
295 }
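
/*
 * Note added for clarity (derived from the two helpers above, not from the
 * original file): the status word moves through these states:
 *
 *	STOP/PENDING --timer_set_config_state()--> CONFIG --> PENDING or STOP
 *	PENDING -----timer_set_running_state()---> RUNNING --> STOP or PENDING
 *
 * Both helpers spin on a compare-and-set and return -1 when the timer is in
 * a state that forbids the transition (e.g. RUNNING on another lcore, or
 * already in CONFIG on some core).
 */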
296 
297 /*
298  * Return a skiplist level for a new entry.
299  * This probabilistically gives a level with p=1/4 that an entry at level n
300  * will also appear at level n+1.
301  */
302 static uint32_t
303 timer_get_skiplist_level(unsigned curr_depth)
304 {
305 #ifdef RTE_LIBRTE_TIMER_DEBUG
306 	static uint32_t i, count = 0;
307 	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
308 #endif
309 
310 	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
311 	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
312 	 * bit position of a (pseudo)random number.
313 	 */
314 	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
315 	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;
316 
317 	/* limit the levels used to one above our current level, so we don't,
318 	 * for instance, have a level 0 and a level 7 without anything between
319 	 */
320 	if (level > curr_depth)
321 		level = curr_depth;
322 	if (level >= MAX_SKIPLIST_DEPTH)
323 		level = MAX_SKIPLIST_DEPTH-1;
324 #ifdef RTE_LIBRTE_TIMER_DEBUG
325 	count++;
326 	levels[level]++;
327 	if (count % 10000 == 0)
328 		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
329 			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
330 #endif
331 	return level;
332 }
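
/*
 * Worked example, added for illustration only: bit 0 of 'rand' is masked
 * off above, so its lowest set bit is at some position b >= 1 and the level
 * is (b - 1) / 2 (integer division).  For instance:
 *
 *	rand = ...0010  (b = 1, probability 1/2)  ->  level 0
 *	rand = ...0100  (b = 2, probability 1/4)  ->  level 0
 *	rand = ...1000  (b = 3, probability 1/8)  ->  level 1
 *
 * so P(level >= n + 1) = P(level >= n) / 4, which is the 1/4 promotion
 * probability stated in the comment above.
 */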
333 
334 /*
335  * For a given time value, get at each level the last entry whose
336  * expire time is <= that value.
337  */
338 static void
339 timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
340 		       struct rte_timer **prev, struct priv_timer *priv_timer)
341 {
342 	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
343 	prev[lvl] = &priv_timer[tim_lcore].pending_head;
344 	while (lvl != 0) {
345 		lvl--;
346 		prev[lvl] = prev[lvl+1];
347 		while (prev[lvl]->sl_next[lvl] &&
348 				prev[lvl]->sl_next[lvl]->expire <= time_val)
349 			prev[lvl] = prev[lvl]->sl_next[lvl];
350 	}
351 }
352 
353 /*
354  * Given a timer node in the skiplist, find the previous entries for it at
355  * all skiplist levels.
356  */
357 static void
358 timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
359 				struct rte_timer **prev,
360 				struct priv_timer *priv_timer)
361 {
362 	int i;
363 
364 	/* to get a specific entry in the list, look for the entries just lower
365 	 * than its expire time, then advance on each level individually if needed
366 	 */
367 	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
368 	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
369 		while (prev[i]->sl_next[i] != NULL &&
370 				prev[i]->sl_next[i] != tim &&
371 				prev[i]->sl_next[i]->expire <= tim->expire)
372 			prev[i] = prev[i]->sl_next[i];
373 	}
374 }
375 
376 /* call with lock held as necessary
377  * add the timer to the list
378  * timer must be in config state
379  * timer must not be in a list
380  */
381 static void
382 timer_add(struct rte_timer *tim, unsigned int tim_lcore,
383 	  struct priv_timer *priv_timer)
384 {
385 	unsigned lvl;
386 	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
387 
388 	/* find exactly where this element goes in the list of elements
389 	 * at each level. */
390 	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);
391 
392 	/* now assign it a new level and add at that level */
393 	const unsigned tim_level = timer_get_skiplist_level(
394 			priv_timer[tim_lcore].curr_skiplist_depth);
395 	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
396 		priv_timer[tim_lcore].curr_skiplist_depth++;
397 
398 	lvl = tim_level;
399 	while (lvl > 0) {
400 		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
401 		prev[lvl]->sl_next[lvl] = tim;
402 		lvl--;
403 	}
404 	tim->sl_next[0] = prev[0]->sl_next[0];
405 	prev[0]->sl_next[0] = tim;
406 
407 	/* save the expire time of the lowest list entry into the expire field
408 	 * of the dummy header. NOTE: this is not atomic on 32-bit */
409 	priv_timer[tim_lcore].pending_head.expire =
410 			priv_timer[tim_lcore].pending_head.sl_next[0]->expire;
411 }
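
/*
 * Illustrative picture, added for clarity (not part of the original file):
 * every pending timer appears at level 0 in expiry order, and each higher
 * level holds a random subset that acts as an express lane for the search
 * in timer_get_prev_entries().
 *
 *	level 2:  head ---------------------------> C ----------------> NULL
 *	level 1:  head ----------> B -------------> C ----------------> NULL
 *	level 0:  head -> A -----> B -> ... ------> C -> D -> E ------> NULL
 *
 * pending_head.expire caches the expire time of the first level-0 entry
 * (A in this picture), which lets rte_timer_manage() skip taking the lock
 * on 64-bit builds when nothing is due yet.
 */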
412 
413 /*
414  * delete the timer from the list, locking if needed
415  * timer must be in config state
416  * timer must be in a list
417  */
418 static void
419 timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
420 	  int local_is_locked, struct priv_timer *priv_timer)
421 {
422 	unsigned lcore_id = rte_lcore_id();
423 	unsigned prev_owner = prev_status.owner;
424 	int i;
425 	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
426 
427 	/* if the timer is pending on another core, we need to lock that
428 	 * core's list; if it is on the local core, we need to lock unless
429 	 * we were called from rte_timer_manage() */
430 	if (prev_owner != lcore_id || !local_is_locked)
431 		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);
432 
433 	/* if we remove the first list entry, update the expire field of the
434 	 * dummy header accordingly. NOTE: this is not atomic on 32-bit */
435 	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
436 		priv_timer[prev_owner].pending_head.expire =
437 				((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);
438 
439 	/* adjust pointers from previous entries to point past this */
440 	timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
441 	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
442 		if (prev[i]->sl_next[i] == tim)
443 			prev[i]->sl_next[i] = tim->sl_next[i];
444 	}
445 
446 	/* in case we deleted the last entry at a level, adjust the max level down */
447 	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
448 		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
449 			priv_timer[prev_owner].curr_skiplist_depth--;
450 		else
451 			break;
452 
453 	if (prev_owner != lcore_id || !local_is_locked)
454 		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
455 }
456 
457 /* Reset and start the timer associated with the timer handle (private func) */
458 static int
459 __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
460 		  uint64_t period, unsigned tim_lcore,
461 		  rte_timer_cb_t fct, void *arg,
462 		  int local_is_locked,
463 		  struct rte_timer_data *timer_data)
464 {
465 	union rte_timer_status prev_status, status;
466 	int ret;
467 	unsigned lcore_id = rte_lcore_id();
468 	struct priv_timer *priv_timer = timer_data->priv_timer;
469 
470 	/* round robin for tim_lcore */
471 	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
472 		if (lcore_id < RTE_MAX_LCORE) {
473 			/* EAL thread with valid lcore_id */
474 			tim_lcore = rte_get_next_lcore(
475 				priv_timer[lcore_id].prev_lcore,
476 				0, 1);
477 			priv_timer[lcore_id].prev_lcore = tim_lcore;
478 		} else
479 			/* non-EAL threads do not run rte_timer_manage(),
480 			 * so schedule the timer on the first enabled lcore. */
481 			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
482 	}
483 
484 	/* wait until the timer is in a correct state before updating it,
485 	 * and mark it as being configured */
486 	ret = timer_set_config_state(tim, &prev_status, priv_timer);
487 	if (ret < 0)
488 		return -1;
489 
490 	__TIMER_STAT_ADD(priv_timer, reset, 1);
491 	if (prev_status.state == RTE_TIMER_RUNNING &&
492 	    lcore_id < RTE_MAX_LCORE) {
493 		priv_timer[lcore_id].updated = 1;
494 	}
495 
496 	/* remove it from list */
497 	if (prev_status.state == RTE_TIMER_PENDING) {
498 		timer_del(tim, prev_status, local_is_locked, priv_timer);
499 		__TIMER_STAT_ADD(priv_timer, pending, -1);
500 	}
501 
502 	tim->period = period;
503 	tim->expire = expire;
504 	tim->f = fct;
505 	tim->arg = arg;
506 
507 	/* if the timer needs to be scheduled on another core, we need to
508 	 * lock the destination list; if it is on the local core, we need to
509 	 * lock unless we were called from rte_timer_manage()
510 	 */
511 	if (tim_lcore != lcore_id || !local_is_locked)
512 		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
513 
514 	__TIMER_STAT_ADD(priv_timer, pending, 1);
515 	timer_add(tim, tim_lcore, priv_timer);
516 
517 	/* update state: as we are in CONFIG state, only we can modify
518 	 * the state, so we don't need to use cmpset() here */
519 	rte_wmb();
520 	status.state = RTE_TIMER_PENDING;
521 	status.owner = (int16_t)tim_lcore;
522 	tim->status.u32 = status.u32;
523 
524 	if (tim_lcore != lcore_id || !local_is_locked)
525 		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
526 
527 	return 0;
528 }
529 
530 /* Reset and start the timer associated with the timer handle tim */
531 int
532 rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
533 		      enum rte_timer_type type, unsigned int tim_lcore,
534 		      rte_timer_cb_t fct, void *arg)
535 {
536 	return rte_timer_alt_reset(default_data_id, tim, ticks, type,
537 				   tim_lcore, fct, arg);
538 }
539 
540 int
541 rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
542 		    uint64_t ticks, enum rte_timer_type type,
543 		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
544 {
545 	uint64_t cur_time = rte_get_timer_cycles();
546 	uint64_t period;
547 	struct rte_timer_data *timer_data;
548 
549 	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
550 
551 	if (type == PERIODICAL)
552 		period = ticks;
553 	else
554 		period = 0;
555 
556 	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
557 				 fct, arg, 0, timer_data);
558 }
559 
560 /* loop until rte_timer_reset() succeeds */
561 void
562 rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
563 		     enum rte_timer_type type, unsigned tim_lcore,
564 		     rte_timer_cb_t fct, void *arg)
565 {
566 	while (rte_timer_reset(tim, ticks, type, tim_lcore,
567 			       fct, arg) != 0)
568 		rte_pause();
569 }
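
/*
 * Illustrative usage sketch; my_timer_cb is hypothetical and not part of
 * this library.  Arm a periodical timer that fires roughly once per second
 * on the calling lcore (the callback later runs from rte_timer_manage()):
 *
 *	static void
 *	my_timer_cb(struct rte_timer *tim, void *arg)
 *	{
 *		// periodical timers are re-armed automatically after the
 *		// callback returns, unless the callback stops or resets them
 *		printf("timer %p fired, arg %p\n", (void *)tim, arg);
 *	}
 *
 *	static struct rte_timer tim;
 *
 *	rte_timer_init(&tim);
 *	rte_timer_reset_sync(&tim, rte_get_timer_hz(), PERIODICAL,
 *			     rte_lcore_id(), my_timer_cb, NULL);
 */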
570 
571 static int
572 __rte_timer_stop(struct rte_timer *tim, int local_is_locked,
573 		 struct rte_timer_data *timer_data)
574 {
575 	union rte_timer_status prev_status, status;
576 	unsigned lcore_id = rte_lcore_id();
577 	int ret;
578 	struct priv_timer *priv_timer = timer_data->priv_timer;
579 
580 	/* wait until the timer is in a correct state before updating it,
581 	 * and mark it as being configured */
582 	ret = timer_set_config_state(tim, &prev_status, priv_timer);
583 	if (ret < 0)
584 		return -1;
585 
586 	__TIMER_STAT_ADD(priv_timer, stop, 1);
587 	if (prev_status.state == RTE_TIMER_RUNNING &&
588 	    lcore_id < RTE_MAX_LCORE) {
589 		priv_timer[lcore_id].updated = 1;
590 	}
591 
592 	/* remove it from list */
593 	if (prev_status.state == RTE_TIMER_PENDING) {
594 		timer_del(tim, prev_status, local_is_locked, priv_timer);
595 		__TIMER_STAT_ADD(priv_timer, pending, -1);
596 	}
597 
598 	/* mark timer as stopped */
599 	rte_wmb();
600 	status.state = RTE_TIMER_STOP;
601 	status.owner = RTE_TIMER_NO_OWNER;
602 	tim->status.u32 = status.u32;
603 
604 	return 0;
605 }
606 
607 /* Stop the timer associated with the timer handle tim */
608 int
609 rte_timer_stop(struct rte_timer *tim)
610 {
611 	return rte_timer_alt_stop(default_data_id, tim);
612 }
613 
614 int
615 rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
616 {
617 	struct rte_timer_data *timer_data;
618 
619 	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
620 
621 	return __rte_timer_stop(tim, 0, timer_data);
622 }
623 
624 /* loop until rte_timer_stop() succeeds */
625 void
626 rte_timer_stop_sync(struct rte_timer *tim)
627 {
628 	while (rte_timer_stop(tim) != 0)
629 		rte_pause();
630 }
631 
632 /* Test the PENDING status of the timer handle tim */
633 int
634 rte_timer_pending(struct rte_timer *tim)
635 {
636 	return tim->status.state == RTE_TIMER_PENDING;
637 }
638 
639 /* must be called periodically; run all timers that have expired */
640 static void
641 __rte_timer_manage(struct rte_timer_data *timer_data)
642 {
643 	union rte_timer_status status;
644 	struct rte_timer *tim, *next_tim;
645 	struct rte_timer *run_first_tim, **pprev;
646 	unsigned lcore_id = rte_lcore_id();
647 	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
648 	uint64_t cur_time;
649 	int i, ret;
650 	struct priv_timer *priv_timer = timer_data->priv_timer;
651 
652 	/* timer manager only runs on EAL thread with valid lcore_id */
653 	assert(lcore_id < RTE_MAX_LCORE);
654 
655 	__TIMER_STAT_ADD(priv_timer, manage, 1);
656 	/* optimize for the case where per-cpu list is empty */
657 	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
658 		return;
659 	cur_time = rte_get_timer_cycles();
660 
661 #ifdef RTE_ARCH_64
662 	/* on 64-bit the value cached in pending_head.expire will be
663 	 * updated atomically, so we can consult that for a quick check here
664 	 * outside the lock */
665 	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
666 		return;
667 #endif
668 
669 	/* browse the ordered list, add expired timers to the 'expired' list */
670 	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
671 
672 	/* if nothing to do just unlock and return */
673 	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
674 	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
675 		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
676 		return;
677 	}
678 
679 	/* save start of list of expired timers */
680 	tim = priv_timer[lcore_id].pending_head.sl_next[0];
681 
682 	/* break the existing list at current time point */
683 	timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
684 	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
685 		if (prev[i] == &priv_timer[lcore_id].pending_head)
686 			continue;
687 		priv_timer[lcore_id].pending_head.sl_next[i] =
688 		    prev[i]->sl_next[i];
689 		if (prev[i]->sl_next[i] == NULL)
690 			priv_timer[lcore_id].curr_skiplist_depth--;
691 		prev[i]->sl_next[i] = NULL;
692 	}
693 
694 	/* transition run-list from PENDING to RUNNING */
695 	run_first_tim = tim;
696 	pprev = &run_first_tim;
697 
698 	for ( ; tim != NULL; tim = next_tim) {
699 		next_tim = tim->sl_next[0];
700 
701 		ret = timer_set_running_state(tim);
702 		if (likely(ret == 0)) {
703 			pprev = &tim->sl_next[0];
704 		} else {
705 			/* another core is trying to re-config this one,
706 			 * remove it from local expired list
707 			 */
708 			*pprev = next_tim;
709 		}
710 	}
711 
712 	/* update the next to expire timer value */
713 	priv_timer[lcore_id].pending_head.expire =
714 	    (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
715 		priv_timer[lcore_id].pending_head.sl_next[0]->expire;
716 
717 	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
718 
719 	/* now scan expired list and call callbacks */
720 	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
721 		next_tim = tim->sl_next[0];
722 		priv_timer[lcore_id].updated = 0;
723 		priv_timer[lcore_id].running_tim = tim;
724 
725 		/* execute callback function with list unlocked */
726 		tim->f(tim, tim->arg);
727 
728 		__TIMER_STAT_ADD(priv_timer, pending, -1);
729 		/* the timer was stopped or reloaded by the callback
730 		 * function; we have nothing to do here */
731 		if (priv_timer[lcore_id].updated == 1)
732 			continue;
733 
734 		if (tim->period == 0) {
735 			/* remove from done list and mark timer as stopped */
736 			status.state = RTE_TIMER_STOP;
737 			status.owner = RTE_TIMER_NO_OWNER;
738 			rte_wmb();
739 			tim->status.u32 = status.u32;
740 		}
741 		else {
742 			/* keep it in list and mark timer as pending */
743 			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
744 			status.state = RTE_TIMER_PENDING;
745 			__TIMER_STAT_ADD(priv_timer, pending, 1);
746 			status.owner = (int16_t)lcore_id;
747 			rte_wmb();
748 			tim->status.u32 = status.u32;
749 			__rte_timer_reset(tim, tim->expire + tim->period,
750 				tim->period, lcore_id, tim->f, tim->arg, 1,
751 				timer_data);
752 			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
753 		}
754 	}
755 	priv_timer[lcore_id].running_tim = NULL;
756 }
757 
758 int
759 rte_timer_manage(void)
760 {
761 	struct rte_timer_data *timer_data;
762 
763 	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);
764 
765 	__rte_timer_manage(timer_data);
766 
767 	return 0;
768 }
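
/*
 * Illustrative sketch of the expected polling pattern; the loop condition
 * and the 1 ms resolution are assumptions, not requirements of this
 * library.  Every EAL lcore that owns timers must call rte_timer_manage()
 * periodically for its expired callbacks to run:
 *
 *	uint64_t prev_tsc = 0;
 *	const uint64_t timer_period = rte_get_timer_hz() / 1000; // ~1 ms
 *
 *	while (keep_running) {
 *		uint64_t cur_tsc = rte_get_timer_cycles();
 *
 *		if (cur_tsc - prev_tsc > timer_period) {
 *			rte_timer_manage();
 *			prev_tsc = cur_tsc;
 *		}
 *		// ... other per-lcore work, e.g. packet RX/TX ...
 *	}
 */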
769 
770 int
771 rte_timer_alt_manage(uint32_t timer_data_id,
772 		     unsigned int *poll_lcores,
773 		     int nb_poll_lcores,
774 		     rte_timer_alt_manage_cb_t f)
775 {
776 	unsigned int default_poll_lcores[] = {rte_lcore_id()};
777 	union rte_timer_status status;
778 	struct rte_timer *tim, *next_tim, **pprev;
779 	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
780 	unsigned int this_lcore = rte_lcore_id();
781 	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
782 	uint64_t cur_time;
783 	int i, j, ret;
784 	int nb_runlists = 0;
785 	struct rte_timer_data *data;
786 	struct priv_timer *privp;
787 	uint32_t poll_lcore;
788 
789 	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);
790 
791 	/* timer manager only runs on EAL thread with valid lcore_id */
792 	assert(this_lcore < RTE_MAX_LCORE);
793 
794 	__TIMER_STAT_ADD(data->priv_timer, manage, 1);
795 
796 	if (poll_lcores == NULL) {
797 		poll_lcores = default_poll_lcores;
798 		nb_poll_lcores = RTE_DIM(default_poll_lcores);
799 	}
800 
801 	for (i = 0; i < nb_poll_lcores; i++) {
802 		poll_lcore = poll_lcores[i];
803 		privp = &data->priv_timer[poll_lcore];
804 
805 		/* optimize for the case where per-cpu list is empty */
806 		if (privp->pending_head.sl_next[0] == NULL)
807 			continue;
808 		cur_time = rte_get_timer_cycles();
809 
810 #ifdef RTE_ARCH_64
811 		/* on 64-bit the value cached in pending_head.expire will
812 		 * be updated atomically, so we can consult that for a quick
813 		 * check here outside the lock
814 		 */
815 		if (likely(privp->pending_head.expire > cur_time))
816 			continue;
817 #endif
818 
819 		/* browse the ordered list, add expired timers to the 'expired' list */
820 		rte_spinlock_lock(&privp->list_lock);
821 
822 		/* if nothing to do just unlock and continue */
823 		if (privp->pending_head.sl_next[0] == NULL ||
824 		    privp->pending_head.sl_next[0]->expire > cur_time) {
825 			rte_spinlock_unlock(&privp->list_lock);
826 			continue;
827 		}
828 
829 		/* save start of list of expired timers */
830 		tim = privp->pending_head.sl_next[0];
831 
832 		/* break the existing list at current time point */
833 		timer_get_prev_entries(cur_time, poll_lcore, prev,
834 				       data->priv_timer);
835 		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
836 			if (prev[j] == &privp->pending_head)
837 				continue;
838 			privp->pending_head.sl_next[j] =
839 				prev[j]->sl_next[j];
840 			if (prev[j]->sl_next[j] == NULL)
841 				privp->curr_skiplist_depth--;
842 
843 			prev[j]->sl_next[j] = NULL;
844 		}
845 
846 		/* transition run-list from PENDING to RUNNING */
847 		run_first_tims[nb_runlists] = tim;
848 		pprev = &run_first_tims[nb_runlists];
849 		nb_runlists++;
850 
851 		for ( ; tim != NULL; tim = next_tim) {
852 			next_tim = tim->sl_next[0];
853 
854 			ret = timer_set_running_state(tim);
855 			if (likely(ret == 0)) {
856 				pprev = &tim->sl_next[0];
857 			} else {
858 				/* another core is trying to re-config this one,
859 				 * remove it from local expired list
860 				 */
861 				*pprev = next_tim;
862 			}
863 		}
864 
865 		/* update the next to expire timer value */
866 		privp->pending_head.expire =
867 		    (privp->pending_head.sl_next[0] == NULL) ? 0 :
868 			privp->pending_head.sl_next[0]->expire;
869 
870 		rte_spinlock_unlock(&privp->list_lock);
871 	}
872 
873 	/* Now process the run lists */
874 	while (1) {
875 		bool done = true;
876 		uint64_t min_expire = UINT64_MAX;
877 		int min_idx = 0;
878 
879 		/* Find the next oldest timer to process */
880 		for (i = 0; i < nb_runlists; i++) {
881 			tim = run_first_tims[i];
882 
883 			if (tim != NULL && tim->expire < min_expire) {
884 				min_expire = tim->expire;
885 				min_idx = i;
886 				done = false;
887 			}
888 		}
889 
890 		if (done)
891 			break;
892 
893 		tim = run_first_tims[min_idx];
894 
895 		/* Move down the runlist from which we picked a timer to
896 		 * execute
897 		 */
898 		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];
899 
900 		data->priv_timer[this_lcore].updated = 0;
901 		data->priv_timer[this_lcore].running_tim = tim;
902 
903 		/* Call the provided callback function */
904 		f(tim);
905 
906 		__TIMER_STAT_ADD(data->priv_timer, pending, -1);
907 
908 		/* the timer was stopped or reloaded by the callback
909 		 * function; we have nothing to do here
910 		 */
911 		if (data->priv_timer[this_lcore].updated == 1)
912 			continue;
913 
914 		if (tim->period == 0) {
915 			/* remove from done list and mark timer as stopped */
916 			status.state = RTE_TIMER_STOP;
917 			status.owner = RTE_TIMER_NO_OWNER;
918 			rte_wmb();
919 			tim->status.u32 = status.u32;
920 		} else {
921 			/* keep it in list and mark timer as pending */
922 			rte_spinlock_lock(
923 				&data->priv_timer[this_lcore].list_lock);
924 			status.state = RTE_TIMER_PENDING;
925 			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
926 			status.owner = (int16_t)this_lcore;
927 			rte_wmb();
928 			tim->status.u32 = status.u32;
929 			__rte_timer_reset(tim, tim->expire + tim->period,
930 				tim->period, this_lcore, tim->f, tim->arg, 1,
931 				data);
932 			rte_spinlock_unlock(
933 				&data->priv_timer[this_lcore].list_lock);
934 		}
935 
936 		data->priv_timer[this_lcore].running_tim = NULL;
937 	}
938 
939 	return 0;
940 }
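
/*
 * Illustrative sketch of the alternate API flow; handle_expired and the
 * lcore ids are hypothetical.  A dedicated lcore can drain the pending
 * lists of several lcores through a private timer data instance:
 *
 *	static void
 *	handle_expired(struct rte_timer *tim)
 *	{
 *		// invoked once per expired timer; tim->f and tim->arg are
 *		// still set, so the original callback can be run here
 *		tim->f(tim, tim->arg);
 *	}
 *
 *	uint32_t timer_data_id;
 *	unsigned int poll_lcores[] = {2, 3};
 *
 *	rte_timer_data_alloc(&timer_data_id);
 *	// ... timers armed with rte_timer_alt_reset(timer_data_id, ...) ...
 *	rte_timer_alt_manage(timer_data_id, poll_lcores,
 *			     RTE_DIM(poll_lcores), handle_expired);
 */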
941 
942 /* Walk pending lists, stopping timers and calling user-specified function */
943 int
944 rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
945 		   int nb_walk_lcores,
946 		   rte_timer_stop_all_cb_t f, void *f_arg)
947 {
948 	int i;
949 	struct priv_timer *priv_timer;
950 	uint32_t walk_lcore;
951 	struct rte_timer *tim, *next_tim;
952 	struct rte_timer_data *timer_data;
953 
954 	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
955 
956 	for (i = 0; i < nb_walk_lcores; i++) {
957 		walk_lcore = walk_lcores[i];
958 		priv_timer = &timer_data->priv_timer[walk_lcore];
959 
960 		rte_spinlock_lock(&priv_timer->list_lock);
961 
962 		for (tim = priv_timer->pending_head.sl_next[0];
963 		     tim != NULL;
964 		     tim = next_tim) {
965 			next_tim = tim->sl_next[0];
966 
967 			/* Call timer_stop with lock held */
968 			__rte_timer_stop(tim, 1, timer_data);
969 
970 			if (f)
971 				f(tim, f_arg);
972 		}
973 
974 		rte_spinlock_unlock(&priv_timer->list_lock);
975 	}
976 
977 	return 0;
978 }
979 
980 /* dump statistics about timers */
981 static void
982 __rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
983 {
984 #ifdef RTE_LIBRTE_TIMER_DEBUG
985 	struct rte_timer_debug_stats sum;
986 	unsigned lcore_id;
987 	struct priv_timer *priv_timer = timer_data->priv_timer;
988 
989 	memset(&sum, 0, sizeof(sum));
990 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
991 		sum.reset += priv_timer[lcore_id].stats.reset;
992 		sum.stop += priv_timer[lcore_id].stats.stop;
993 		sum.manage += priv_timer[lcore_id].stats.manage;
994 		sum.pending += priv_timer[lcore_id].stats.pending;
995 	}
996 	fprintf(f, "Timer statistics:\n");
997 	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
998 	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
999 	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
1000 	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
1001 #else
1002 	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
1003 #endif
1004 }
1005 
1006 int
1007 rte_timer_dump_stats(FILE *f)
1008 {
1009 	return rte_timer_alt_dump_stats(default_data_id, f);
1010 }
1011 
1012 int
1013 rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
1014 {
1015 	struct rte_timer_data *timer_data;
1016 
1017 	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
1018 
1019 	__rte_timer_dump_stats(timer_data, f);
1020 
1021 	return 0;
1022 }
1023