xref: /f-stack/dpdk/lib/librte_timer/rte_timer.c (revision 490ee526)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>

#include <rte_atomic.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>

#include "rte_timer.h"

LIST_HEAD(rte_timer_list, rte_timer);

struct priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since the last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;              /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

/** per-lcore private info for timers */
static struct priv_timer priv_timer[RTE_MAX_LCORE];

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(name, n) do {					\
		unsigned __lcore_id = rte_lcore_id();			\
		if (__lcore_id < RTE_MAX_LCORE)				\
			priv_timer[__lcore_id].stats.name += (n);	\
	} while (0)
#else
#define __TIMER_STAT_ADD(name, n) do {} while (0)
#endif

/* Init the timer library. */
void
rte_timer_subsystem_init(void)
{
	unsigned lcore_id;

	/* since priv_timer is static, it's zeroed by default, so only init some
	 * fields.
	 */
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		rte_spinlock_init(&priv_timer[lcore_id].list_lock);
		priv_timer[lcore_id].prev_lcore = lcore_id;
	}
}

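/*
 * Illustrative usage sketch (not part of this file): an application calls
 * rte_timer_subsystem_init() once, after rte_eal_init() and before any
 * timer is armed. The main() below is hypothetical.
 *
 *   int main(int argc, char **argv)
 *   {
 *       if (rte_eal_init(argc, argv) < 0)
 *           return -1;
 *       rte_timer_subsystem_init();
 *       ... arm timers with rte_timer_reset(), poll rte_timer_manage() ...
 *       return 0;
 *   }
 */
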
/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	tim->status.u32 = status.u32;
}

/*
 * If the timer is pending or stopped (or running on the same core as
 * us), mark the timer as being configured, and on success return the
 * previous status of the timer.
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in a state that allows the update,
	 * and mark it as being configured */
	while (success == 0) {
		prev_status.u32 = tim->status.u32;

		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that the timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		success = rte_atomic32_cmpset(&tim->status.u32,
					      prev_status.u32,
					      status.u32);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}

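/*
 * Minimal sketch of the same compare-and-set claim pattern on the packed
 * {state, owner} status word (illustrative only, a reduced form of the
 * checks done above; it assumes only the rte_atomic32_cmpset() semantics
 * already used in this file):
 *
 *   union rte_timer_status cur, want;
 *   do {
 *       cur.u32 = tim->status.u32;
 *       if (cur.state == RTE_TIMER_CONFIG)
 *           return -1;
 *       want.state = RTE_TIMER_CONFIG;
 *       want.owner = (int16_t)rte_lcore_id();
 *   } while (rte_atomic32_cmpset(&tim->status.u32, cur.u32, want.u32) == 0);
 *
 * Because state and owner live in one 32-bit word, a single successful
 * cmpset both validates the observed status and claims the timer.
 */
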
/*
 * If the timer is pending, mark it as running.
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in a state that allows the update,
	 * and mark it as running */
	while (success == 0) {
		prev_status.u32 = tim->status.u32;

		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* here, we know that the timer is pending,
		 * mark it atomically as running */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		success = rte_atomic32_cmpset(&tim->status.u32,
					      prev_status.u32,
					      status.u32);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 with nothing in between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}

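/*
 * Worked example of the level computation above (explanatory note, not
 * part of the library). Bit 0 of the random value is masked off, so for
 * a non-zero value rte_bsf32() returns at least 1:
 *
 *   rte_bsf32(rand) == 1 or 2  ->  level (1-1)/2 = 0  or  (2-1)/2 = 0
 *   rte_bsf32(rand) == 3 or 4  ->  level 1
 *   rte_bsf32(rand) == 5 or 6  ->  level 2
 *
 * Each extra pair of trailing zero bits occurs with probability 1/4,
 * which is what gives the 1-in-4 promotion ratio between levels.
 */
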
/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		struct rte_timer **prev)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
				prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}

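/*
 * Illustrative example (hypothetical expire values): with a current
 * skiplist depth of 3 and the layout below, a call such as
 * timer_get_prev_entries(25, lcore, prev) fills prev[] with the last
 * entry at each level whose expire is <= 25.
 *
 *   level 2:  head ---------------------------> t30
 *   level 1:  head ------> t10 ---------------> t30
 *   level 0:  head ------> t10 ------> t20 ---> t30 ---> t40
 *
 *   prev[2] = head, prev[1] = t10, prev[0] = t20
 *
 * timer_add() then links a new entry right after these predecessors at
 * each level it occupies.
 */
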
/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
		struct rte_timer **prev)
{
	int i;
	/* to get a specific entry in the list, look for the entries just lower
	 * than its time value, and then advance on each level individually if
	 * necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
				prev[i]->sl_next[i] != tim &&
				prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/*
 * add in list, lock if needed
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if timer needs to be scheduled on another core, we need to
	 * lock the list; if it is on local core, we need to lock if
	 * we are not called from rte_timer_manage() */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire =
			priv_timer[tim_lcore].pending_head.sl_next[0]->expire;

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
}

/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
		int local_is_locked)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock the
	 * list; if it is on the local core, we need to lock if we are
	 * not called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
				((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted the last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
				priv_timer[lcore_id].prev_lcore,
				0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in a state that allows the update,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked);
		__TIMER_STAT_ADD(pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	__TIMER_STAT_ADD(pending, 1);
	timer_add(tim, tim_lcore, local_is_locked);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	rte_wmb();
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	tim->status.u32 = status.u32;

	return 0;
}

/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;

	if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) &&
			!(rte_lcore_is_enabled(tim_lcore) ||
			  rte_lcore_has_role(tim_lcore, ROLE_SERVICE))))
		return -1;

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
			  fct, arg, 0);
}

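/*
 * Illustrative usage sketch (not part of this file): arm a periodical
 * timer that fires roughly once per second on the calling lcore. The
 * callback, its argument and the hello_tim handle are hypothetical.
 *
 *   static void
 *   hello_cb(struct rte_timer *t, void *arg)
 *   {
 *       printf("timer %p fired, arg=%p\n", (void *)t, arg);
 *   }
 *
 *   static struct rte_timer hello_tim;
 *
 *   rte_timer_init(&hello_tim);
 *   if (rte_timer_reset(&hello_tim, rte_get_timer_hz(), PERIODICAL,
 *                       rte_lcore_id(), hello_cb, NULL) != 0)
 *       ... the timer was being run or configured elsewhere, retry ...
 */
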
/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;

	/* wait until the timer is in a state that allows the update,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, 0);
		__TIMER_STAT_ADD(pending, -1);
	}

	/* mark timer as stopped */
	rte_wmb();
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	tim->status.u32 = status.u32;

	return 0;
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}

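/*
 * Illustrative usage sketch (not part of this file): tear down the
 * hypothetical hello_tim from the rte_timer_reset() example above.
 * rte_timer_stop_sync() simply retries until rte_timer_stop() succeeds,
 * i.e. until no other lcore is running or reconfiguring the timer.
 *
 *   rte_timer_stop_sync(&hello_tim);
 *   ... hello_tim is now RTE_TIMER_STOP and owned by no lcore ...
 */
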
/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return tim->status.state == RTE_TIMER_PENDING;
}

/* must be called periodically, run all timers that have expired */
void rte_timer_manage(void)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
		    prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
	    (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
		priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, so we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			rte_wmb();
			tim->status.u32 = status.u32;
		}
		else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(pending, 1);
			status.owner = (int16_t)lcore_id;
			rte_wmb();
			tim->status.u32 = status.u32;
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}

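/*
 * Illustrative usage sketch (not part of this file): an lcore main loop
 * typically rate-limits its calls to rte_timer_manage() rather than
 * calling it on every iteration. TIMER_RESOLUTION_CYCLES is a
 * hypothetical application-chosen constant (e.g. a few microseconds
 * worth of TSC cycles); lcore_mainloop() is hypothetical as well.
 *
 *   static int
 *   lcore_mainloop(__rte_unused void *arg)
 *   {
 *       uint64_t prev_tsc = 0, cur_tsc, diff_tsc;
 *
 *       while (1) {
 *           cur_tsc = rte_rdtsc();
 *           diff_tsc = cur_tsc - prev_tsc;
 *           if (diff_tsc > TIMER_RESOLUTION_CYCLES) {
 *               rte_timer_manage();
 *               prev_tsc = cur_tsc;
 *           }
 *           ... do the rest of the datapath work here ...
 *       }
 *       return 0;
 *   }
 */
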
/* dump statistics about timers */
void rte_timer_dump_stats(FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}
618