/*
 * SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 Intel Corporation.
 * Copyright 2012 Hasan Alayli <[email protected]>
 */

#define RTE_MEM 1

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>
#include <limits.h>
#include <inttypes.h>
#include <unistd.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sched.h>

#include <rte_prefetch.h>
#include <rte_per_lcore.h>
#include <rte_atomic.h>
#include <rte_atomic_64.h>
#include <rte_log.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>

#include "lthread_api.h"
#include "lthread_int.h"
#include "lthread_sched.h"
#include "lthread_objcache.h"
#include "lthread_timer.h"
#include "lthread_mutex.h"
#include "lthread_cond.h"
#include "lthread_tls.h"
#include "lthread_diag.h"

/*
 * This file implements the lthread scheduler.
 * The scheduler is the function lthread_run(), which must be run as the
 * main loop of an EAL thread.
 *
 * Currently, once a scheduler is created it cannot be destroyed. When a
 * scheduler shuts down, it is assumed that the application is terminating.
 */
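/*
 * Minimal usage sketch (illustrative only; app_lcore_main() and
 * initial_lthread() are hypothetical application functions, and the EAL
 * launch enum name varies by DPDK version). Each EAL thread spawns an
 * initial lthread, which implicitly creates the per-lcore scheduler, and
 * then enters lthread_run():
 *
 *	static int app_lcore_main(void *arg __rte_unused)
 *	{
 *		struct lthread *lt;
 *
 *		lthread_create(&lt, -1, initial_lthread, NULL);
 *		lthread_run();	// returns when this scheduler shuts down
 *		return 0;
 *	}
 *
 *	// in main(), before launching the per-lcore loops:
 *	lthread_num_schedulers_set(rte_lcore_count());
 *	rte_eal_mp_remote_launch(app_lcore_main, NULL, CALL_MASTER);
 */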

static rte_atomic16_t num_schedulers;
static rte_atomic16_t active_schedulers;

/* one scheduler per lcore */
RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;

struct lthread_sched *schedcore[LTHREAD_MAX_LCORES];

diag_callback diag_cb;

uint64_t diag_mask;


/* constructor */
RTE_INIT(lthread_sched_ctor)
{
	memset(schedcore, 0, sizeof(schedcore));
	rte_atomic16_init(&num_schedulers);
	rte_atomic16_set(&num_schedulers, 1);
	rte_atomic16_init(&active_schedulers);
	rte_atomic16_set(&active_schedulers, 0);
	diag_cb = NULL;
}


enum sched_alloc_phase {
	SCHED_ALLOC_OK,
	SCHED_ALLOC_QNODE_POOL,
	SCHED_ALLOC_READY_QUEUE,
	SCHED_ALLOC_PREADY_QUEUE,
	SCHED_ALLOC_LTHREAD_CACHE,
	SCHED_ALLOC_STACK_CACHE,
	SCHED_ALLOC_PERLT_CACHE,
	SCHED_ALLOC_TLS_CACHE,
	SCHED_ALLOC_COND_CACHE,
	SCHED_ALLOC_MUTEX_CACHE,
};

static int
_lthread_sched_alloc_resources(struct lthread_sched *new_sched)
{
	int alloc_status;

	do {
		/* Initialize per scheduler queue node pool */
		alloc_status = SCHED_ALLOC_QNODE_POOL;
		new_sched->qnode_pool =
			_qnode_pool_create("qnode pool", LTHREAD_PREALLOC);
		if (new_sched->qnode_pool == NULL)
			break;

		/* Initialize per scheduler local ready queue */
		alloc_status = SCHED_ALLOC_READY_QUEUE;
		new_sched->ready = _lthread_queue_create("ready queue");
		if (new_sched->ready == NULL)
			break;

		/* Initialize per scheduler local peer ready queue */
		alloc_status = SCHED_ALLOC_PREADY_QUEUE;
		new_sched->pready = _lthread_queue_create("pready queue");
		if (new_sched->pready == NULL)
			break;

		/* Initialize per scheduler local free lthread cache */
		alloc_status = SCHED_ALLOC_LTHREAD_CACHE;
		new_sched->lthread_cache =
			_lthread_objcache_create("lthread cache",
						sizeof(struct lthread),
						LTHREAD_PREALLOC);
		if (new_sched->lthread_cache == NULL)
			break;

		/* Initialize per scheduler local free stack cache */
		alloc_status = SCHED_ALLOC_STACK_CACHE;
		new_sched->stack_cache =
			_lthread_objcache_create("stack_cache",
						sizeof(struct lthread_stack),
						LTHREAD_PREALLOC);
		if (new_sched->stack_cache == NULL)
			break;

		/* Initialize per scheduler local free per lthread data cache */
		alloc_status = SCHED_ALLOC_PERLT_CACHE;
		new_sched->per_lthread_cache =
			_lthread_objcache_create("per_lt cache",
						RTE_PER_LTHREAD_SECTION_SIZE,
						LTHREAD_PREALLOC);
		if (new_sched->per_lthread_cache == NULL)
			break;

		/* Initialize per scheduler local free tls cache */
		alloc_status = SCHED_ALLOC_TLS_CACHE;
		new_sched->tls_cache =
			_lthread_objcache_create("TLS cache",
						sizeof(struct lthread_tls),
						LTHREAD_PREALLOC);
		if (new_sched->tls_cache == NULL)
			break;

		/* Initialize per scheduler local free cond var cache */
		alloc_status = SCHED_ALLOC_COND_CACHE;
		new_sched->cond_cache =
			_lthread_objcache_create("cond cache",
						sizeof(struct lthread_cond),
						LTHREAD_PREALLOC);
		if (new_sched->cond_cache == NULL)
			break;

		/* Initialize per scheduler local free mutex cache */
		alloc_status = SCHED_ALLOC_MUTEX_CACHE;
		new_sched->mutex_cache =
			_lthread_objcache_create("mutex cache",
						sizeof(struct lthread_mutex),
						LTHREAD_PREALLOC);
		if (new_sched->mutex_cache == NULL)
			break;

		alloc_status = SCHED_ALLOC_OK;
	} while (0);

	/* roll back on any failure */
	switch (alloc_status) {
	case SCHED_ALLOC_MUTEX_CACHE:
		_lthread_objcache_destroy(new_sched->cond_cache);
		/* fall through */
	case SCHED_ALLOC_COND_CACHE:
		_lthread_objcache_destroy(new_sched->tls_cache);
		/* fall through */
	case SCHED_ALLOC_TLS_CACHE:
		_lthread_objcache_destroy(new_sched->per_lthread_cache);
		/* fall through */
	case SCHED_ALLOC_PERLT_CACHE:
		_lthread_objcache_destroy(new_sched->stack_cache);
		/* fall through */
	case SCHED_ALLOC_STACK_CACHE:
		_lthread_objcache_destroy(new_sched->lthread_cache);
		/* fall through */
	case SCHED_ALLOC_LTHREAD_CACHE:
		_lthread_queue_destroy(new_sched->pready);
		/* fall through */
	case SCHED_ALLOC_PREADY_QUEUE:
		_lthread_queue_destroy(new_sched->ready);
		/* fall through */
	case SCHED_ALLOC_READY_QUEUE:
		_qnode_pool_destroy(new_sched->qnode_pool);
		/* fall through */
	case SCHED_ALLOC_QNODE_POOL:
		/* fall through */
	case SCHED_ALLOC_OK:
		break;
	}
	return alloc_status;
}


/*
 * Create a scheduler on the current lcore
 */
struct lthread_sched *_lthread_sched_create(size_t stack_size)
{
	int status;
	struct lthread_sched *new_sched;
	unsigned lcoreid = rte_lcore_id();

	RTE_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE);

	if (stack_size == 0)
		stack_size = LTHREAD_MAX_STACK_SIZE;

	new_sched =
	     rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched),
				RTE_CACHE_LINE_SIZE,
				rte_socket_id());
	if (new_sched == NULL) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate memory for scheduler\n");
		return NULL;
	}

	_lthread_key_pool_init();

	new_sched->stack_size = stack_size;
	new_sched->birth = rte_rdtsc();
	THIS_SCHED = new_sched;

	status = _lthread_sched_alloc_resources(new_sched);
	if (status != SCHED_ALLOC_OK) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate resources for scheduler code = %d\n",
			status);
		rte_free(new_sched);
		return NULL;
	}

	bzero(&new_sched->ctx, sizeof(struct ctx));

	new_sched->lcore_id = lcoreid;

	schedcore[lcoreid] = new_sched;

	new_sched->run_flag = 1;

	DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0);

	rte_wmb();
	return new_sched;
}

/*
 * Set the number of schedulers in the system
 */
int lthread_num_schedulers_set(int num)
{
	rte_atomic16_set(&num_schedulers, num);
	return (int)rte_atomic16_read(&num_schedulers);
}

/*
 * Return the number of schedulers active
 */
int lthread_active_schedulers(void)
{
	return (int)rte_atomic16_read(&active_schedulers);
}


/**
 * Shut down the scheduler running on the specified lcore
 */
void lthread_scheduler_shutdown(unsigned lcoreid)
{
	uint64_t coreid = (uint64_t) lcoreid;

	if (coreid < LTHREAD_MAX_LCORES) {
		if (schedcore[coreid] != NULL)
			schedcore[coreid]->run_flag = 0;
	}
}

/**
 * Shut down all schedulers
 */
void lthread_scheduler_shutdown_all(void)
{
	uint64_t i;

	/*
	 * Give time for all schedulers to have started.
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (rte_atomic16_read(&active_schedulers) <
	       rte_atomic16_read(&num_schedulers))
		sched_yield();

	for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
		if (schedcore[i] != NULL)
			schedcore[i]->run_flag = 0;
	}
}
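
/*
 * Shutdown sketch (illustrative only): once the application decides it is
 * finished, any lthread (or a control pthread) can stop all schedulers;
 * each lthread_run() loop then exits as soon as its queues drain and no
 * blocked lthreads remain:
 *
 *	static void finish(void *arg __rte_unused)
 *	{
 *		// ... wait for / join the application's worker lthreads ...
 *		lthread_scheduler_shutdown_all();
 *	}
 */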

/*
 * Resume a suspended lthread
 */
static __rte_always_inline void
_lthread_resume(struct lthread *lt);
static inline void _lthread_resume(struct lthread *lt)
{
	struct lthread_sched *sched = THIS_SCHED;
	struct lthread_stack *s;
	uint64_t state = lt->state;
#if LTHREAD_DIAG
	int init = 0;
#endif

	sched->current_lthread = lt;

	if (state & (BIT(ST_LT_CANCELLED) | BIT(ST_LT_EXITED))) {
		/* if detached we can free the thread now */
		if (state & BIT(ST_LT_DETACH)) {
			_lthread_free(lt);
			sched->current_lthread = NULL;
			return;
		}
	}

	if (state & BIT(ST_LT_INIT)) {
		/* first time this thread has been run */
		/* assign thread to this scheduler */
		lt->sched = THIS_SCHED;

		/* allocate stack */
		s = _stack_alloc();

		lt->stack_container = s;
		_lthread_set_stack(lt, s->stack, s->stack_size);

		/* allocate memory for TLS used by this thread */
		_lthread_tls_alloc(lt);

		lt->state = BIT(ST_LT_READY);
#if LTHREAD_DIAG
		init = 1;
#endif
	}

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESUMED, init, lt);

	/* switch to the new thread */
	ctx_switch(&lt->ctx, &sched->ctx);

	/* If posting to a queue that could be read by another lcore,
	 * we defer the queue write until now to ensure the context has been
	 * saved before the other core tries to resume it.
	 * This applies to blocking on mutex, cond, and to set_affinity.
	 */
	if (lt->pending_wr_queue != NULL) {
		struct lthread_queue *dest = lt->pending_wr_queue;

		lt->pending_wr_queue = NULL;

		/* queue the current thread to the specified queue */
		_lthread_queue_insert_mp(dest, lt);
	}

	sched->current_lthread = NULL;
}

/*
 * Handle sleep timer expiry
 */
void
_sched_timer_cb(struct rte_timer *tim, void *arg)
{
	struct lthread *lt = (struct lthread *) arg;
	uint64_t state = lt->state;

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_EXPIRED, &lt->tim, 0);

	rte_timer_stop(tim);

	if (lt->state & BIT(ST_LT_CANCELLED))
		(THIS_SCHED)->nb_blocked_threads--;

	lt->state = state | BIT(ST_LT_EXPIRED);
	_lthread_resume(lt);
	lt->state = state & CLEARBIT(ST_LT_EXPIRED);
}
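
/*
 * Caller-side sketch (illustrative): this callback is what resumes an
 * lthread that blocked with a timeout, e.g. via lthread_sleep(). The
 * keep_running flag below is hypothetical:
 *
 *	static void periodic(void *arg __rte_unused)
 *	{
 *		while (keep_running)
 *			lthread_sleep(1000000);	// ns; woken by _sched_timer_cb()
 *	}
 */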


/*
 * Returns 0 if there is a pending job in the scheduler, or 1 if done and
 * the scheduler can exit.
 */
static inline int _lthread_sched_isdone(struct lthread_sched *sched)
{
	return (sched->run_flag == 0) &&
			(_lthread_queue_empty(sched->ready)) &&
			(_lthread_queue_empty(sched->pready)) &&
			(sched->nb_blocked_threads == 0);
}

/*
 * Wait for all schedulers to start
 */
static inline void _lthread_schedulers_sync_start(void)
{
	rte_atomic16_inc(&active_schedulers);

	/* wait for lthread schedulers
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (rte_atomic16_read(&active_schedulers) <
	       rte_atomic16_read(&num_schedulers))
		sched_yield();

}

/*
 * Wait for all schedulers to stop
 */
static inline void _lthread_schedulers_sync_stop(void)
{
	rte_atomic16_dec(&active_schedulers);
	rte_atomic16_dec(&num_schedulers);

	/* wait for schedulers
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (rte_atomic16_read(&active_schedulers) > 0)
		sched_yield();

}


/*
 * Run the lthread scheduler
 * This loop is the heart of the system
 */
void lthread_run(void)
{

	struct lthread_sched *sched = THIS_SCHED;
	struct lthread *lt = NULL;

	RTE_LOG(INFO, LTHREAD,
		"starting scheduler %p on lcore %u phys core %u\n",
		sched, rte_lcore_id(),
		rte_lcore_index(rte_lcore_id()));

	/* if more than one, wait for all schedulers to start */
	_lthread_schedulers_sync_start();


	/*
	 * This is the main scheduling loop.
	 * So long as there are tasks in existence we run this loop.
	 * We check for:
	 *   expired timers,
	 *   the local ready queue,
	 *   and the peer ready queue,
	 *
	 * and resume lthreads ad infinitum.
	 */
	while (!_lthread_sched_isdone(sched)) {

		rte_timer_manage();

		lt = _lthread_queue_poll(sched->ready);
		if (lt != NULL)
			_lthread_resume(lt);
		lt = _lthread_queue_poll(sched->pready);
		if (lt != NULL)
			_lthread_resume(lt);
	}


	/* if more than one, wait for all schedulers to stop */
	_lthread_schedulers_sync_stop();

	(THIS_SCHED) = NULL;

	RTE_LOG(INFO, LTHREAD,
		"stopping scheduler %p on lcore %u phys core %u\n",
		sched, rte_lcore_id(),
		rte_lcore_index(rte_lcore_id()));
	fflush(stdout);
}

/*
 * Return the scheduler for this lcore
 */
struct lthread_sched *_lthread_sched_get(unsigned int lcore_id)
{
	struct lthread_sched *res = NULL;

	if (lcore_id < LTHREAD_MAX_LCORES)
		res = schedcore[lcore_id];

	return res;
}

/*
 * Migrate the current thread to another scheduler running
 * on the specified lcore.
 */
int lthread_set_affinity(unsigned lcoreid)
{
	struct lthread *lt = THIS_LTHREAD;
	struct lthread_sched *dest_sched;

	if (unlikely(lcoreid >= LTHREAD_MAX_LCORES))
		return POSIX_ERRNO(EINVAL);

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_AFFINITY, lcoreid, 0);

	dest_sched = schedcore[lcoreid];

	if (unlikely(dest_sched == NULL))
		return POSIX_ERRNO(EINVAL);

	if (likely(dest_sched != THIS_SCHED)) {
		lt->sched = dest_sched;
		lt->pending_wr_queue = dest_sched->pready;
		_affinitize();
		return 0;
	}
	return 0;
}
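
/*
 * Affinity sketch (illustrative only; "target_lcore" is a hypothetical
 * argument): an lthread migrates itself by queueing onto the destination
 * scheduler's peer-ready queue; when the call returns 0 it is already
 * executing on the destination lcore's scheduler.
 *
 *	static void worker(void *arg)
 *	{
 *		unsigned target_lcore = *(unsigned *)arg;
 *
 *		if (lthread_set_affinity(target_lcore) == 0) {
 *			// now running on the scheduler of target_lcore
 *		}
 *	}
 */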