xref: /freebsd-12.1/sys/kern/subr_gtaskqueue.c (revision 8b976a2b)
1 /*-
2  * Copyright (c) 2000 Doug Rabson
3  * Copyright (c) 2014 Jeff Roberson
4  * Copyright (c) 2016 Matthew Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/cpuset.h>
36 #include <sys/interrupt.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/libkern.h>
40 #include <sys/limits.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/gtaskqueue.h>
48 #include <sys/unistd.h>
49 #include <machine/stdarg.h>
50 
51 static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
52 static void	gtaskqueue_thread_enqueue(void *);
53 static void	gtaskqueue_thread_loop(void *arg);
54 static int	task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
55 static void	gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);
56 
57 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
58 TASKQGROUP_DEFINE(config, 1, 1);
59 
60 struct gtaskqueue_busy {
61 	struct gtask	*tb_running;
62 	TAILQ_ENTRY(gtaskqueue_busy) tb_link;
63 };
64 
/*
 * Sentinel stored in gtaskqueue_busy.tb_running to mark a drain waiter's
 * entry on tq_active; it is never dereferenced as a task.
 */
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;

/* Hook invoked with tq_context after a task has been placed on the queue. */
typedef void (*gtaskqueue_enqueue_fn)(void *context);
68 
69 struct gtaskqueue {
70 	STAILQ_HEAD(, gtask)	tq_queue;
71 	gtaskqueue_enqueue_fn	tq_enqueue;
72 	void			*tq_context;
73 	char			*tq_name;
74 	TAILQ_HEAD(, gtaskqueue_busy) tq_active;
75 	struct mtx		tq_mutex;
76 	struct thread		**tq_threads;
77 	int			tq_tcount;
78 	int			tq_spin;
79 	int			tq_flags;
80 	int			tq_callouts;
81 	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
82 	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
83 };
84 
/* Bits kept in gtaskqueue.tq_flags. */
#define	TQ_FLAGS_ACTIVE			0x1	/* cleared when the queue is freed */
#define	TQ_FLAGS_BLOCKED		0x2	/* enqueue hook is suppressed */
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	0x4	/* hook is gtaskqueue_thread_enqueue */

/* NOTE(review): not referenced anywhere in this file. */
#define	DT_CALLOUT_ARMED		0x1
90 
/*
 * Lock helpers.  A queue's mutex is either a spin mutex or a default mutex
 * (recorded in tq_spin), and each kind needs its own lock/unlock primitive.
 */
#define	TQ_LOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_lock(&(tq)->tq_mutex);			\
		else							\
			mtx_lock_spin(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_UNLOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_unlock(&(tq)->tq_mutex);			\
		else							\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
	} while (0)

/* Assert that the calling thread does / does not hold the queue mutex. */
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
108 
#ifdef INVARIANTS
/*
 * Debug aid (INVARIANTS kernels only): print the fields of a gtask.  Used
 * just before panicking on a task that has no queue.
 */
static void
gtask_dump(struct gtask *gtask)
{

	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	    gtask,
	    gtask->ta_flags,
	    gtask->ta_priority,
	    gtask->ta_func,
	    gtask->ta_context);
}
#endif
117 
118 static __inline int
TQ_SLEEP(struct gtaskqueue * tq,void * p,struct mtx * m,int pri,const char * wm,int t)119 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
120     int t)
121 {
122 	if (tq->tq_spin)
123 		return (msleep_spin(p, m, wm, t));
124 	return (msleep(p, m, pri, wm, t));
125 }
126 
127 static struct gtaskqueue *
_gtaskqueue_create(const char * name,int mflags,taskqueue_enqueue_fn enqueue,void * context,int mtxflags,const char * mtxname __unused)128 _gtaskqueue_create(const char *name, int mflags,
129 		 taskqueue_enqueue_fn enqueue, void *context,
130 		 int mtxflags, const char *mtxname __unused)
131 {
132 	struct gtaskqueue *queue;
133 	char *tq_name;
134 
135 	tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
136 	if (!tq_name)
137 		return (NULL);
138 
139 	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
140 
141 	queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
142 	if (!queue) {
143 		free(tq_name, M_GTASKQUEUE);
144 		return (NULL);
145 	}
146 
147 	STAILQ_INIT(&queue->tq_queue);
148 	TAILQ_INIT(&queue->tq_active);
149 	queue->tq_enqueue = enqueue;
150 	queue->tq_context = context;
151 	queue->tq_name = tq_name;
152 	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
153 	queue->tq_flags |= TQ_FLAGS_ACTIVE;
154 	if (enqueue == gtaskqueue_thread_enqueue)
155 		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
156 	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
157 
158 	return (queue);
159 }
160 
161 
162 /*
163  * Signal a taskqueue thread to terminate.
164  */
165 static void
gtaskqueue_terminate(struct thread ** pp,struct gtaskqueue * tq)166 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
167 {
168 
169 	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
170 		wakeup(tq);
171 		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
172 	}
173 }
174 
175 static void
gtaskqueue_free(struct gtaskqueue * queue)176 gtaskqueue_free(struct gtaskqueue *queue)
177 {
178 
179 	TQ_LOCK(queue);
180 	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
181 	gtaskqueue_terminate(queue->tq_threads, queue);
182 	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
183 	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
184 	mtx_destroy(&queue->tq_mutex);
185 	free(queue->tq_threads, M_GTASKQUEUE);
186 	free(queue->tq_name, M_GTASKQUEUE);
187 	free(queue, M_GTASKQUEUE);
188 }
189 
190 /*
191  * Wait for all to complete, then prevent it from being enqueued
192  */
193 void
grouptask_block(struct grouptask * grouptask)194 grouptask_block(struct grouptask *grouptask)
195 {
196 	struct gtaskqueue *queue = grouptask->gt_taskqueue;
197 	struct gtask *gtask = &grouptask->gt_task;
198 
199 #ifdef INVARIANTS
200 	if (queue == NULL) {
201 		gtask_dump(gtask);
202 		panic("queue == NULL");
203 	}
204 #endif
205 	TQ_LOCK(queue);
206 	gtask->ta_flags |= TASK_NOENQUEUE;
207   	gtaskqueue_drain_locked(queue, gtask);
208 	TQ_UNLOCK(queue);
209 }
210 
211 void
grouptask_unblock(struct grouptask * grouptask)212 grouptask_unblock(struct grouptask *grouptask)
213 {
214 	struct gtaskqueue *queue = grouptask->gt_taskqueue;
215 	struct gtask *gtask = &grouptask->gt_task;
216 
217 #ifdef INVARIANTS
218 	if (queue == NULL) {
219 		gtask_dump(gtask);
220 		panic("queue == NULL");
221 	}
222 #endif
223 	TQ_LOCK(queue);
224 	gtask->ta_flags &= ~TASK_NOENQUEUE;
225 	TQ_UNLOCK(queue);
226 }
227 
228 int
grouptaskqueue_enqueue(struct gtaskqueue * queue,struct gtask * gtask)229 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
230 {
231 #ifdef INVARIANTS
232 	if (queue == NULL) {
233 		gtask_dump(gtask);
234 		panic("queue == NULL");
235 	}
236 #endif
237 	TQ_LOCK(queue);
238 	if (gtask->ta_flags & TASK_ENQUEUED) {
239 		TQ_UNLOCK(queue);
240 		return (0);
241 	}
242 	if (gtask->ta_flags & TASK_NOENQUEUE) {
243 		TQ_UNLOCK(queue);
244 		return (EAGAIN);
245 	}
246 	STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
247 	gtask->ta_flags |= TASK_ENQUEUED;
248 	TQ_UNLOCK(queue);
249 	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
250 		queue->tq_enqueue(queue->tq_context);
251 	return (0);
252 }
253 
/*
 * Task handler that does nothing; used as the function of the barrier task
 * inserted by gtaskqueue_drain_tq_queue().
 */
static void
gtaskqueue_task_nop_fn(void *context)
{

	(void)context;
}
258 
259 /*
260  * Block until all currently queued tasks in this taskqueue
261  * have begun execution.  Tasks queued during execution of
262  * this function are ignored.
263  */
264 static void
gtaskqueue_drain_tq_queue(struct gtaskqueue * queue)265 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
266 {
267 	struct gtask t_barrier;
268 
269 	if (STAILQ_EMPTY(&queue->tq_queue))
270 		return;
271 
272 	/*
273 	 * Enqueue our barrier after all current tasks, but with
274 	 * the highest priority so that newly queued tasks cannot
275 	 * pass it.  Because of the high priority, we can not use
276 	 * taskqueue_enqueue_locked directly (which drops the lock
277 	 * anyway) so just insert it at tail while we have the
278 	 * queue lock.
279 	 */
280 	GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
281 	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
282 	t_barrier.ta_flags |= TASK_ENQUEUED;
283 
284 	/*
285 	 * Once the barrier has executed, all previously queued tasks
286 	 * have completed or are currently executing.
287 	 */
288 	while (t_barrier.ta_flags & TASK_ENQUEUED)
289 		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
290 }
291 
292 /*
293  * Block until all currently executing tasks for this taskqueue
294  * complete.  Tasks that begin execution during the execution
295  * of this function are ignored.
296  */
297 static void
gtaskqueue_drain_tq_active(struct gtaskqueue * queue)298 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
299 {
300 	struct gtaskqueue_busy tb_marker, *tb_first;
301 
302 	if (TAILQ_EMPTY(&queue->tq_active))
303 		return;
304 
305 	/* Block taskq_terminate().*/
306 	queue->tq_callouts++;
307 
308 	/*
309 	 * Wait for all currently executing taskqueue threads
310 	 * to go idle.
311 	 */
312 	tb_marker.tb_running = TB_DRAIN_WAITER;
313 	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
314 	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
315 		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
316 	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
317 
318 	/*
319 	 * Wakeup any other drain waiter that happened to queue up
320 	 * without any intervening active thread.
321 	 */
322 	tb_first = TAILQ_FIRST(&queue->tq_active);
323 	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
324 		wakeup(tb_first);
325 
326 	/* Release taskqueue_terminate(). */
327 	queue->tq_callouts--;
328 	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
329 		wakeup_one(queue->tq_threads);
330 }
331 
332 void
gtaskqueue_block(struct gtaskqueue * queue)333 gtaskqueue_block(struct gtaskqueue *queue)
334 {
335 
336 	TQ_LOCK(queue);
337 	queue->tq_flags |= TQ_FLAGS_BLOCKED;
338 	TQ_UNLOCK(queue);
339 }
340 
341 void
gtaskqueue_unblock(struct gtaskqueue * queue)342 gtaskqueue_unblock(struct gtaskqueue *queue)
343 {
344 
345 	TQ_LOCK(queue);
346 	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
347 	if (!STAILQ_EMPTY(&queue->tq_queue))
348 		queue->tq_enqueue(queue->tq_context);
349 	TQ_UNLOCK(queue);
350 }
351 
352 static void
gtaskqueue_run_locked(struct gtaskqueue * queue)353 gtaskqueue_run_locked(struct gtaskqueue *queue)
354 {
355 	struct gtaskqueue_busy tb;
356 	struct gtaskqueue_busy *tb_first;
357 	struct gtask *gtask;
358 
359 	KASSERT(queue != NULL, ("tq is NULL"));
360 	TQ_ASSERT_LOCKED(queue);
361 	tb.tb_running = NULL;
362 
363 	while (STAILQ_FIRST(&queue->tq_queue)) {
364 		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
365 
366 		/*
367 		 * Carefully remove the first task from the queue and
368 		 * clear its TASK_ENQUEUED flag
369 		 */
370 		gtask = STAILQ_FIRST(&queue->tq_queue);
371 		KASSERT(gtask != NULL, ("task is NULL"));
372 		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
373 		gtask->ta_flags &= ~TASK_ENQUEUED;
374 		tb.tb_running = gtask;
375 		TQ_UNLOCK(queue);
376 
377 		KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
378 		gtask->ta_func(gtask->ta_context);
379 
380 		TQ_LOCK(queue);
381 		tb.tb_running = NULL;
382 		wakeup(gtask);
383 
384 		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
385 		tb_first = TAILQ_FIRST(&queue->tq_active);
386 		if (tb_first != NULL &&
387 		    tb_first->tb_running == TB_DRAIN_WAITER)
388 			wakeup(tb_first);
389 	}
390 }
391 
392 static int
task_is_running(struct gtaskqueue * queue,struct gtask * gtask)393 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
394 {
395 	struct gtaskqueue_busy *tb;
396 
397 	TQ_ASSERT_LOCKED(queue);
398 	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
399 		if (tb->tb_running == gtask)
400 			return (1);
401 	}
402 	return (0);
403 }
404 
405 static int
gtaskqueue_cancel_locked(struct gtaskqueue * queue,struct gtask * gtask)406 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
407 {
408 
409 	if (gtask->ta_flags & TASK_ENQUEUED)
410 		STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
411 	gtask->ta_flags &= ~TASK_ENQUEUED;
412 	return (task_is_running(queue, gtask) ? EBUSY : 0);
413 }
414 
/*
 * Locked wrapper around gtaskqueue_cancel_locked(): dequeue a pending task.
 * Returns EBUSY if the task is currently executing, 0 otherwise.
 */
int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int rv;

	TQ_LOCK(queue);
	rv = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);
	return (rv);
}
426 
427 static void
gtaskqueue_drain_locked(struct gtaskqueue * queue,struct gtask * gtask)428 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
429 {
430 	while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
431 		TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
432 }
433 
434 void
gtaskqueue_drain(struct gtaskqueue * queue,struct gtask * gtask)435 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
436 {
437 
438 	if (!queue->tq_spin)
439 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
440 
441 	TQ_LOCK(queue);
442 	gtaskqueue_drain_locked(queue, gtask);
443 	TQ_UNLOCK(queue);
444 }
445 
446 void
gtaskqueue_drain_all(struct gtaskqueue * queue)447 gtaskqueue_drain_all(struct gtaskqueue *queue)
448 {
449 
450 	if (!queue->tq_spin)
451 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
452 
453 	TQ_LOCK(queue);
454 	gtaskqueue_drain_tq_queue(queue);
455 	gtaskqueue_drain_tq_active(queue);
456 	TQ_UNLOCK(queue);
457 }
458 
459 static int
_gtaskqueue_start_threads(struct gtaskqueue ** tqp,int count,int pri,cpuset_t * mask,const char * name,va_list ap)460 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
461     cpuset_t *mask, const char *name, va_list ap)
462 {
463 	char ktname[MAXCOMLEN + 1];
464 	struct thread *td;
465 	struct gtaskqueue *tq;
466 	int i, error;
467 
468 	if (count <= 0)
469 		return (EINVAL);
470 
471 	vsnprintf(ktname, sizeof(ktname), name, ap);
472 	tq = *tqp;
473 
474 	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
475 	    M_NOWAIT | M_ZERO);
476 	if (tq->tq_threads == NULL) {
477 		printf("%s: no memory for %s threads\n", __func__, ktname);
478 		return (ENOMEM);
479 	}
480 
481 	for (i = 0; i < count; i++) {
482 		if (count == 1)
483 			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
484 			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
485 		else
486 			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
487 			    &tq->tq_threads[i], RFSTOPPED, 0,
488 			    "%s_%d", ktname, i);
489 		if (error) {
490 			/* should be ok to continue, taskqueue_free will dtrt */
491 			printf("%s: kthread_add(%s): error %d", __func__,
492 			    ktname, error);
493 			tq->tq_threads[i] = NULL;		/* paranoid */
494 		} else
495 			tq->tq_tcount++;
496 	}
497 	for (i = 0; i < count; i++) {
498 		if (tq->tq_threads[i] == NULL)
499 			continue;
500 		td = tq->tq_threads[i];
501 		if (mask) {
502 			error = cpuset_setthread(td->td_tid, mask);
503 			/*
504 			 * Failing to pin is rarely an actual fatal error;
505 			 * it'll just affect performance.
506 			 */
507 			if (error)
508 				printf("%s: curthread=%llu: can't pin; "
509 				    "error=%d\n",
510 				    __func__,
511 				    (unsigned long long) td->td_tid,
512 				    error);
513 		}
514 		thread_lock(td);
515 		sched_prio(td, pri);
516 		sched_add(td, SRQ_BORING);
517 		thread_unlock(td);
518 	}
519 
520 	return (0);
521 }
522 
523 static int
gtaskqueue_start_threads(struct gtaskqueue ** tqp,int count,int pri,const char * name,...)524 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
525     const char *name, ...)
526 {
527 	va_list ap;
528 	int error;
529 
530 	va_start(ap, name);
531 	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
532 	va_end(ap);
533 	return (error);
534 }
535 
536 static inline void
gtaskqueue_run_callback(struct gtaskqueue * tq,enum taskqueue_callback_type cb_type)537 gtaskqueue_run_callback(struct gtaskqueue *tq,
538     enum taskqueue_callback_type cb_type)
539 {
540 	taskqueue_callback_fn tq_callback;
541 
542 	TQ_ASSERT_UNLOCKED(tq);
543 	tq_callback = tq->tq_callbacks[cb_type];
544 	if (tq_callback != NULL)
545 		tq_callback(tq->tq_cb_contexts[cb_type]);
546 }
547 
548 static void
gtaskqueue_thread_loop(void * arg)549 gtaskqueue_thread_loop(void *arg)
550 {
551 	struct gtaskqueue **tqp, *tq;
552 
553 	tqp = arg;
554 	tq = *tqp;
555 	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
556 	TQ_LOCK(tq);
557 	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
558 		/* XXX ? */
559 		gtaskqueue_run_locked(tq);
560 		/*
561 		 * Because taskqueue_run() can drop tq_mutex, we need to
562 		 * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
563 		 * meantime, which means we missed a wakeup.
564 		 */
565 		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
566 			break;
567 		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
568 	}
569 	gtaskqueue_run_locked(tq);
570 	/*
571 	 * This thread is on its way out, so just drop the lock temporarily
572 	 * in order to call the shutdown callback.  This allows the callback
573 	 * to look at the taskqueue, even just before it dies.
574 	 */
575 	TQ_UNLOCK(tq);
576 	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
577 	TQ_LOCK(tq);
578 
579 	/* rendezvous with thread that asked us to terminate */
580 	tq->tq_tcount--;
581 	wakeup_one(tq->tq_threads);
582 	TQ_UNLOCK(tq);
583 	kthread_exit();
584 }
585 
/*
 * Enqueue hook for thread-serviced queues.  'context' is the address of
 * the queue pointer; one thread sleeping on the queue is woken.
 */
static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue *tq;

	tq = *(struct gtaskqueue **)context;
	wakeup_one(tq);
}
595 
596 
597 static struct gtaskqueue *
gtaskqueue_create_fast(const char * name,int mflags,taskqueue_enqueue_fn enqueue,void * context)598 gtaskqueue_create_fast(const char *name, int mflags,
599 		 taskqueue_enqueue_fn enqueue, void *context)
600 {
601 	return _gtaskqueue_create(name, mflags, enqueue, context,
602 			MTX_SPIN, "fast_taskqueue");
603 }
604 
605 
606 struct taskqgroup_cpu {
607 	LIST_HEAD(, grouptask)	tgc_tasks;
608 	struct gtaskqueue	*tgc_taskq;
609 	int	tgc_cnt;
610 	int	tgc_cpu;
611 };
612 
613 struct taskqgroup {
614 	struct taskqgroup_cpu tqg_queue[MAXCPU];
615 	struct mtx	tqg_lock;
616 	const char *	tqg_name;
617 	int		tqg_adjusting;
618 	int		tqg_stride;
619 	int		tqg_cnt;
620 };
621 
622 struct taskq_bind_task {
623 	struct gtask bt_task;
624 	int	bt_cpuid;
625 };
626 
627 static void
taskqgroup_cpu_create(struct taskqgroup * qgroup,int idx,int cpu)628 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
629 {
630 	struct taskqgroup_cpu *qcpu;
631 
632 	qcpu = &qgroup->tqg_queue[idx];
633 	LIST_INIT(&qcpu->tgc_tasks);
634 	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
635 	    taskqueue_thread_enqueue, &qcpu->tgc_taskq);
636 	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
637 	    "%s_%d", qgroup->tqg_name, idx);
638 	qcpu->tgc_cpu = cpu;
639 }
640 
641 static void
taskqgroup_cpu_remove(struct taskqgroup * qgroup,int idx)642 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
643 {
644 
645 	gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
646 }
647 
648 /*
649  * Find the taskq with least # of tasks that doesn't currently have any
650  * other queues from the uniq identifier.
651  */
652 static int
taskqgroup_find(struct taskqgroup * qgroup,void * uniq)653 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
654 {
655 	struct grouptask *n;
656 	int i, idx, mincnt;
657 	int strict;
658 
659 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
660 	if (qgroup->tqg_cnt == 0)
661 		return (0);
662 	idx = -1;
663 	mincnt = INT_MAX;
664 	/*
665 	 * Two passes;  First scan for a queue with the least tasks that
666 	 * does not already service this uniq id.  If that fails simply find
667 	 * the queue with the least total tasks;
668 	 */
669 	for (strict = 1; mincnt == INT_MAX; strict = 0) {
670 		for (i = 0; i < qgroup->tqg_cnt; i++) {
671 			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
672 				continue;
673 			if (strict) {
674 				LIST_FOREACH(n,
675 				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
676 					if (n->gt_uniq == uniq)
677 						break;
678 				if (n != NULL)
679 					continue;
680 			}
681 			mincnt = qgroup->tqg_queue[i].tgc_cnt;
682 			idx = i;
683 		}
684 	}
685 	if (idx == -1)
686 		panic("%s: failed to pick a qid.", __func__);
687 
688 	return (idx);
689 }
690 
/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY, but it is less clear that it runs after the CPUs are
 * started.  It would be simpler for adjustment to pass a flag indicating
 * if it is delayed.
 */
704 
/* Non-zero once the SYSINIT below has run. */
static int tqg_smp_started;

/* SYSINIT handler: record that the SI_SUB_SMP stage has been reached. */
static void
tqg_record_smp_started(void *arg)
{

	(void)arg;
	tqg_smp_started = 1;
}

SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
    tqg_record_smp_started, NULL);
715 
716 void
taskqgroup_attach(struct taskqgroup * qgroup,struct grouptask * gtask,void * uniq,int irq,const char * name)717 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
718     void *uniq, int irq, const char *name)
719 {
720 	cpuset_t mask;
721 	int qid, error;
722 
723 	gtask->gt_uniq = uniq;
724 	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
725 	gtask->gt_irq = irq;
726 	gtask->gt_cpu = -1;
727 	mtx_lock(&qgroup->tqg_lock);
728 	qid = taskqgroup_find(qgroup, uniq);
729 	qgroup->tqg_queue[qid].tgc_cnt++;
730 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
731 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
732 	if (irq != -1 && tqg_smp_started) {
733 		gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
734 		CPU_ZERO(&mask);
735 		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
736 		mtx_unlock(&qgroup->tqg_lock);
737 		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
738 		if (error)
739 			printf("%s: binding interrupt failed for %s: %d\n",
740 			    __func__, gtask->gt_name, error);
741 	} else
742 		mtx_unlock(&qgroup->tqg_lock);
743 }
744 
745 static void
taskqgroup_attach_deferred(struct taskqgroup * qgroup,struct grouptask * gtask)746 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
747 {
748 	cpuset_t mask;
749 	int qid, cpu, error;
750 
751 	mtx_lock(&qgroup->tqg_lock);
752 	qid = taskqgroup_find(qgroup, gtask->gt_uniq);
753 	cpu = qgroup->tqg_queue[qid].tgc_cpu;
754 	if (gtask->gt_irq != -1) {
755 		mtx_unlock(&qgroup->tqg_lock);
756 
757 		CPU_ZERO(&mask);
758 		CPU_SET(cpu, &mask);
759 		error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
760 		mtx_lock(&qgroup->tqg_lock);
761 		if (error)
762 			printf("%s: binding interrupt failed for %s: %d\n",
763 			    __func__, gtask->gt_name, error);
764 
765 	}
766 	qgroup->tqg_queue[qid].tgc_cnt++;
767 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
768 	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
769 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
770 	mtx_unlock(&qgroup->tqg_lock);
771 }
772 
773 int
taskqgroup_attach_cpu(struct taskqgroup * qgroup,struct grouptask * gtask,void * uniq,int cpu,int irq,const char * name)774 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
775     void *uniq, int cpu, int irq, const char *name)
776 {
777 	cpuset_t mask;
778 	int i, qid, error;
779 
780 	qid = -1;
781 	gtask->gt_uniq = uniq;
782 	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
783 	gtask->gt_irq = irq;
784 	gtask->gt_cpu = cpu;
785 	mtx_lock(&qgroup->tqg_lock);
786 	if (tqg_smp_started) {
787 		for (i = 0; i < qgroup->tqg_cnt; i++)
788 			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
789 				qid = i;
790 				break;
791 			}
792 		if (qid == -1) {
793 			mtx_unlock(&qgroup->tqg_lock);
794 			printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
795 			return (EINVAL);
796 		}
797 	} else
798 		qid = 0;
799 	qgroup->tqg_queue[qid].tgc_cnt++;
800 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
801 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
802 	cpu = qgroup->tqg_queue[qid].tgc_cpu;
803 	mtx_unlock(&qgroup->tqg_lock);
804 
805 	CPU_ZERO(&mask);
806 	CPU_SET(cpu, &mask);
807 	if (irq != -1 && tqg_smp_started) {
808 		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
809 		if (error)
810 			printf("%s: binding interrupt failed for %s: %d\n",
811 			    __func__, gtask->gt_name, error);
812 	}
813 	return (0);
814 }
815 
816 static int
taskqgroup_attach_cpu_deferred(struct taskqgroup * qgroup,struct grouptask * gtask)817 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
818 {
819 	cpuset_t mask;
820 	int i, qid, irq, cpu, error;
821 
822 	qid = -1;
823 	irq = gtask->gt_irq;
824 	cpu = gtask->gt_cpu;
825 	MPASS(tqg_smp_started);
826 	mtx_lock(&qgroup->tqg_lock);
827 	for (i = 0; i < qgroup->tqg_cnt; i++)
828 		if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
829 			qid = i;
830 			break;
831 		}
832 	if (qid == -1) {
833 		mtx_unlock(&qgroup->tqg_lock);
834 		printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
835 		return (EINVAL);
836 	}
837 	qgroup->tqg_queue[qid].tgc_cnt++;
838 	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
839 	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
840 	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
841 	mtx_unlock(&qgroup->tqg_lock);
842 
843 	CPU_ZERO(&mask);
844 	CPU_SET(cpu, &mask);
845 
846 	if (irq != -1) {
847 		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
848 		if (error)
849 			printf("%s: binding interrupt failed for %s: %d\n",
850 			    __func__, gtask->gt_name, error);
851 	}
852 	return (0);
853 }
854 
855 void
taskqgroup_detach(struct taskqgroup * qgroup,struct grouptask * gtask)856 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
857 {
858 	int i;
859 
860 	grouptask_block(gtask);
861 	mtx_lock(&qgroup->tqg_lock);
862 	for (i = 0; i < qgroup->tqg_cnt; i++)
863 		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
864 			break;
865 	if (i == qgroup->tqg_cnt)
866 		panic("%s: task %s not in group", __func__, gtask->gt_name);
867 	qgroup->tqg_queue[i].tgc_cnt--;
868 	LIST_REMOVE(gtask, gt_list);
869 	mtx_unlock(&qgroup->tqg_lock);
870 	gtask->gt_taskqueue = NULL;
871 	gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
872 }
873 
874 static void
taskqgroup_binder(void * ctx)875 taskqgroup_binder(void *ctx)
876 {
877 	struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
878 	cpuset_t mask;
879 	int error;
880 
881 	CPU_ZERO(&mask);
882 	CPU_SET(gtask->bt_cpuid, &mask);
883 	error = cpuset_setthread(curthread->td_tid, &mask);
884 	thread_lock(curthread);
885 	sched_bind(curthread, gtask->bt_cpuid);
886 	thread_unlock(curthread);
887 
888 	if (error)
889 		printf("%s: binding curthread failed: %d\n", __func__, error);
890 	free(gtask, M_DEVBUF);
891 }
892 
893 static void
taskqgroup_bind(struct taskqgroup * qgroup)894 taskqgroup_bind(struct taskqgroup *qgroup)
895 {
896 	struct taskq_bind_task *gtask;
897 	int i;
898 
899 	/*
900 	 * Bind taskqueue threads to specific CPUs, if they have been assigned
901 	 * one.
902 	 */
903 	if (qgroup->tqg_cnt == 1)
904 		return;
905 
906 	for (i = 0; i < qgroup->tqg_cnt; i++) {
907 		gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
908 		GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
909 		gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
910 		grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
911 		    &gtask->bt_task);
912 	}
913 }
914 
915 static void
taskqgroup_config_init(void * arg)916 taskqgroup_config_init(void *arg)
917 {
918 	struct taskqgroup *qgroup = qgroup_config;
919 	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
920 
921 	LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
922 	    grouptask, gt_list);
923 	qgroup->tqg_queue[0].tgc_cnt = 0;
924 	taskqgroup_cpu_create(qgroup, 0, 0);
925 
926 	qgroup->tqg_cnt = 1;
927 	qgroup->tqg_stride = 1;
928 }
929 
930 SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
931 	taskqgroup_config_init, NULL);
932 
933 static int
_taskqgroup_adjust(struct taskqgroup * qgroup,int cnt,int stride)934 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
935 {
936 	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
937 	struct grouptask *gtask;
938 	int i, k, old_cnt, old_cpu, cpu;
939 
940 	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
941 
942 	if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
943 		printf("%s: failed cnt: %d stride: %d "
944 		    "mp_ncpus: %d tqg_smp_started: %d\n",
945 		    __func__, cnt, stride, mp_ncpus, tqg_smp_started);
946 		return (EINVAL);
947 	}
948 	if (qgroup->tqg_adjusting) {
949 		printf("%s failed: adjusting\n", __func__);
950 		return (EBUSY);
951 	}
952 	qgroup->tqg_adjusting = 1;
953 	old_cnt = qgroup->tqg_cnt;
954 	old_cpu = 0;
955 	if (old_cnt < cnt)
956 		old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
957 	mtx_unlock(&qgroup->tqg_lock);
958 	/*
959 	 * Set up queue for tasks added before boot.
960 	 */
961 	if (old_cnt == 0) {
962 		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
963 		    grouptask, gt_list);
964 		qgroup->tqg_queue[0].tgc_cnt = 0;
965 	}
966 
967 	/*
968 	 * If new taskq threads have been added.
969 	 */
970 	cpu = old_cpu;
971 	for (i = old_cnt; i < cnt; i++) {
972 		taskqgroup_cpu_create(qgroup, i, cpu);
973 
974 		for (k = 0; k < stride; k++)
975 			cpu = CPU_NEXT(cpu);
976 	}
977 	mtx_lock(&qgroup->tqg_lock);
978 	qgroup->tqg_cnt = cnt;
979 	qgroup->tqg_stride = stride;
980 
981 	/*
982 	 * Adjust drivers to use new taskqs.
983 	 */
984 	for (i = 0; i < old_cnt; i++) {
985 		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
986 			LIST_REMOVE(gtask, gt_list);
987 			qgroup->tqg_queue[i].tgc_cnt--;
988 			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
989 		}
990 	}
991 	mtx_unlock(&qgroup->tqg_lock);
992 
993 	while ((gtask = LIST_FIRST(&gtask_head))) {
994 		LIST_REMOVE(gtask, gt_list);
995 		if (gtask->gt_cpu == -1)
996 			taskqgroup_attach_deferred(qgroup, gtask);
997 		else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
998 			taskqgroup_attach_deferred(qgroup, gtask);
999 	}
1000 
1001 #ifdef INVARIANTS
1002 	mtx_lock(&qgroup->tqg_lock);
1003 	for (i = 0; i < qgroup->tqg_cnt; i++) {
1004 		MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
1005 		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
1006 			MPASS(gtask->gt_taskqueue != NULL);
1007 	}
1008 	mtx_unlock(&qgroup->tqg_lock);
1009 #endif
1010 	/*
1011 	 * If taskq thread count has been reduced.
1012 	 */
1013 	for (i = cnt; i < old_cnt; i++)
1014 		taskqgroup_cpu_remove(qgroup, i);
1015 
1016 	taskqgroup_bind(qgroup);
1017 
1018 	mtx_lock(&qgroup->tqg_lock);
1019 	qgroup->tqg_adjusting = 0;
1020 
1021 	return (0);
1022 }
1023 
1024 int
taskqgroup_adjust(struct taskqgroup * qgroup,int cnt,int stride)1025 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
1026 {
1027 	int error;
1028 
1029 	mtx_lock(&qgroup->tqg_lock);
1030 	error = _taskqgroup_adjust(qgroup, cnt, stride);
1031 	mtx_unlock(&qgroup->tqg_lock);
1032 
1033 	return (error);
1034 }
1035 
1036 struct taskqgroup *
taskqgroup_create(const char * name)1037 taskqgroup_create(const char *name)
1038 {
1039 	struct taskqgroup *qgroup;
1040 
1041 	qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
1042 	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1043 	qgroup->tqg_name = name;
1044 	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1045 
1046 	return (qgroup);
1047 }
1048 
/*
 * Counterpart of taskqgroup_create().  No teardown is performed here: the
 * group, its lock and its queues are left untouched.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

	(void)qgroup;
}
1054 
1055 void
taskqgroup_config_gtask_init(void * ctx,struct grouptask * gtask,gtask_fn_t * fn,const char * name)1056 taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
1057     const char *name)
1058 {
1059 
1060 	GROUPTASK_INIT(gtask, 0, fn, ctx);
1061 	taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
1062 }
1063 
1064 void
taskqgroup_config_gtask_deinit(struct grouptask * gtask)1065 taskqgroup_config_gtask_deinit(struct grouptask *gtask)
1066 {
1067 
1068 	taskqgroup_detach(qgroup_config, gtask);
1069 }
1070