xref: /linux-6.15/kernel/kthread.c (revision beda0e72)
1 /* Kernel thread helper functions.
2  *   Copyright (C) 2004 IBM Corporation, Rusty Russell.
3  *
4  * Creation is done via kthreadd, so that we get a clean environment
5  * even if we're invoked from userspace (think modprobe, hotplug cpu,
6  * etc.).
7  */
8 #include <uapi/linux/sched/types.h>
9 #include <linux/sched.h>
10 #include <linux/sched/task.h>
11 #include <linux/kthread.h>
12 #include <linux/completion.h>
13 #include <linux/err.h>
14 #include <linux/cpuset.h>
15 #include <linux/unistd.h>
16 #include <linux/file.h>
17 #include <linux/export.h>
18 #include <linux/mutex.h>
19 #include <linux/slab.h>
20 #include <linux/freezer.h>
21 #include <linux/ptrace.h>
22 #include <linux/uaccess.h>
23 #include <trace/events/sched.h>
24 
25 static DEFINE_SPINLOCK(kthread_create_lock);
26 static LIST_HEAD(kthread_create_list);
27 struct task_struct *kthreadd_task;
28 
29 struct kthread_create_info
30 {
31 	/* Information passed to kthread() from kthreadd. */
32 	int (*threadfn)(void *data);
33 	void *data;
34 	int node;
35 
36 	/* Result passed back to kthread_create() from kthreadd. */
37 	struct task_struct *result;
38 	struct completion *done;
39 
40 	struct list_head list;
41 };
42 
43 struct kthread {
44 	unsigned long flags;
45 	unsigned int cpu;
46 	void *data;
47 	struct completion parked;
48 	struct completion exited;
49 #ifdef CONFIG_BLK_CGROUP
50 	struct cgroup_subsys_state *blkcg_css;
51 #endif
52 };
53 
54 enum KTHREAD_BITS {
55 	KTHREAD_IS_PER_CPU = 0,
56 	KTHREAD_SHOULD_STOP,
57 	KTHREAD_SHOULD_PARK,
58 };
59 
60 static inline void set_kthread_struct(void *kthread)
61 {
62 	/*
63 	 * We abuse ->set_child_tid to avoid adding a new member and because
64 	 * it can't be wrongly copied by copy_process(). We also rely on the
65 	 * fact that the caller can't exec, so PF_KTHREAD can't be cleared.
66 	 */
67 	current->set_child_tid = (__force void __user *)kthread;
68 }
69 
70 static inline struct kthread *to_kthread(struct task_struct *k)
71 {
72 	WARN_ON(!(k->flags & PF_KTHREAD));
73 	return (__force void *)k->set_child_tid;
74 }
75 
76 void free_kthread_struct(struct task_struct *k)
77 {
78 	struct kthread *kthread;
79 
80 	/*
81 	 * Can be NULL if this kthread was created by kernel_thread()
82 	 * or if kmalloc() in kthread() failed.
83 	 */
84 	kthread = to_kthread(k);
85 #ifdef CONFIG_BLK_CGROUP
86 	WARN_ON_ONCE(kthread && kthread->blkcg_css);
87 #endif
88 	kfree(kthread);
89 }
90 
91 /**
92  * kthread_should_stop - should this kthread return now?
93  *
94  * When someone calls kthread_stop() on your kthread, it will be woken
95  * and this will return true.  You should then return, and your return
96  * value will be passed through to kthread_stop().
97  */
98 bool kthread_should_stop(void)
99 {
100 	return test_bit(KTHREAD_SHOULD_STOP, &to_kthread(current)->flags);
101 }
102 EXPORT_SYMBOL(kthread_should_stop);
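
/*
 * Usage sketch (illustrative only; my_thread_fn() and my_do_work() are
 * hypothetical helpers, not part of this file): the typical thread
 * function polls kthread_should_stop() and returns once it is true, and
 * the return value is handed back to the kthread_stop() caller.
 *
 *	static int my_thread_fn(void *data)
 *	{
 *		while (!kthread_should_stop()) {
 *			my_do_work(data);
 *			schedule_timeout_interruptible(HZ);
 *		}
 *		return 0;
 *	}
 */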
103 
104 bool __kthread_should_park(struct task_struct *k)
105 {
106 	return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(k)->flags);
107 }
108 EXPORT_SYMBOL_GPL(__kthread_should_park);
109 
110 /**
111  * kthread_should_park - should this kthread park now?
112  *
113  * When someone calls kthread_park() on your kthread, it will be woken
114  * and this will return true.  You should then do the necessary
115  * cleanup and call kthread_parkme().
116  *
117  * Similar to kthread_should_stop(), but this keeps the thread alive
118  * and in a parked state. kthread_unpark() "restarts" the thread and
119  * calls the thread function again.
120  */
121 bool kthread_should_park(void)
122 {
123 	return __kthread_should_park(current);
124 }
125 EXPORT_SYMBOL_GPL(kthread_should_park);
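
/*
 * Usage sketch (illustrative only; my_do_work() is hypothetical): a
 * park-aware loop checks kthread_should_park() and calls kthread_parkme(),
 * which blocks in TASK_PARKED until kthread_unpark() is called.
 *
 *	static int my_thread_fn(void *data)
 *	{
 *		while (!kthread_should_stop()) {
 *			if (kthread_should_park())
 *				kthread_parkme();
 *			my_do_work(data);
 *		}
 *		return 0;
 *	}
 */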
126 
127 /**
128  * kthread_freezable_should_stop - should this freezable kthread return now?
129  * @was_frozen: optional out parameter, indicates whether %current was frozen
130  *
131  * kthread_should_stop() for freezable kthreads, which will enter
132  * the refrigerator if necessary.  This function is safe from kthread_stop() /
133  * freezer deadlock and freezable kthreads should use this function instead
134  * of calling try_to_freeze() directly.
135  */
136 bool kthread_freezable_should_stop(bool *was_frozen)
137 {
138 	bool frozen = false;
139 
140 	might_sleep();
141 
142 	if (unlikely(freezing(current)))
143 		frozen = __refrigerator(true);
144 
145 	if (was_frozen)
146 		*was_frozen = frozen;
147 
148 	return kthread_should_stop();
149 }
150 EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
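
/*
 * Usage sketch (illustrative only; my_do_work() is hypothetical): a
 * freezable kthread clears PF_NOFREEZE with set_freezable() and then uses
 * kthread_freezable_should_stop() as its loop condition, so it enters the
 * refrigerator at a safe point instead of calling try_to_freeze() itself.
 *
 *	static int my_thread_fn(void *data)
 *	{
 *		bool was_frozen;
 *
 *		set_freezable();
 *		while (!kthread_freezable_should_stop(&was_frozen)) {
 *			my_do_work(data);
 *			schedule_timeout_interruptible(HZ);
 *		}
 *		return 0;
 *	}
 */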
151 
152 /**
153  * kthread_data - return data value specified on kthread creation
154  * @task: kthread task in question
155  *
156  * Return the data value specified when kthread @task was created.
157  * The caller is responsible for ensuring the validity of @task when
158  * calling this function.
159  */
160 void *kthread_data(struct task_struct *task)
161 {
162 	return to_kthread(task)->data;
163 }
164 
165 /**
166  * kthread_probe_data - speculative version of kthread_data()
167  * @task: possible kthread task in question
168  *
169  * @task could be a kthread task.  Return the data value specified when it
170  * was created if accessible.  If @task isn't a kthread task or its data is
171  * inaccessible for any reason, %NULL is returned.  This function requires
172  * that @task itself is safe to dereference.
173  */
174 void *kthread_probe_data(struct task_struct *task)
175 {
176 	struct kthread *kthread = to_kthread(task);
177 	void *data = NULL;
178 
179 	probe_kernel_read(&data, &kthread->data, sizeof(data));
180 	return data;
181 }
182 
183 static void __kthread_parkme(struct kthread *self)
184 {
185 	for (;;) {
186 		/*
187 		 * TASK_PARKED is a special state; we must serialize against
188 		 * possible pending wakeups to avoid store-store collisions on
189 		 * task->state.
190 		 *
191 		 * Such a collision might possibly result in the task state
192 		 * changing from TASK_PARKED and us failing the
193 		 * wait_task_inactive() in kthread_park().
194 		 */
195 		set_special_state(TASK_PARKED);
196 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
197 			break;
198 
199 		complete(&self->parked);
200 		schedule();
201 	}
202 	__set_current_state(TASK_RUNNING);
203 }
204 
205 void kthread_parkme(void)
206 {
207 	__kthread_parkme(to_kthread(current));
208 }
209 EXPORT_SYMBOL_GPL(kthread_parkme);
210 
211 static int kthread(void *_create)
212 {
213 	/* Copy data onto our own stack; @create may be freed after completion. */
214 	struct kthread_create_info *create = _create;
215 	int (*threadfn)(void *data) = create->threadfn;
216 	void *data = create->data;
217 	struct completion *done;
218 	struct kthread *self;
219 	int ret;
220 
221 	self = kzalloc(sizeof(*self), GFP_KERNEL);
222 	set_kthread_struct(self);
223 
224 	/* If user was SIGKILLed, I release the structure. */
225 	done = xchg(&create->done, NULL);
226 	if (!done) {
227 		kfree(create);
228 		do_exit(-EINTR);
229 	}
230 
231 	if (!self) {
232 		create->result = ERR_PTR(-ENOMEM);
233 		complete(done);
234 		do_exit(-ENOMEM);
235 	}
236 
237 	self->data = data;
238 	init_completion(&self->exited);
239 	init_completion(&self->parked);
240 	current->vfork_done = &self->exited;
241 
242 	/* OK, tell user we're spawned, wait for stop or wakeup */
243 	__set_current_state(TASK_UNINTERRUPTIBLE);
244 	create->result = current;
245 	complete(done);
246 	schedule();
247 
248 	ret = -EINTR;
249 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
250 		cgroup_kthread_ready();
251 		__kthread_parkme(self);
252 		ret = threadfn(data);
253 	}
254 	do_exit(ret);
255 }
256 
257 /* called from do_fork() to get node information for the task about to be created */
258 int tsk_fork_get_node(struct task_struct *tsk)
259 {
260 #ifdef CONFIG_NUMA
261 	if (tsk == kthreadd_task)
262 		return tsk->pref_node_fork;
263 #endif
264 	return NUMA_NO_NODE;
265 }
266 
267 static void create_kthread(struct kthread_create_info *create)
268 {
269 	int pid;
270 
271 #ifdef CONFIG_NUMA
272 	current->pref_node_fork = create->node;
273 #endif
274 	/* We want our own signal handler (we take no signals by default). */
275 	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
276 	if (pid < 0) {
277 		/* If user was SIGKILLed, I release the structure. */
278 		struct completion *done = xchg(&create->done, NULL);
279 
280 		if (!done) {
281 			kfree(create);
282 			return;
283 		}
284 		create->result = ERR_PTR(pid);
285 		complete(done);
286 	}
287 }
288 
289 static __printf(4, 0)
290 struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
291 						    void *data, int node,
292 						    const char namefmt[],
293 						    va_list args)
294 {
295 	DECLARE_COMPLETION_ONSTACK(done);
296 	struct task_struct *task;
297 	struct kthread_create_info *create = kmalloc(sizeof(*create),
298 						     GFP_KERNEL);
299 
300 	if (!create)
301 		return ERR_PTR(-ENOMEM);
302 	create->threadfn = threadfn;
303 	create->data = data;
304 	create->node = node;
305 	create->done = &done;
306 
307 	spin_lock(&kthread_create_lock);
308 	list_add_tail(&create->list, &kthread_create_list);
309 	spin_unlock(&kthread_create_lock);
310 
311 	wake_up_process(kthreadd_task);
312 	/*
313 	 * Wait for completion in killable state, for I might be chosen by
314 	 * the OOM killer while kthreadd is trying to allocate memory for
315 	 * the new kernel thread.
316 	 */
317 	if (unlikely(wait_for_completion_killable(&done))) {
318 		/*
319 		 * If I was SIGKILLed before kthreadd (or new kernel thread)
320 		 * calls complete(), leave the cleanup of this structure to
321 		 * that thread.
322 		 */
323 		if (xchg(&create->done, NULL))
324 			return ERR_PTR(-EINTR);
325 		/*
326 		 * kthreadd (or new kernel thread) will call complete()
327 		 * shortly.
328 		 */
329 		wait_for_completion(&done);
330 	}
331 	task = create->result;
332 	if (!IS_ERR(task)) {
333 		static const struct sched_param param = { .sched_priority = 0 };
334 		char name[TASK_COMM_LEN];
335 
336 		/*
337 		 * task is already visible to other tasks, so updating
338 		 * COMM must be protected.
339 		 */
340 		vsnprintf(name, sizeof(name), namefmt, args);
341 		set_task_comm(task, name);
342 		/*
343 		 * root may have changed our (kthreadd's) priority or CPU mask.
344 		 * The kernel thread should not inherit these properties.
345 		 */
346 		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
347 		set_cpus_allowed_ptr(task, cpu_all_mask);
348 	}
349 	kfree(create);
350 	return task;
351 }
352 
353 /**
354  * kthread_create_on_node - create a kthread.
355  * @threadfn: the function to run until signal_pending(current).
356  * @data: data ptr for @threadfn.
357  * @node: task and thread structures for the thread are allocated on this node
358  * @namefmt: printf-style name for the thread.
359  *
360  * Description: This helper function creates and names a kernel
361  * thread.  The thread will be stopped: use wake_up_process() to start
362  * it.  See also kthread_run().  The new thread has SCHED_NORMAL policy and
363  * is affine to all CPUs.
364  *
365  * If the thread is going to be bound to a particular cpu, give its node
366  * in @node to get NUMA affinity for the kthread stack; otherwise pass NUMA_NO_NODE.
367  * When woken, the thread will run @threadfn() with @data as its
368  * argument. @threadfn() can either call do_exit() directly if it is a
369  * standalone thread for which no one will call kthread_stop(), or
370  * return when 'kthread_should_stop()' is true (which means
371  * kthread_stop() has been called).  The return value should be zero
372  * or a negative error number; it will be passed to kthread_stop().
373  *
374  * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR).
375  */
376 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
377 					   void *data, int node,
378 					   const char namefmt[],
379 					   ...)
380 {
381 	struct task_struct *task;
382 	va_list args;
383 
384 	va_start(args, namefmt);
385 	task = __kthread_create_on_node(threadfn, data, node, namefmt, args);
386 	va_end(args);
387 
388 	return task;
389 }
390 EXPORT_SYMBOL(kthread_create_on_node);
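
/*
 * Usage sketch (illustrative only; my_thread_fn() and my_dev are
 * hypothetical): the thread is created stopped and only starts running
 * @threadfn after wake_up_process().
 *
 *	struct task_struct *task;
 *
 *	task = kthread_create_on_node(my_thread_fn, my_dev, NUMA_NO_NODE,
 *				      "my_thread/%d", my_dev->id);
 *	if (IS_ERR(task))
 *		return PTR_ERR(task);
 *	wake_up_process(task);
 */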
391 
392 static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
393 {
394 	unsigned long flags;
395 
396 	if (!wait_task_inactive(p, state)) {
397 		WARN_ON(1);
398 		return;
399 	}
400 
401 	/* It's safe because the task is inactive. */
402 	raw_spin_lock_irqsave(&p->pi_lock, flags);
403 	do_set_cpus_allowed(p, mask);
404 	p->flags |= PF_NO_SETAFFINITY;
405 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
406 }
407 
408 static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
409 {
410 	__kthread_bind_mask(p, cpumask_of(cpu), state);
411 }
412 
413 void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
414 {
415 	__kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
416 }
417 
418 /**
419  * kthread_bind - bind a just-created kthread to a cpu.
420  * @p: thread created by kthread_create().
421  * @cpu: cpu (might not be online, must be possible) for @p to run on.
422  *
423  * Description: This function is equivalent to set_cpus_allowed(),
424  * except that @cpu doesn't need to be online, and the thread must be
425  * stopped (i.e., just returned from kthread_create()).
426  */
427 void kthread_bind(struct task_struct *p, unsigned int cpu)
428 {
429 	__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
430 }
431 EXPORT_SYMBOL(kthread_bind);
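
/*
 * Usage sketch (illustrative only; my_thread_fn() and my_data are
 * hypothetical): the binding has to happen after kthread_create() but
 * before the thread is first woken.
 *
 *	struct task_struct *task;
 *
 *	task = kthread_create(my_thread_fn, my_data, "my_worker/%u", cpu);
 *	if (!IS_ERR(task)) {
 *		kthread_bind(task, cpu);
 *		wake_up_process(task);
 *	}
 */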
432 
433 /**
434  * kthread_create_on_cpu - Create a cpu bound kthread
435  * @threadfn: the function to run until signal_pending(current).
436  * @data: data ptr for @threadfn.
437  * @cpu: The cpu to which the thread should be bound.
438  * @namefmt: printf-style name for the thread. Format is restricted
439  *	     to "name.*%u". Code fills in cpu number.
440  *
441  * Description: This helper function creates and names a kernel thread.
442  * The thread will be woken and put into park mode.
443  */
444 struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
445 					  void *data, unsigned int cpu,
446 					  const char *namefmt)
447 {
448 	struct task_struct *p;
449 
450 	p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt,
451 				   cpu);
452 	if (IS_ERR(p))
453 		return p;
454 	kthread_bind(p, cpu);
455 	/* CPU hotplug needs to bind it once again when unparking the thread. */
456 	set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags);
457 	to_kthread(p)->cpu = cpu;
458 	return p;
459 }
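
/*
 * Usage sketch (illustrative only; my_percpu_fn() and my_data are
 * hypothetical): the returned thread is already bound to @cpu and marked
 * KTHREAD_IS_PER_CPU, so kthread_unpark() can re-bind it after CPU
 * hotplug; the caller still has to start it.
 *
 *	struct task_struct *task;
 *
 *	task = kthread_create_on_cpu(my_percpu_fn, my_data, cpu,
 *				     "my_percpu/%u");
 *	if (!IS_ERR(task))
 *		wake_up_process(task);
 */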
460 
461 /**
462  * kthread_unpark - unpark a thread created by kthread_create().
463  * @k:		thread created by kthread_create().
464  *
465  * Sets kthread_should_park() for @k to return false, wakes it, and
466  * waits for it to return. If the thread is marked percpu then it is
467  * bound to the cpu again.
468  */
469 void kthread_unpark(struct task_struct *k)
470 {
471 	struct kthread *kthread = to_kthread(k);
472 
473 	/*
474 	 * Newly created kthread was parked when the CPU was offline.
475 	 * The binding was lost and we need to set it again.
476 	 */
477 	if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
478 		__kthread_bind(k, kthread->cpu, TASK_PARKED);
479 
480 	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
481 	/*
482 	 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
483 	 */
484 	wake_up_state(k, TASK_PARKED);
485 }
486 EXPORT_SYMBOL_GPL(kthread_unpark);
487 
488 /**
489  * kthread_park - park a thread created by kthread_create().
490  * @k: thread created by kthread_create().
491  *
492  * Sets kthread_should_park() for @k to return true, wakes it, and
493  * waits for it to park. This can also be called after kthread_create()
494  * instead of calling wake_up_process(): the thread will park without
495  * calling threadfn().
496  *
497  * Returns 0 if the thread is parked, -ENOSYS if the thread exited.
498  * If called by the kthread itself, just the park bit is set.
499  */
500 int kthread_park(struct task_struct *k)
501 {
502 	struct kthread *kthread = to_kthread(k);
503 
504 	if (WARN_ON(k->flags & PF_EXITING))
505 		return -ENOSYS;
506 
507 	if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
508 		return -EBUSY;
509 
510 	set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
511 	if (k != current) {
512 		wake_up_process(k);
513 		/*
514 		 * Wait for __kthread_parkme() to complete(); this means we
515 		 * _will_ have TASK_PARKED and are about to call schedule().
516 		 */
517 		wait_for_completion(&kthread->parked);
518 		/*
519 		 * Now wait for that schedule() to complete and the task to
520 		 * get scheduled out.
521 		 */
522 		WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED));
523 	}
524 
525 	return 0;
526 }
527 EXPORT_SYMBOL_GPL(kthread_park);
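
/*
 * Usage sketch (illustrative only; my_reconfigure_hw() is hypothetical):
 * a controlling task parks the kthread around a critical reconfiguration
 * and unparks it afterwards.
 *
 *	if (!kthread_park(task)) {
 *		my_reconfigure_hw();
 *		kthread_unpark(task);
 *	}
 */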
528 
529 /**
530  * kthread_stop - stop a thread created by kthread_create().
531  * @k: thread created by kthread_create().
532  *
533  * Sets kthread_should_stop() for @k to return true, wakes it, and
534  * waits for it to exit. This can also be called after kthread_create()
535  * instead of calling wake_up_process(): the thread will exit without
536  * calling threadfn().
537  *
538  * If threadfn() may call do_exit() itself, the caller must ensure
539  * task_struct can't go away.
540  *
541  * Returns the result of threadfn(), or %-EINTR if wake_up_process()
542  * was never called.
543  */
544 int kthread_stop(struct task_struct *k)
545 {
546 	struct kthread *kthread;
547 	int ret;
548 
549 	trace_sched_kthread_stop(k);
550 
551 	get_task_struct(k);
552 	kthread = to_kthread(k);
553 	set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
554 	kthread_unpark(k);
555 	wake_up_process(k);
556 	wait_for_completion(&kthread->exited);
557 	ret = k->exit_code;
558 	put_task_struct(k);
559 
560 	trace_sched_kthread_stop_ret(ret);
561 	return ret;
562 }
563 EXPORT_SYMBOL(kthread_stop);
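
/*
 * Usage sketch (illustrative only; my_thread_fn() and my_data are
 * hypothetical) using the kthread_run() wrapper from <linux/kthread.h>:
 * kthread_stop() returns whatever my_thread_fn() returned.
 *
 *	struct task_struct *task;
 *
 *	task = kthread_run(my_thread_fn, my_data, "my_thread");
 *	if (IS_ERR(task))
 *		return PTR_ERR(task);
 *
 * and later, on teardown:
 *
 *	int ret = kthread_stop(task);
 */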
564 
565 int kthreadd(void *unused)
566 {
567 	struct task_struct *tsk = current;
568 
569 	/* Set up a clean context for our children to inherit. */
570 	set_task_comm(tsk, "kthreadd");
571 	ignore_signals(tsk);
572 	set_cpus_allowed_ptr(tsk, cpu_all_mask);
573 	set_mems_allowed(node_states[N_MEMORY]);
574 
575 	current->flags |= PF_NOFREEZE;
576 	cgroup_init_kthreadd();
577 
578 	for (;;) {
579 		set_current_state(TASK_INTERRUPTIBLE);
580 		if (list_empty(&kthread_create_list))
581 			schedule();
582 		__set_current_state(TASK_RUNNING);
583 
584 		spin_lock(&kthread_create_lock);
585 		while (!list_empty(&kthread_create_list)) {
586 			struct kthread_create_info *create;
587 
588 			create = list_entry(kthread_create_list.next,
589 					    struct kthread_create_info, list);
590 			list_del_init(&create->list);
591 			spin_unlock(&kthread_create_lock);
592 
593 			create_kthread(create);
594 
595 			spin_lock(&kthread_create_lock);
596 		}
597 		spin_unlock(&kthread_create_lock);
598 	}
599 
600 	return 0;
601 }
602 
603 void __kthread_init_worker(struct kthread_worker *worker,
604 				const char *name,
605 				struct lock_class_key *key)
606 {
607 	memset(worker, 0, sizeof(struct kthread_worker));
608 	raw_spin_lock_init(&worker->lock);
609 	lockdep_set_class_and_name(&worker->lock, key, name);
610 	INIT_LIST_HEAD(&worker->work_list);
611 	INIT_LIST_HEAD(&worker->delayed_work_list);
612 }
613 EXPORT_SYMBOL_GPL(__kthread_init_worker);
614 
615 /**
616  * kthread_worker_fn - kthread function to process kthread_worker
617  * @worker_ptr: pointer to initialized kthread_worker
618  *
619  * This function implements the main cycle of a kthread worker. It processes
620  * work_list until it is stopped with kthread_stop(). It sleeps when the queue
621  * is empty.
622  *
623  * The works must not hold any locks or leave preemption or interrupts disabled
624  * when they finish. There is a defined safe point for freezing after one work
625  * finishes and before a new one is started.
626  *
627  * Also the works must not be handled by more than one worker at the same time,
628  * see also kthread_queue_work().
629  */
630 int kthread_worker_fn(void *worker_ptr)
631 {
632 	struct kthread_worker *worker = worker_ptr;
633 	struct kthread_work *work;
634 
635 	/*
636 	 * FIXME: Update the check and remove the assignment when all kthread
637 	 * worker users are created using kthread_create_worker*() functions.
638 	 */
639 	WARN_ON(worker->task && worker->task != current);
640 	worker->task = current;
641 
642 	if (worker->flags & KTW_FREEZABLE)
643 		set_freezable();
644 
645 repeat:
646 	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
647 
648 	if (kthread_should_stop()) {
649 		__set_current_state(TASK_RUNNING);
650 		raw_spin_lock_irq(&worker->lock);
651 		worker->task = NULL;
652 		raw_spin_unlock_irq(&worker->lock);
653 		return 0;
654 	}
655 
656 	work = NULL;
657 	raw_spin_lock_irq(&worker->lock);
658 	if (!list_empty(&worker->work_list)) {
659 		work = list_first_entry(&worker->work_list,
660 					struct kthread_work, node);
661 		list_del_init(&work->node);
662 	}
663 	worker->current_work = work;
664 	raw_spin_unlock_irq(&worker->lock);
665 
666 	if (work) {
667 		__set_current_state(TASK_RUNNING);
668 		work->func(work);
669 	} else if (!freezing(current))
670 		schedule();
671 
672 	try_to_freeze();
673 	cond_resched();
674 	goto repeat;
675 }
676 EXPORT_SYMBOL_GPL(kthread_worker_fn);
677 
678 static __printf(3, 0) struct kthread_worker *
679 __kthread_create_worker(int cpu, unsigned int flags,
680 			const char namefmt[], va_list args)
681 {
682 	struct kthread_worker *worker;
683 	struct task_struct *task;
684 	int node = -1;
685 
686 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
687 	if (!worker)
688 		return ERR_PTR(-ENOMEM);
689 
690 	kthread_init_worker(worker);
691 
692 	if (cpu >= 0)
693 		node = cpu_to_node(cpu);
694 
695 	task = __kthread_create_on_node(kthread_worker_fn, worker,
696 						node, namefmt, args);
697 	if (IS_ERR(task))
698 		goto fail_task;
699 
700 	if (cpu >= 0)
701 		kthread_bind(task, cpu);
702 
703 	worker->flags = flags;
704 	worker->task = task;
705 	wake_up_process(task);
706 	return worker;
707 
708 fail_task:
709 	kfree(worker);
710 	return ERR_CAST(task);
711 }
712 
713 /**
714  * kthread_create_worker - create a kthread worker
715  * @flags: flags modifying the default behavior of the worker
716  * @namefmt: printf-style name for the kthread worker (task).
717  *
718  * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
719  * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
720  * when the worker was SIGKILLed.
721  */
722 struct kthread_worker *
723 kthread_create_worker(unsigned int flags, const char namefmt[], ...)
724 {
725 	struct kthread_worker *worker;
726 	va_list args;
727 
728 	va_start(args, namefmt);
729 	worker = __kthread_create_worker(-1, flags, namefmt, args);
730 	va_end(args);
731 
732 	return worker;
733 }
734 EXPORT_SYMBOL(kthread_create_worker);
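
/*
 * Usage sketch (illustrative only; my_work_fn() is hypothetical): create a
 * worker, queue a work item on it, wait for it and tear everything down.
 * kthread_init_work() comes from <linux/kthread.h>.
 *
 *	static void my_work_fn(struct kthread_work *work)
 *	{
 *		pr_info("my work ran\n");
 *	}
 *
 *	struct kthread_worker *worker;
 *	struct kthread_work my_work;
 *
 *	worker = kthread_create_worker(0, "my_worker");
 *	if (IS_ERR(worker))
 *		return PTR_ERR(worker);
 *	kthread_init_work(&my_work, my_work_fn);
 *	kthread_queue_work(worker, &my_work);
 *	kthread_flush_work(&my_work);
 *	kthread_destroy_worker(worker);
 */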
735 
736 /**
737  * kthread_create_worker_on_cpu - create a kthread worker and bind it
738  *	to a given CPU and the associated NUMA node.
739  * @cpu: CPU number
740  * @flags: flags modifying the default behavior of the worker
741  * @namefmt: printf-style name for the kthread worker (task).
742  *
743  * Use a valid CPU number if you want to bind the kthread worker
744  * to the given CPU and the associated NUMA node.
745  *
746  * A good practice is to also include the cpu number in the worker name.
747  * For example, use kthread_create_worker_on_cpu(cpu, 0, "helper/%d", cpu).
748  *
749  * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
750  * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
751  * when the worker was SIGKILLed.
752  */
753 struct kthread_worker *
754 kthread_create_worker_on_cpu(int cpu, unsigned int flags,
755 			     const char namefmt[], ...)
756 {
757 	struct kthread_worker *worker;
758 	va_list args;
759 
760 	va_start(args, namefmt);
761 	worker = __kthread_create_worker(cpu, flags, namefmt, args);
762 	va_end(args);
763 
764 	return worker;
765 }
766 EXPORT_SYMBOL(kthread_create_worker_on_cpu);
767 
768 /*
769  * Returns true when the work could not be queued at the moment.
770  * It happens when it is already pending in a worker list
771  * or when it is being cancelled.
772  */
773 static inline bool queuing_blocked(struct kthread_worker *worker,
774 				   struct kthread_work *work)
775 {
776 	lockdep_assert_held(&worker->lock);
777 
778 	return !list_empty(&work->node) || work->canceling;
779 }
780 
781 static void kthread_insert_work_sanity_check(struct kthread_worker *worker,
782 					     struct kthread_work *work)
783 {
784 	lockdep_assert_held(&worker->lock);
785 	WARN_ON_ONCE(!list_empty(&work->node));
786 	/* Do not use a work with >1 worker, see kthread_queue_work() */
787 	WARN_ON_ONCE(work->worker && work->worker != worker);
788 }
789 
790 /* insert @work before @pos in @worker */
791 static void kthread_insert_work(struct kthread_worker *worker,
792 				struct kthread_work *work,
793 				struct list_head *pos)
794 {
795 	kthread_insert_work_sanity_check(worker, work);
796 
797 	list_add_tail(&work->node, pos);
798 	work->worker = worker;
799 	if (!worker->current_work && likely(worker->task))
800 		wake_up_process(worker->task);
801 }
802 
803 /**
804  * kthread_queue_work - queue a kthread_work
805  * @worker: target kthread_worker
806  * @work: kthread_work to queue
807  *
808  * Queue @work for async execution on @worker.  The worker must have
809  * been created with kthread_create_worker().  Returns %true
810  * if @work was successfully queued, %false if it was already pending.
811  *
812  * Reinitialize the work if it needs to be used by another worker.
813  * For example, when the worker was stopped and started again.
814  */
815 bool kthread_queue_work(struct kthread_worker *worker,
816 			struct kthread_work *work)
817 {
818 	bool ret = false;
819 	unsigned long flags;
820 
821 	raw_spin_lock_irqsave(&worker->lock, flags);
822 	if (!queuing_blocked(worker, work)) {
823 		kthread_insert_work(worker, work, &worker->work_list);
824 		ret = true;
825 	}
826 	raw_spin_unlock_irqrestore(&worker->lock, flags);
827 	return ret;
828 }
829 EXPORT_SYMBOL_GPL(kthread_queue_work);
830 
831 /**
832  * kthread_delayed_work_timer_fn - callback that queues the associated kthread
833  *	delayed work when the timer expires.
834  * @t: pointer to the expired timer
835  *
836  * The format of the function is defined by struct timer_list.
837  * It is called from an irqsafe timer with interrupts already disabled.
838  */
839 void kthread_delayed_work_timer_fn(struct timer_list *t)
840 {
841 	struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
842 	struct kthread_work *work = &dwork->work;
843 	struct kthread_worker *worker = work->worker;
844 	unsigned long flags;
845 
846 	/*
847 	 * This might happen when a pending work is reinitialized.
848 	 * It means that the work is being used in a wrong way.
849 	 */
850 	if (WARN_ON_ONCE(!worker))
851 		return;
852 
853 	raw_spin_lock_irqsave(&worker->lock, flags);
854 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
855 	WARN_ON_ONCE(work->worker != worker);
856 
857 	/* Move the work from worker->delayed_work_list. */
858 	WARN_ON_ONCE(list_empty(&work->node));
859 	list_del_init(&work->node);
860 	kthread_insert_work(worker, work, &worker->work_list);
861 
862 	raw_spin_unlock_irqrestore(&worker->lock, flags);
863 }
864 EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
865 
866 void __kthread_queue_delayed_work(struct kthread_worker *worker,
867 				  struct kthread_delayed_work *dwork,
868 				  unsigned long delay)
869 {
870 	struct timer_list *timer = &dwork->timer;
871 	struct kthread_work *work = &dwork->work;
872 
873 	WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn);
874 
875 	/*
876 	 * If @delay is 0, queue @dwork->work immediately.  This is for
877 	 * both optimization and correctness.  The earliest @timer can
878 	 * expire is on the closest next tick and delayed_work users depend
879 	 * on there being no such delay when @delay is 0.
880 	 */
881 	if (!delay) {
882 		kthread_insert_work(worker, work, &worker->work_list);
883 		return;
884 	}
885 
886 	/* Be paranoid and try to detect possible races right away. */
887 	kthread_insert_work_sanity_check(worker, work);
888 
889 	list_add(&work->node, &worker->delayed_work_list);
890 	work->worker = worker;
891 	timer->expires = jiffies + delay;
892 	add_timer(timer);
893 }
894 
895 /**
896  * kthread_queue_delayed_work - queue the associated kthread work
897  *	after a delay.
898  * @worker: target kthread_worker
899  * @dwork: kthread_delayed_work to queue
900  * @delay: number of jiffies to wait before queuing
901  *
902  * If the work is not already pending, it starts a timer that will queue
903  * the work after the given @delay. If @delay is zero, it queues the
904  * work immediately.
905  *
906  * Return: %false if @work was already pending, which means that
907  * either the timer was running or the work was already queued. It returns %true
908  * otherwise.
909  */
910 bool kthread_queue_delayed_work(struct kthread_worker *worker,
911 				struct kthread_delayed_work *dwork,
912 				unsigned long delay)
913 {
914 	struct kthread_work *work = &dwork->work;
915 	unsigned long flags;
916 	bool ret = false;
917 
918 	raw_spin_lock_irqsave(&worker->lock, flags);
919 
920 	if (!queuing_blocked(worker, work)) {
921 		__kthread_queue_delayed_work(worker, dwork, delay);
922 		ret = true;
923 	}
924 
925 	raw_spin_unlock_irqrestore(&worker->lock, flags);
926 	return ret;
927 }
928 EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
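
/*
 * Usage sketch (illustrative only; my_timeout_fn() is hypothetical and
 * worker is assumed to have been created as in the sketch above):
 * kthread_init_delayed_work() from <linux/kthread.h> wires up
 * kthread_delayed_work_timer_fn() as the timer callback.
 *
 *	struct kthread_delayed_work my_dwork;
 *
 *	kthread_init_delayed_work(&my_dwork, my_timeout_fn);
 *	kthread_queue_delayed_work(worker, &my_dwork,
 *				   msecs_to_jiffies(100));
 */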
929 
930 struct kthread_flush_work {
931 	struct kthread_work	work;
932 	struct completion	done;
933 };
934 
935 static void kthread_flush_work_fn(struct kthread_work *work)
936 {
937 	struct kthread_flush_work *fwork =
938 		container_of(work, struct kthread_flush_work, work);
939 	complete(&fwork->done);
940 }
941 
942 /**
943  * kthread_flush_work - flush a kthread_work
944  * @work: work to flush
945  *
946  * If @work is queued or executing, wait for it to finish execution.
947  */
948 void kthread_flush_work(struct kthread_work *work)
949 {
950 	struct kthread_flush_work fwork = {
951 		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
952 		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
953 	};
954 	struct kthread_worker *worker;
955 	bool noop = false;
956 
957 	worker = work->worker;
958 	if (!worker)
959 		return;
960 
961 	raw_spin_lock_irq(&worker->lock);
962 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
963 	WARN_ON_ONCE(work->worker != worker);
964 
965 	if (!list_empty(&work->node))
966 		kthread_insert_work(worker, &fwork.work, work->node.next);
967 	else if (worker->current_work == work)
968 		kthread_insert_work(worker, &fwork.work,
969 				    worker->work_list.next);
970 	else
971 		noop = true;
972 
973 	raw_spin_unlock_irq(&worker->lock);
974 
975 	if (!noop)
976 		wait_for_completion(&fwork.done);
977 }
978 EXPORT_SYMBOL_GPL(kthread_flush_work);
979 
980 /*
981  * This function removes the work from the worker queue. It also makes sure
982  * that it won't get queued later via the delayed work's timer.
983  *
984  * The work might still be in use when this function finishes. See the
985  * current_work being processed by the worker.
986  *
987  * Return: %true if @work was pending and successfully canceled,
988  *	%false if @work was not pending.
989  */
990 static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
991 				  unsigned long *flags)
992 {
993 	/* Try to cancel the timer if it exists. */
994 	if (is_dwork) {
995 		struct kthread_delayed_work *dwork =
996 			container_of(work, struct kthread_delayed_work, work);
997 		struct kthread_worker *worker = work->worker;
998 
999 		/*
1000 		 * del_timer_sync() must be called to make sure that the timer
1001 		 * callback is not running. The lock must be temporarily released
1002 		 * to avoid a deadlock with the callback. In the meantime,
1003 		 * any queuing is blocked by setting the canceling counter.
1004 		 */
1005 		work->canceling++;
1006 		raw_spin_unlock_irqrestore(&worker->lock, *flags);
1007 		del_timer_sync(&dwork->timer);
1008 		raw_spin_lock_irqsave(&worker->lock, *flags);
1009 		work->canceling--;
1010 	}
1011 
1012 	/*
1013 	 * Try to remove the work from a worker list. It might either
1014 	 * be from worker->work_list or from worker->delayed_work_list.
1015 	 */
1016 	if (!list_empty(&work->node)) {
1017 		list_del_init(&work->node);
1018 		return true;
1019 	}
1020 
1021 	return false;
1022 }
1023 
1024 /**
1025  * kthread_mod_delayed_work - modify delay of or queue a kthread delayed work
1026  * @worker: kthread worker to use
1027  * @dwork: kthread delayed work to queue
1028  * @delay: number of jiffies to wait before queuing
1029  *
1030  * If @dwork is idle, equivalent to kthread_queue_delayed_work(). Otherwise,
1031  * modify @dwork's timer so that it expires after @delay. If @delay is zero,
1032  * @work is guaranteed to be queued immediately.
1033  *
1034  * Return: %true if @dwork was pending and its timer was modified,
1035  * %false otherwise.
1036  *
1037  * A special case is when the work is being canceled in parallel.
1038  * It might be caused either by the real kthread_cancel_delayed_work_sync()
1039  * or yet another kthread_mod_delayed_work() call. We let the other command
1040  * win and return %false here. The caller is supposed to synchronize these
1041  * operations a reasonable way.
1042  *
1043  * This function is safe to call from any context including IRQ handler.
1044  * See __kthread_cancel_work() and kthread_delayed_work_timer_fn()
1045  * for details.
1046  */
1047 bool kthread_mod_delayed_work(struct kthread_worker *worker,
1048 			      struct kthread_delayed_work *dwork,
1049 			      unsigned long delay)
1050 {
1051 	struct kthread_work *work = &dwork->work;
1052 	unsigned long flags;
1053 	int ret = false;
1054 
1055 	raw_spin_lock_irqsave(&worker->lock, flags);
1056 
1057 	/* Do not bother with canceling when never queued. */
1058 	if (!work->worker)
1059 		goto fast_queue;
1060 
1061 	/* Work must not be used with >1 worker, see kthread_queue_work() */
1062 	WARN_ON_ONCE(work->worker != worker);
1063 
1064 	/* Do not fight with another command that is canceling this work. */
1065 	if (work->canceling)
1066 		goto out;
1067 
1068 	ret = __kthread_cancel_work(work, true, &flags);
1069 fast_queue:
1070 	__kthread_queue_delayed_work(worker, dwork, delay);
1071 out:
1072 	raw_spin_unlock_irqrestore(&worker->lock, flags);
1073 	return ret;
1074 }
1075 EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
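
/*
 * Usage sketch (illustrative only; worker and my_dwork are assumed to be
 * set up as in the earlier sketches): a watchdog-style caller pushes the
 * expiry back on every heartbeat, queueing the work if it was idle.
 *
 *	void my_heartbeat(void)
 *	{
 *		kthread_mod_delayed_work(worker, &my_dwork,
 *					 msecs_to_jiffies(500));
 *	}
 */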
1076 
1077 static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
1078 {
1079 	struct kthread_worker *worker = work->worker;
1080 	unsigned long flags;
1081 	int ret = false;
1082 
1083 	if (!worker)
1084 		goto out;
1085 
1086 	raw_spin_lock_irqsave(&worker->lock, flags);
1087 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
1088 	WARN_ON_ONCE(work->worker != worker);
1089 
1090 	ret = __kthread_cancel_work(work, is_dwork, &flags);
1091 
1092 	if (worker->current_work != work)
1093 		goto out_fast;
1094 
1095 	/*
1096 	 * The work is in progress and we need to wait with the lock released.
1097 	 * In the meantime, block any queuing by setting the canceling counter.
1098 	 */
1099 	work->canceling++;
1100 	raw_spin_unlock_irqrestore(&worker->lock, flags);
1101 	kthread_flush_work(work);
1102 	raw_spin_lock_irqsave(&worker->lock, flags);
1103 	work->canceling--;
1104 
1105 out_fast:
1106 	raw_spin_unlock_irqrestore(&worker->lock, flags);
1107 out:
1108 	return ret;
1109 }
1110 
1111 /**
1112  * kthread_cancel_work_sync - cancel a kthread work and wait for it to finish
1113  * @work: the kthread work to cancel
1114  *
1115  * Cancel @work and wait for its execution to finish.  This function
1116  * can be used even if the work re-queues itself. On return from this
1117  * function, @work is guaranteed to be not pending or executing on any CPU.
1118  *
1119  * kthread_cancel_work_sync(&delayed_work->work) must not be used for
1120  * delayed works. Use kthread_cancel_delayed_work_sync() instead.
1121  *
1122  * The caller must ensure that the worker on which @work was last
1123  * queued can't be destroyed before this function returns.
1124  *
1125  * Return: %true if @work was pending, %false otherwise.
1126  */
1127 bool kthread_cancel_work_sync(struct kthread_work *work)
1128 {
1129 	return __kthread_cancel_work_sync(work, false);
1130 }
1131 EXPORT_SYMBOL_GPL(kthread_cancel_work_sync);
1132 
1133 /**
1134  * kthread_cancel_delayed_work_sync - cancel a kthread delayed work and
1135  *	wait for it to finish.
1136  * @dwork: the kthread delayed work to cancel
1137  *
1138  * This is kthread_cancel_work_sync() for delayed works.
1139  *
1140  * Return: %true if @dwork was pending, %false otherwise.
1141  */
1142 bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *dwork)
1143 {
1144 	return __kthread_cancel_work_sync(&dwork->work, true);
1145 }
1146 EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync);
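
/*
 * Usage sketch (illustrative only; my_dwork, my_work and worker are from
 * the earlier sketches): cancel synchronously before freeing the memory
 * that holds the work items, otherwise the worker may still touch them.
 *
 *	kthread_cancel_delayed_work_sync(&my_dwork);
 *	kthread_cancel_work_sync(&my_work);
 *	kthread_destroy_worker(worker);
 */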
1147 
1148 /**
1149  * kthread_flush_worker - flush all current works on a kthread_worker
1150  * @worker: worker to flush
1151  *
1152  * Wait until all currently executing or pending works on @worker are
1153  * finished.
1154  */
1155 void kthread_flush_worker(struct kthread_worker *worker)
1156 {
1157 	struct kthread_flush_work fwork = {
1158 		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
1159 		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
1160 	};
1161 
1162 	kthread_queue_work(worker, &fwork.work);
1163 	wait_for_completion(&fwork.done);
1164 }
1165 EXPORT_SYMBOL_GPL(kthread_flush_worker);
1166 
1167 /**
1168  * kthread_destroy_worker - destroy a kthread worker
1169  * @worker: worker to be destroyed
1170  *
1171  * Flush and destroy @worker.  The simple flush is enough because the kthread
1172  * worker API is used only in trivial scenarios.  There are no multi-step state
1173  * machines needed.
1174  */
1175 void kthread_destroy_worker(struct kthread_worker *worker)
1176 {
1177 	struct task_struct *task;
1178 
1179 	task = worker->task;
1180 	if (WARN_ON(!task))
1181 		return;
1182 
1183 	kthread_flush_worker(worker);
1184 	kthread_stop(task);
1185 	WARN_ON(!list_empty(&worker->work_list));
1186 	kfree(worker);
1187 }
1188 EXPORT_SYMBOL(kthread_destroy_worker);
1189 
1190 #ifdef CONFIG_BLK_CGROUP
1191 /**
1192  * kthread_associate_blkcg - associate blkcg to current kthread
1193  * @css: the cgroup info
1194  *
1195  * Current thread must be a kthread. The thread is running jobs on behalf of
1196  * other threads. In some cases, we expect the jobs to attach the cgroup info
1197  * of the original threads instead of that of the current thread. This function
1198  * stores the original thread's cgroup info in the current kthread's context for
1199  * later retrieval.
1200  */
1201 void kthread_associate_blkcg(struct cgroup_subsys_state *css)
1202 {
1203 	struct kthread *kthread;
1204 
1205 	if (!(current->flags & PF_KTHREAD))
1206 		return;
1207 	kthread = to_kthread(current);
1208 	if (!kthread)
1209 		return;
1210 
1211 	if (kthread->blkcg_css) {
1212 		css_put(kthread->blkcg_css);
1213 		kthread->blkcg_css = NULL;
1214 	}
1215 	if (css) {
1216 		css_get(css);
1217 		kthread->blkcg_css = css;
1218 	}
1219 }
1220 EXPORT_SYMBOL(kthread_associate_blkcg);
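
/*
 * Usage sketch (illustrative only; css and my_submit_io() are hypothetical,
 * and the caller is assumed to hold a reference on css): a kthread doing
 * I/O on behalf of another task temporarily adopts that task's blkcg and
 * drops the association when it is done.
 *
 *	kthread_associate_blkcg(css);
 *	my_submit_io();
 *	kthread_associate_blkcg(NULL);
 */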
1221 
1222 /**
1223  * kthread_blkcg - get associated blkcg css of current kthread
1224  *
1225  * Current thread must be a kthread.
1226  */
1227 struct cgroup_subsys_state *kthread_blkcg(void)
1228 {
1229 	struct kthread *kthread;
1230 
1231 	if (current->flags & PF_KTHREAD) {
1232 		kthread = to_kthread(current);
1233 		if (kthread)
1234 			return kthread->blkcg_css;
1235 	}
1236 	return NULL;
1237 }
1238 EXPORT_SYMBOL(kthread_blkcg);
1239 #endif
1240