xref: /linux-6.15/kernel/workqueue.c (revision affee4b2)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * linux/kernel/workqueue.c
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * Generic mechanism for defining kernel helper threads for running
51da177e4SLinus Torvalds  * arbitrary tasks in process context.
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  * Started by Ingo Molnar, Copyright (C) 2002
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  * Derived from the taskqueue/keventd code by:
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *   David Woodhouse <[email protected]>
12e1f8e874SFrancois Cami  *   Andrew Morton
131da177e4SLinus Torvalds  *   Kai Petzke <[email protected]>
141da177e4SLinus Torvalds  *   Theodore Ts'o <[email protected]>
1589ada679SChristoph Lameter  *
16cde53535SChristoph Lameter  * Made to use alloc_percpu by Christoph Lameter.
171da177e4SLinus Torvalds  */
181da177e4SLinus Torvalds 
191da177e4SLinus Torvalds #include <linux/module.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
211da177e4SLinus Torvalds #include <linux/sched.h>
221da177e4SLinus Torvalds #include <linux/init.h>
231da177e4SLinus Torvalds #include <linux/signal.h>
241da177e4SLinus Torvalds #include <linux/completion.h>
251da177e4SLinus Torvalds #include <linux/workqueue.h>
261da177e4SLinus Torvalds #include <linux/slab.h>
271da177e4SLinus Torvalds #include <linux/cpu.h>
281da177e4SLinus Torvalds #include <linux/notifier.h>
291da177e4SLinus Torvalds #include <linux/kthread.h>
301fa44ecaSJames Bottomley #include <linux/hardirq.h>
3146934023SChristoph Lameter #include <linux/mempolicy.h>
32341a5958SRafael J. Wysocki #include <linux/freezer.h>
33d5abe669SPeter Zijlstra #include <linux/kallsyms.h>
34d5abe669SPeter Zijlstra #include <linux/debug_locks.h>
354e6045f1SJohannes Berg #include <linux/lockdep.h>
36c34056a3STejun Heo #include <linux/idr.h>
371da177e4SLinus Torvalds 
381da177e4SLinus Torvalds /*
394690c4abSTejun Heo  * Structure fields follow one of the following exclusion rules.
404690c4abSTejun Heo  *
414690c4abSTejun Heo  * I: Set during initialization and read-only afterwards.
424690c4abSTejun Heo  *
434690c4abSTejun Heo  * L: cwq->lock protected.  Access with cwq->lock held.
444690c4abSTejun Heo  *
4573f53c4aSTejun Heo  * F: wq->flush_mutex protected.
4673f53c4aSTejun Heo  *
474690c4abSTejun Heo  * W: workqueue_lock protected.
484690c4abSTejun Heo  */
494690c4abSTejun Heo 
50c34056a3STejun Heo struct cpu_workqueue_struct;
51c34056a3STejun Heo 
52c34056a3STejun Heo struct worker {
53c34056a3STejun Heo 	struct work_struct	*current_work;	/* L: work being processed */
54*affee4b2STejun Heo 	struct list_head	scheduled;	/* L: scheduled works */
55c34056a3STejun Heo 	struct task_struct	*task;		/* I: worker task */
56c34056a3STejun Heo 	struct cpu_workqueue_struct *cwq;	/* I: the associated cwq */
57c34056a3STejun Heo 	int			id;		/* I: worker id */
58c34056a3STejun Heo };
59c34056a3STejun Heo 
604690c4abSTejun Heo /*
61f756d5e2SNathan Lynch  * The per-CPU workqueue (if single thread, we always use the first
620f900049STejun Heo  * possible cpu).  The lower WORK_STRUCT_FLAG_BITS of
630f900049STejun Heo  * work_struct->data are used for flags and thus cwqs need to be
640f900049STejun Heo 	 * aligned to at least a (1 << WORK_STRUCT_FLAG_BITS) byte boundary.
651da177e4SLinus Torvalds  */
661da177e4SLinus Torvalds struct cpu_workqueue_struct {
671da177e4SLinus Torvalds 
681da177e4SLinus Torvalds 	spinlock_t lock;
691da177e4SLinus Torvalds 
701da177e4SLinus Torvalds 	struct list_head worklist;
711da177e4SLinus Torvalds 	wait_queue_head_t more_work;
721537663fSTejun Heo 	unsigned int		cpu;
73c34056a3STejun Heo 	struct worker		*worker;
741da177e4SLinus Torvalds 
754690c4abSTejun Heo 	struct workqueue_struct *wq;		/* I: the owning workqueue */
7673f53c4aSTejun Heo 	int			work_color;	/* L: current color */
7773f53c4aSTejun Heo 	int			flush_color;	/* L: flushing color */
7873f53c4aSTejun Heo 	int			nr_in_flight[WORK_NR_COLORS];
7973f53c4aSTejun Heo 						/* L: nr of in_flight works */
800f900049STejun Heo };
811da177e4SLinus Torvalds 
821da177e4SLinus Torvalds /*
8373f53c4aSTejun Heo  * Structure used to wait for workqueue flush.
8473f53c4aSTejun Heo  */
8573f53c4aSTejun Heo struct wq_flusher {
8673f53c4aSTejun Heo 	struct list_head	list;		/* F: list of flushers */
8773f53c4aSTejun Heo 	int			flush_color;	/* F: flush color waiting for */
8873f53c4aSTejun Heo 	struct completion	done;		/* flush completion */
8973f53c4aSTejun Heo };
9073f53c4aSTejun Heo 
9173f53c4aSTejun Heo /*
921da177e4SLinus Torvalds  * The externally visible workqueue abstraction is an array of
931da177e4SLinus Torvalds  * per-CPU workqueues:
941da177e4SLinus Torvalds  */
951da177e4SLinus Torvalds struct workqueue_struct {
9697e37d7bSTejun Heo 	unsigned int		flags;		/* I: WQ_* flags */
974690c4abSTejun Heo 	struct cpu_workqueue_struct *cpu_wq;	/* I: cwq's */
984690c4abSTejun Heo 	struct list_head	list;		/* W: list of all workqueues */
9973f53c4aSTejun Heo 
10073f53c4aSTejun Heo 	struct mutex		flush_mutex;	/* protects wq flushing */
10173f53c4aSTejun Heo 	int			work_color;	/* F: current work color */
10273f53c4aSTejun Heo 	int			flush_color;	/* F: current flush color */
10373f53c4aSTejun Heo 	atomic_t		nr_cwqs_to_flush; /* flush in progress */
10473f53c4aSTejun Heo 	struct wq_flusher	*first_flusher;	/* F: first flusher */
10573f53c4aSTejun Heo 	struct list_head	flusher_queue;	/* F: flush waiters */
10673f53c4aSTejun Heo 	struct list_head	flusher_overflow; /* F: flush overflow list */
10773f53c4aSTejun Heo 
1084690c4abSTejun Heo 	const char		*name;		/* I: workqueue name */
1094e6045f1SJohannes Berg #ifdef CONFIG_LOCKDEP
1104e6045f1SJohannes Berg 	struct lockdep_map	lockdep_map;
1114e6045f1SJohannes Berg #endif
1121da177e4SLinus Torvalds };
1131da177e4SLinus Torvalds 
114dc186ad7SThomas Gleixner #ifdef CONFIG_DEBUG_OBJECTS_WORK
115dc186ad7SThomas Gleixner 
116dc186ad7SThomas Gleixner static struct debug_obj_descr work_debug_descr;
117dc186ad7SThomas Gleixner 
118dc186ad7SThomas Gleixner /*
119dc186ad7SThomas Gleixner  * fixup_init is called when:
120dc186ad7SThomas Gleixner  * - an active object is initialized
121dc186ad7SThomas Gleixner  */
122dc186ad7SThomas Gleixner static int work_fixup_init(void *addr, enum debug_obj_state state)
123dc186ad7SThomas Gleixner {
124dc186ad7SThomas Gleixner 	struct work_struct *work = addr;
125dc186ad7SThomas Gleixner 
126dc186ad7SThomas Gleixner 	switch (state) {
127dc186ad7SThomas Gleixner 	case ODEBUG_STATE_ACTIVE:
128dc186ad7SThomas Gleixner 		cancel_work_sync(work);
129dc186ad7SThomas Gleixner 		debug_object_init(work, &work_debug_descr);
130dc186ad7SThomas Gleixner 		return 1;
131dc186ad7SThomas Gleixner 	default:
132dc186ad7SThomas Gleixner 		return 0;
133dc186ad7SThomas Gleixner 	}
134dc186ad7SThomas Gleixner }
135dc186ad7SThomas Gleixner 
136dc186ad7SThomas Gleixner /*
137dc186ad7SThomas Gleixner  * fixup_activate is called when:
138dc186ad7SThomas Gleixner  * - an active object is activated
139dc186ad7SThomas Gleixner  * - an unknown object is activated (might be a statically initialized object)
140dc186ad7SThomas Gleixner  */
141dc186ad7SThomas Gleixner static int work_fixup_activate(void *addr, enum debug_obj_state state)
142dc186ad7SThomas Gleixner {
143dc186ad7SThomas Gleixner 	struct work_struct *work = addr;
144dc186ad7SThomas Gleixner 
145dc186ad7SThomas Gleixner 	switch (state) {
146dc186ad7SThomas Gleixner 
147dc186ad7SThomas Gleixner 	case ODEBUG_STATE_NOTAVAILABLE:
148dc186ad7SThomas Gleixner 		/*
149dc186ad7SThomas Gleixner 		 * This is not really a fixup. The work struct was
150dc186ad7SThomas Gleixner 		 * statically initialized. We just make sure that it
151dc186ad7SThomas Gleixner 		 * is tracked in the object tracker.
152dc186ad7SThomas Gleixner 		 */
15322df02bbSTejun Heo 		if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
154dc186ad7SThomas Gleixner 			debug_object_init(work, &work_debug_descr);
155dc186ad7SThomas Gleixner 			debug_object_activate(work, &work_debug_descr);
156dc186ad7SThomas Gleixner 			return 0;
157dc186ad7SThomas Gleixner 		}
158dc186ad7SThomas Gleixner 		WARN_ON_ONCE(1);
159dc186ad7SThomas Gleixner 		return 0;
160dc186ad7SThomas Gleixner 
161dc186ad7SThomas Gleixner 	case ODEBUG_STATE_ACTIVE:
162dc186ad7SThomas Gleixner 		WARN_ON(1);
163dc186ad7SThomas Gleixner 
164dc186ad7SThomas Gleixner 	default:
165dc186ad7SThomas Gleixner 		return 0;
166dc186ad7SThomas Gleixner 	}
167dc186ad7SThomas Gleixner }
168dc186ad7SThomas Gleixner 
169dc186ad7SThomas Gleixner /*
170dc186ad7SThomas Gleixner  * fixup_free is called when:
171dc186ad7SThomas Gleixner  * - an active object is freed
172dc186ad7SThomas Gleixner  */
173dc186ad7SThomas Gleixner static int work_fixup_free(void *addr, enum debug_obj_state state)
174dc186ad7SThomas Gleixner {
175dc186ad7SThomas Gleixner 	struct work_struct *work = addr;
176dc186ad7SThomas Gleixner 
177dc186ad7SThomas Gleixner 	switch (state) {
178dc186ad7SThomas Gleixner 	case ODEBUG_STATE_ACTIVE:
179dc186ad7SThomas Gleixner 		cancel_work_sync(work);
180dc186ad7SThomas Gleixner 		debug_object_free(work, &work_debug_descr);
181dc186ad7SThomas Gleixner 		return 1;
182dc186ad7SThomas Gleixner 	default:
183dc186ad7SThomas Gleixner 		return 0;
184dc186ad7SThomas Gleixner 	}
185dc186ad7SThomas Gleixner }
186dc186ad7SThomas Gleixner 
187dc186ad7SThomas Gleixner static struct debug_obj_descr work_debug_descr = {
188dc186ad7SThomas Gleixner 	.name		= "work_struct",
189dc186ad7SThomas Gleixner 	.fixup_init	= work_fixup_init,
190dc186ad7SThomas Gleixner 	.fixup_activate	= work_fixup_activate,
191dc186ad7SThomas Gleixner 	.fixup_free	= work_fixup_free,
192dc186ad7SThomas Gleixner };
193dc186ad7SThomas Gleixner 
194dc186ad7SThomas Gleixner static inline void debug_work_activate(struct work_struct *work)
195dc186ad7SThomas Gleixner {
196dc186ad7SThomas Gleixner 	debug_object_activate(work, &work_debug_descr);
197dc186ad7SThomas Gleixner }
198dc186ad7SThomas Gleixner 
199dc186ad7SThomas Gleixner static inline void debug_work_deactivate(struct work_struct *work)
200dc186ad7SThomas Gleixner {
201dc186ad7SThomas Gleixner 	debug_object_deactivate(work, &work_debug_descr);
202dc186ad7SThomas Gleixner }
203dc186ad7SThomas Gleixner 
204dc186ad7SThomas Gleixner void __init_work(struct work_struct *work, int onstack)
205dc186ad7SThomas Gleixner {
206dc186ad7SThomas Gleixner 	if (onstack)
207dc186ad7SThomas Gleixner 		debug_object_init_on_stack(work, &work_debug_descr);
208dc186ad7SThomas Gleixner 	else
209dc186ad7SThomas Gleixner 		debug_object_init(work, &work_debug_descr);
210dc186ad7SThomas Gleixner }
211dc186ad7SThomas Gleixner EXPORT_SYMBOL_GPL(__init_work);
212dc186ad7SThomas Gleixner 
213dc186ad7SThomas Gleixner void destroy_work_on_stack(struct work_struct *work)
214dc186ad7SThomas Gleixner {
215dc186ad7SThomas Gleixner 	debug_object_free(work, &work_debug_descr);
216dc186ad7SThomas Gleixner }
217dc186ad7SThomas Gleixner EXPORT_SYMBOL_GPL(destroy_work_on_stack);
218dc186ad7SThomas Gleixner 
219dc186ad7SThomas Gleixner #else
220dc186ad7SThomas Gleixner static inline void debug_work_activate(struct work_struct *work) { }
221dc186ad7SThomas Gleixner static inline void debug_work_deactivate(struct work_struct *work) { }
222dc186ad7SThomas Gleixner #endif
223dc186ad7SThomas Gleixner 
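/*
 * A minimal sketch of how the on-stack debug-object hooks above are meant
 * to be paired by a caller, using INIT_WORK_ON_STACK() as done further down
 * in this file; the handler and function names are hypothetical:
 *
 *	static void my_sync_handler(struct work_struct *work)
 *	{
 *		do the process-context part of the operation
 *	}
 *
 *	void my_sync_op(struct workqueue_struct *wq)
 *	{
 *		struct work_struct work;
 *
 *		INIT_WORK_ON_STACK(&work, my_sync_handler);
 *		queue_work(wq, &work);
 *		flush_work(&work);
 *		destroy_work_on_stack(&work);
 *	}
 */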
22495402b38SGautham R Shenoy /* Serializes the accesses to the list of workqueues. */
22595402b38SGautham R Shenoy static DEFINE_SPINLOCK(workqueue_lock);
2261da177e4SLinus Torvalds static LIST_HEAD(workqueues);
227c34056a3STejun Heo static DEFINE_PER_CPU(struct ida, worker_ida);
228c34056a3STejun Heo 
229c34056a3STejun Heo static int worker_thread(void *__worker);
2301da177e4SLinus Torvalds 
2313af24433SOleg Nesterov static int singlethread_cpu __read_mostly;
232b1f4ec17SOleg Nesterov 
2334690c4abSTejun Heo static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
2344690c4abSTejun Heo 					    struct workqueue_struct *wq)
235a848e3b6SOleg Nesterov {
236a848e3b6SOleg Nesterov 	return per_cpu_ptr(wq->cpu_wq, cpu);
237a848e3b6SOleg Nesterov }
238a848e3b6SOleg Nesterov 
2391537663fSTejun Heo static struct cpu_workqueue_struct *target_cwq(unsigned int cpu,
2401537663fSTejun Heo 					       struct workqueue_struct *wq)
2411537663fSTejun Heo {
2421537663fSTejun Heo 	if (unlikely(wq->flags & WQ_SINGLE_THREAD))
2431537663fSTejun Heo 		cpu = singlethread_cpu;
2441537663fSTejun Heo 	return get_cwq(cpu, wq);
2451537663fSTejun Heo }
2461537663fSTejun Heo 
24773f53c4aSTejun Heo static unsigned int work_color_to_flags(int color)
24873f53c4aSTejun Heo {
24973f53c4aSTejun Heo 	return color << WORK_STRUCT_COLOR_SHIFT;
25073f53c4aSTejun Heo }
25173f53c4aSTejun Heo 
25273f53c4aSTejun Heo static int get_work_color(struct work_struct *work)
25373f53c4aSTejun Heo {
25473f53c4aSTejun Heo 	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
25573f53c4aSTejun Heo 		((1 << WORK_STRUCT_COLOR_BITS) - 1);
25673f53c4aSTejun Heo }
25773f53c4aSTejun Heo 
25873f53c4aSTejun Heo static int work_next_color(int color)
25973f53c4aSTejun Heo {
26073f53c4aSTejun Heo 	return (color + 1) % WORK_NR_COLORS;
26173f53c4aSTejun Heo }
26273f53c4aSTejun Heo 
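/*
 * Worked example of the color helpers above, assuming the WORK_STRUCT_*
 * constants from workqueue.h; the value 5 is illustrative only:
 *
 *	flags = work_color_to_flags(5);		color 5 packed into the data flags
 *	queue the work with those flags set in work->data, then later:
 *	color = get_work_color(work);		recovers 5
 *	next  = work_next_color(color);		6, wrapping to 0 at WORK_NR_COLORS
 */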
2634594bf15SDavid Howells /*
2644594bf15SDavid Howells  * Set the workqueue on which a work item is to be run
2654594bf15SDavid Howells  * - Must *only* be called if the pending flag is set
2664594bf15SDavid Howells  */
267ed7c0feeSOleg Nesterov static inline void set_wq_data(struct work_struct *work,
2684690c4abSTejun Heo 			       struct cpu_workqueue_struct *cwq,
2694690c4abSTejun Heo 			       unsigned long extra_flags)
270365970a1SDavid Howells {
2714594bf15SDavid Howells 	BUG_ON(!work_pending(work));
2724594bf15SDavid Howells 
2734690c4abSTejun Heo 	atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
27422df02bbSTejun Heo 			WORK_STRUCT_PENDING | extra_flags);
275365970a1SDavid Howells }
276365970a1SDavid Howells 
2774d707b9fSOleg Nesterov /*
2784d707b9fSOleg Nesterov  * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
2794d707b9fSOleg Nesterov  */
2804d707b9fSOleg Nesterov static inline void clear_wq_data(struct work_struct *work)
2814d707b9fSOleg Nesterov {
2824690c4abSTejun Heo 	atomic_long_set(&work->data, work_static(work));
2834d707b9fSOleg Nesterov }
2844d707b9fSOleg Nesterov 
28564166699STejun Heo static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
286365970a1SDavid Howells {
28764166699STejun Heo 	return (void *)(atomic_long_read(&work->data) &
28864166699STejun Heo 			WORK_STRUCT_WQ_DATA_MASK);
289365970a1SDavid Howells }
290365970a1SDavid Howells 
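/*
 * Rough layout of the work->data word manipulated by the helpers above
 * (schematic only; the bit widths come from the WORK_STRUCT_* constants):
 *
 *	[ cwq pointer (WORK_STRUCT_WQ_DATA_MASK) | color | flag bits ]
 *
 * set_wq_data() stores the cwq pointer together with PENDING and any extra
 * flags, get_wq_data() masks the low flag bits back off to recover the cwq,
 * and clear_wq_data() leaves only the static-object bit behind.
 */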
2914690c4abSTejun Heo /**
2924690c4abSTejun Heo  * insert_work - insert a work into cwq
2934690c4abSTejun Heo  * @cwq: cwq @work belongs to
2944690c4abSTejun Heo  * @work: work to insert
2954690c4abSTejun Heo  * @head: insertion point
2964690c4abSTejun Heo  * @extra_flags: extra WORK_STRUCT_* flags to set
2974690c4abSTejun Heo  *
2984690c4abSTejun Heo  * Insert @work into @cwq after @head.
2994690c4abSTejun Heo  *
3004690c4abSTejun Heo  * CONTEXT:
3014690c4abSTejun Heo  * spin_lock_irq(cwq->lock).
3024690c4abSTejun Heo  */
303b89deed3SOleg Nesterov static void insert_work(struct cpu_workqueue_struct *cwq,
3044690c4abSTejun Heo 			struct work_struct *work, struct list_head *head,
3054690c4abSTejun Heo 			unsigned int extra_flags)
306b89deed3SOleg Nesterov {
3074690c4abSTejun Heo 	/* we own @work, set data and link */
3084690c4abSTejun Heo 	set_wq_data(work, cwq, extra_flags);
3094690c4abSTejun Heo 
3106e84d644SOleg Nesterov 	/*
3116e84d644SOleg Nesterov 	 * Ensure that we get the right work->data if we see the
3126e84d644SOleg Nesterov 	 * result of list_add() below, see try_to_grab_pending().
3136e84d644SOleg Nesterov 	 */
3146e84d644SOleg Nesterov 	smp_wmb();
3154690c4abSTejun Heo 
3161a4d9b0aSOleg Nesterov 	list_add_tail(&work->entry, head);
317b89deed3SOleg Nesterov 	wake_up(&cwq->more_work);
318b89deed3SOleg Nesterov }
319b89deed3SOleg Nesterov 
3204690c4abSTejun Heo static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
3211da177e4SLinus Torvalds 			 struct work_struct *work)
3221da177e4SLinus Torvalds {
3231537663fSTejun Heo 	struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq);
3241da177e4SLinus Torvalds 	unsigned long flags;
3251da177e4SLinus Torvalds 
326dc186ad7SThomas Gleixner 	debug_work_activate(work);
3271da177e4SLinus Torvalds 	spin_lock_irqsave(&cwq->lock, flags);
3284690c4abSTejun Heo 	BUG_ON(!list_empty(&work->entry));
32973f53c4aSTejun Heo 	cwq->nr_in_flight[cwq->work_color]++;
33073f53c4aSTejun Heo 	insert_work(cwq, work, &cwq->worklist,
33173f53c4aSTejun Heo 		    work_color_to_flags(cwq->work_color));
3321da177e4SLinus Torvalds 	spin_unlock_irqrestore(&cwq->lock, flags);
3331da177e4SLinus Torvalds }
3341da177e4SLinus Torvalds 
3350fcb78c2SRolf Eike Beer /**
3360fcb78c2SRolf Eike Beer  * queue_work - queue work on a workqueue
3370fcb78c2SRolf Eike Beer  * @wq: workqueue to use
3380fcb78c2SRolf Eike Beer  * @work: work to queue
3390fcb78c2SRolf Eike Beer  *
340057647fcSAlan Stern  * Returns 0 if @work was already on a queue, non-zero otherwise.
3411da177e4SLinus Torvalds  *
34200dfcaf7SOleg Nesterov  * We queue the work to the CPU on which it was submitted, but if the CPU dies
34300dfcaf7SOleg Nesterov  * it can be processed by another CPU.
3441da177e4SLinus Torvalds  */
3457ad5b3a5SHarvey Harrison int queue_work(struct workqueue_struct *wq, struct work_struct *work)
3461da177e4SLinus Torvalds {
347ef1ca236SOleg Nesterov 	int ret;
3481da177e4SLinus Torvalds 
349ef1ca236SOleg Nesterov 	ret = queue_work_on(get_cpu(), wq, work);
350a848e3b6SOleg Nesterov 	put_cpu();
351ef1ca236SOleg Nesterov 
3521da177e4SLinus Torvalds 	return ret;
3531da177e4SLinus Torvalds }
354ae90dd5dSDave Jones EXPORT_SYMBOL_GPL(queue_work);
3551da177e4SLinus Torvalds 
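/*
 * Minimal usage sketch for queue_work(); the workqueue, handler and work
 * names below are hypothetical and the workqueue is assumed to have been
 * created elsewhere with create_workqueue():
 *
 *	static void my_handler(struct work_struct *work)
 *	{
 *		pr_info("running in process context\n");
 *	}
 *
 *	static DECLARE_WORK(my_work, my_handler);
 *
 *	if (!queue_work(my_wq, &my_work))
 *		the work was already pending and will run only once
 */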
356c1a220e7SZhang Rui /**
357c1a220e7SZhang Rui  * queue_work_on - queue work on specific cpu
358c1a220e7SZhang Rui  * @cpu: CPU number to execute work on
359c1a220e7SZhang Rui  * @wq: workqueue to use
360c1a220e7SZhang Rui  * @work: work to queue
361c1a220e7SZhang Rui  *
362c1a220e7SZhang Rui  * Returns 0 if @work was already on a queue, non-zero otherwise.
363c1a220e7SZhang Rui  *
364c1a220e7SZhang Rui  * We queue the work to a specific CPU; the caller must ensure it
365c1a220e7SZhang Rui  * can't go away.
366c1a220e7SZhang Rui  */
367c1a220e7SZhang Rui int
368c1a220e7SZhang Rui queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
369c1a220e7SZhang Rui {
370c1a220e7SZhang Rui 	int ret = 0;
371c1a220e7SZhang Rui 
37222df02bbSTejun Heo 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
3734690c4abSTejun Heo 		__queue_work(cpu, wq, work);
374c1a220e7SZhang Rui 		ret = 1;
375c1a220e7SZhang Rui 	}
376c1a220e7SZhang Rui 	return ret;
377c1a220e7SZhang Rui }
378c1a220e7SZhang Rui EXPORT_SYMBOL_GPL(queue_work_on);
379c1a220e7SZhang Rui 
3806d141c3fSLi Zefan static void delayed_work_timer_fn(unsigned long __data)
3811da177e4SLinus Torvalds {
38252bad64dSDavid Howells 	struct delayed_work *dwork = (struct delayed_work *)__data;
383ed7c0feeSOleg Nesterov 	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
3841da177e4SLinus Torvalds 
3854690c4abSTejun Heo 	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
3861da177e4SLinus Torvalds }
3871da177e4SLinus Torvalds 
3880fcb78c2SRolf Eike Beer /**
3890fcb78c2SRolf Eike Beer  * queue_delayed_work - queue work on a workqueue after delay
3900fcb78c2SRolf Eike Beer  * @wq: workqueue to use
391af9997e4SRandy Dunlap  * @dwork: delayable work to queue
3920fcb78c2SRolf Eike Beer  * @delay: number of jiffies to wait before queueing
3930fcb78c2SRolf Eike Beer  *
394057647fcSAlan Stern  * Returns 0 if @work was already on a queue, non-zero otherwise.
3950fcb78c2SRolf Eike Beer  */
3967ad5b3a5SHarvey Harrison int queue_delayed_work(struct workqueue_struct *wq,
39752bad64dSDavid Howells 			struct delayed_work *dwork, unsigned long delay)
3981da177e4SLinus Torvalds {
39952bad64dSDavid Howells 	if (delay == 0)
40063bc0362SOleg Nesterov 		return queue_work(wq, &dwork->work);
4011da177e4SLinus Torvalds 
40263bc0362SOleg Nesterov 	return queue_delayed_work_on(-1, wq, dwork, delay);
4031da177e4SLinus Torvalds }
404ae90dd5dSDave Jones EXPORT_SYMBOL_GPL(queue_delayed_work);
4051da177e4SLinus Torvalds 
4060fcb78c2SRolf Eike Beer /**
4070fcb78c2SRolf Eike Beer  * queue_delayed_work_on - queue work on specific CPU after delay
4080fcb78c2SRolf Eike Beer  * @cpu: CPU number to execute work on
4090fcb78c2SRolf Eike Beer  * @wq: workqueue to use
410af9997e4SRandy Dunlap  * @dwork: work to queue
4110fcb78c2SRolf Eike Beer  * @delay: number of jiffies to wait before queueing
4120fcb78c2SRolf Eike Beer  *
413057647fcSAlan Stern  * Returns 0 if @work was already on a queue, non-zero otherwise.
4140fcb78c2SRolf Eike Beer  */
4157a6bc1cdSVenkatesh Pallipadi int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
41652bad64dSDavid Howells 			struct delayed_work *dwork, unsigned long delay)
4177a6bc1cdSVenkatesh Pallipadi {
4187a6bc1cdSVenkatesh Pallipadi 	int ret = 0;
41952bad64dSDavid Howells 	struct timer_list *timer = &dwork->timer;
42052bad64dSDavid Howells 	struct work_struct *work = &dwork->work;
4217a6bc1cdSVenkatesh Pallipadi 
42222df02bbSTejun Heo 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
4237a6bc1cdSVenkatesh Pallipadi 		BUG_ON(timer_pending(timer));
4247a6bc1cdSVenkatesh Pallipadi 		BUG_ON(!list_empty(&work->entry));
4257a6bc1cdSVenkatesh Pallipadi 
4268a3e77ccSAndrew Liu 		timer_stats_timer_set_start_info(&dwork->timer);
4278a3e77ccSAndrew Liu 
428ed7c0feeSOleg Nesterov 		/* This stores cwq for the moment, for the timer_fn */
4291537663fSTejun Heo 		set_wq_data(work, target_cwq(raw_smp_processor_id(), wq), 0);
4307a6bc1cdSVenkatesh Pallipadi 		timer->expires = jiffies + delay;
43152bad64dSDavid Howells 		timer->data = (unsigned long)dwork;
4327a6bc1cdSVenkatesh Pallipadi 		timer->function = delayed_work_timer_fn;
43363bc0362SOleg Nesterov 
43463bc0362SOleg Nesterov 		if (unlikely(cpu >= 0))
4357a6bc1cdSVenkatesh Pallipadi 			add_timer_on(timer, cpu);
43663bc0362SOleg Nesterov 		else
43763bc0362SOleg Nesterov 			add_timer(timer);
4387a6bc1cdSVenkatesh Pallipadi 		ret = 1;
4397a6bc1cdSVenkatesh Pallipadi 	}
4407a6bc1cdSVenkatesh Pallipadi 	return ret;
4417a6bc1cdSVenkatesh Pallipadi }
442ae90dd5dSDave Jones EXPORT_SYMBOL_GPL(queue_delayed_work_on);
4431da177e4SLinus Torvalds 
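/*
 * Minimal delayed-work sketch for the two functions above; struct my_dev,
 * its poll_work member and my_poll() are hypothetical:
 *
 *	static void my_poll(struct work_struct *work)
 *	{
 *		struct my_dev *dev = container_of(work, struct my_dev,
 *						  poll_work.work);
 *		poll the hardware, then optionally requeue
 *	}
 *
 *	INIT_DELAYED_WORK(&dev->poll_work, my_poll);
 *	queue_delayed_work(my_wq, &dev->poll_work, msecs_to_jiffies(100));
 *	...
 *	cancel_delayed_work_sync(&dev->poll_work);
 */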
444c34056a3STejun Heo static struct worker *alloc_worker(void)
445c34056a3STejun Heo {
446c34056a3STejun Heo 	struct worker *worker;
447c34056a3STejun Heo 
448c34056a3STejun Heo 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
449*affee4b2STejun Heo 	if (worker)
450*affee4b2STejun Heo 		INIT_LIST_HEAD(&worker->scheduled);
451c34056a3STejun Heo 	return worker;
452c34056a3STejun Heo }
453c34056a3STejun Heo 
454c34056a3STejun Heo /**
455c34056a3STejun Heo  * create_worker - create a new workqueue worker
456c34056a3STejun Heo  * @cwq: cwq the new worker will belong to
457c34056a3STejun Heo  * @bind: whether to bind the worker to cwq->cpu or not
458c34056a3STejun Heo  *
459c34056a3STejun Heo  * Create a new worker which is bound to @cwq.  The returned worker
460c34056a3STejun Heo  * can be started by calling start_worker() or destroyed using
461c34056a3STejun Heo  * destroy_worker().
462c34056a3STejun Heo  *
463c34056a3STejun Heo  * CONTEXT:
464c34056a3STejun Heo  * Might sleep.  Does GFP_KERNEL allocations.
465c34056a3STejun Heo  *
466c34056a3STejun Heo  * RETURNS:
467c34056a3STejun Heo  * Pointer to the newly created worker.
468c34056a3STejun Heo  */
469c34056a3STejun Heo static struct worker *create_worker(struct cpu_workqueue_struct *cwq, bool bind)
470c34056a3STejun Heo {
471c34056a3STejun Heo 	int id = -1;
472c34056a3STejun Heo 	struct worker *worker = NULL;
473c34056a3STejun Heo 
474c34056a3STejun Heo 	spin_lock(&workqueue_lock);
475c34056a3STejun Heo 	while (ida_get_new(&per_cpu(worker_ida, cwq->cpu), &id)) {
476c34056a3STejun Heo 		spin_unlock(&workqueue_lock);
477c34056a3STejun Heo 		if (!ida_pre_get(&per_cpu(worker_ida, cwq->cpu), GFP_KERNEL))
478c34056a3STejun Heo 			goto fail;
479c34056a3STejun Heo 		spin_lock(&workqueue_lock);
480c34056a3STejun Heo 	}
481c34056a3STejun Heo 	spin_unlock(&workqueue_lock);
482c34056a3STejun Heo 
483c34056a3STejun Heo 	worker = alloc_worker();
484c34056a3STejun Heo 	if (!worker)
485c34056a3STejun Heo 		goto fail;
486c34056a3STejun Heo 
487c34056a3STejun Heo 	worker->cwq = cwq;
488c34056a3STejun Heo 	worker->id = id;
489c34056a3STejun Heo 
490c34056a3STejun Heo 	worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d",
491c34056a3STejun Heo 				      cwq->cpu, id);
492c34056a3STejun Heo 	if (IS_ERR(worker->task))
493c34056a3STejun Heo 		goto fail;
494c34056a3STejun Heo 
495c34056a3STejun Heo 	if (bind)
496c34056a3STejun Heo 		kthread_bind(worker->task, cwq->cpu);
497c34056a3STejun Heo 
498c34056a3STejun Heo 	return worker;
499c34056a3STejun Heo fail:
500c34056a3STejun Heo 	if (id >= 0) {
501c34056a3STejun Heo 		spin_lock(&workqueue_lock);
502c34056a3STejun Heo 		ida_remove(&per_cpu(worker_ida, cwq->cpu), id);
503c34056a3STejun Heo 		spin_unlock(&workqueue_lock);
504c34056a3STejun Heo 	}
505c34056a3STejun Heo 	kfree(worker);
506c34056a3STejun Heo 	return NULL;
507c34056a3STejun Heo }
508c34056a3STejun Heo 
509c34056a3STejun Heo /**
510c34056a3STejun Heo  * start_worker - start a newly created worker
511c34056a3STejun Heo  * @worker: worker to start
512c34056a3STejun Heo  *
513c34056a3STejun Heo  * Start @worker.
514c34056a3STejun Heo  *
515c34056a3STejun Heo  * CONTEXT:
516c34056a3STejun Heo  * spin_lock_irq(cwq->lock).
517c34056a3STejun Heo  */
518c34056a3STejun Heo static void start_worker(struct worker *worker)
519c34056a3STejun Heo {
520c34056a3STejun Heo 	wake_up_process(worker->task);
521c34056a3STejun Heo }
522c34056a3STejun Heo 
523c34056a3STejun Heo /**
524c34056a3STejun Heo  * destroy_worker - destroy a workqueue worker
525c34056a3STejun Heo  * @worker: worker to be destroyed
526c34056a3STejun Heo  *
527c34056a3STejun Heo  * Destroy @worker.
528c34056a3STejun Heo  */
529c34056a3STejun Heo static void destroy_worker(struct worker *worker)
530c34056a3STejun Heo {
531c34056a3STejun Heo 	int cpu = worker->cwq->cpu;
532c34056a3STejun Heo 	int id = worker->id;
533c34056a3STejun Heo 
534c34056a3STejun Heo 	/* sanity check frenzy */
535c34056a3STejun Heo 	BUG_ON(worker->current_work);
536*affee4b2STejun Heo 	BUG_ON(!list_empty(&worker->scheduled));
537c34056a3STejun Heo 
538c34056a3STejun Heo 	kthread_stop(worker->task);
539c34056a3STejun Heo 	kfree(worker);
540c34056a3STejun Heo 
541c34056a3STejun Heo 	spin_lock(&workqueue_lock);
542c34056a3STejun Heo 	ida_remove(&per_cpu(worker_ida, cpu), id);
543c34056a3STejun Heo 	spin_unlock(&workqueue_lock);
544c34056a3STejun Heo }
545c34056a3STejun Heo 
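/*
 * Rough sketch of the worker lifecycle implemented by the three helpers
 * above, following their CONTEXT annotations (error handling elided):
 *
 *	worker = create_worker(cwq, true);	may sleep, GFP_KERNEL allocations
 *	if (worker) {
 *		spin_lock_irq(&cwq->lock);
 *		start_worker(worker);
 *		spin_unlock_irq(&cwq->lock);
 *	}
 *	...
 *	destroy_worker(worker);			sleeps in kthread_stop()
 */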
546a62428c0STejun Heo /**
547*affee4b2STejun Heo  * move_linked_works - move linked works to a list
548*affee4b2STejun Heo  * @work: start of series of works to be scheduled
549*affee4b2STejun Heo  * @head: target list to append @work to
550*affee4b2STejun Heo  * @nextp: out parameter for nested worklist walking
551*affee4b2STejun Heo  *
552*affee4b2STejun Heo  * Schedule linked works starting from @work to @head.  Work series to
553*affee4b2STejun Heo  * be scheduled starts at @work and includes any consecutive work with
554*affee4b2STejun Heo  * WORK_STRUCT_LINKED set in its predecessor.
555*affee4b2STejun Heo  *
556*affee4b2STejun Heo  * If @nextp is not NULL, it's updated to point to the next work of
557*affee4b2STejun Heo  * the last scheduled work.  This allows move_linked_works() to be
558*affee4b2STejun Heo  * nested inside outer list_for_each_entry_safe().
559*affee4b2STejun Heo  *
560*affee4b2STejun Heo  * CONTEXT:
561*affee4b2STejun Heo  * spin_lock_irq(cwq->lock).
562*affee4b2STejun Heo  */
563*affee4b2STejun Heo static void move_linked_works(struct work_struct *work, struct list_head *head,
564*affee4b2STejun Heo 			      struct work_struct **nextp)
565*affee4b2STejun Heo {
566*affee4b2STejun Heo 	struct work_struct *n;
567*affee4b2STejun Heo 
568*affee4b2STejun Heo 	/*
569*affee4b2STejun Heo 	 * Linked worklist will always end before the end of the list,
570*affee4b2STejun Heo 	 * use NULL for list head.
571*affee4b2STejun Heo 	 */
572*affee4b2STejun Heo 	list_for_each_entry_safe_from(work, n, NULL, entry) {
573*affee4b2STejun Heo 		list_move_tail(&work->entry, head);
574*affee4b2STejun Heo 		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
575*affee4b2STejun Heo 			break;
576*affee4b2STejun Heo 	}
577*affee4b2STejun Heo 
578*affee4b2STejun Heo 	/*
579*affee4b2STejun Heo 	 * If we're already inside safe list traversal and have moved
580*affee4b2STejun Heo 	 * multiple works to the scheduled queue, the next position
581*affee4b2STejun Heo 	 * needs to be updated.
582*affee4b2STejun Heo 	 */
583*affee4b2STejun Heo 	if (nextp)
584*affee4b2STejun Heo 		*nextp = n;
585*affee4b2STejun Heo }
586*affee4b2STejun Heo 
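/*
 * Sketch of the @nextp contract documented above: when a linked series is
 * moved out of a list that is being walked with list_for_each_entry_safe(),
 * passing &n keeps the outer iteration valid.  should_defer() is a
 * hypothetical predicate:
 *
 *	struct work_struct *work, *n;
 *
 *	list_for_each_entry_safe(work, n, &cwq->worklist, entry) {
 *		if (should_defer(work))
 *			move_linked_works(work, &worker->scheduled, &n);
 *	}
 */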
587*affee4b2STejun Heo /**
58873f53c4aSTejun Heo  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
58973f53c4aSTejun Heo  * @cwq: cwq of interest
59073f53c4aSTejun Heo  * @color: color of work which left the queue
59173f53c4aSTejun Heo  *
59273f53c4aSTejun Heo  * A work either has completed or is removed from pending queue,
59273f53c4aSTejun Heo  * A work has either completed or been removed from the pending queue;
59373f53c4aSTejun Heo  * decrement nr_in_flight of its cwq and handle workqueue flushing.
59573f53c4aSTejun Heo  * CONTEXT:
59673f53c4aSTejun Heo  * spin_lock_irq(cwq->lock).
59773f53c4aSTejun Heo  */
59873f53c4aSTejun Heo static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
59973f53c4aSTejun Heo {
60073f53c4aSTejun Heo 	/* ignore uncolored works */
60173f53c4aSTejun Heo 	if (color == WORK_NO_COLOR)
60273f53c4aSTejun Heo 		return;
60373f53c4aSTejun Heo 
60473f53c4aSTejun Heo 	cwq->nr_in_flight[color]--;
60573f53c4aSTejun Heo 
60673f53c4aSTejun Heo 	/* is flush in progress and are we at the flushing tip? */
60773f53c4aSTejun Heo 	if (likely(cwq->flush_color != color))
60873f53c4aSTejun Heo 		return;
60973f53c4aSTejun Heo 
61073f53c4aSTejun Heo 	/* are there still in-flight works? */
61173f53c4aSTejun Heo 	if (cwq->nr_in_flight[color])
61273f53c4aSTejun Heo 		return;
61373f53c4aSTejun Heo 
61473f53c4aSTejun Heo 	/* this cwq is done, clear flush_color */
61573f53c4aSTejun Heo 	cwq->flush_color = -1;
61673f53c4aSTejun Heo 
61773f53c4aSTejun Heo 	/*
61873f53c4aSTejun Heo 	 * If this was the last cwq, wake up the first flusher.  It
61973f53c4aSTejun Heo 	 * will handle the rest.
62073f53c4aSTejun Heo 	 */
62173f53c4aSTejun Heo 	if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
62273f53c4aSTejun Heo 		complete(&cwq->wq->first_flusher->done);
62373f53c4aSTejun Heo }
62473f53c4aSTejun Heo 
62573f53c4aSTejun Heo /**
626a62428c0STejun Heo  * process_one_work - process single work
627c34056a3STejun Heo  * @worker: self
628a62428c0STejun Heo  * @work: work to process
629a62428c0STejun Heo  *
630a62428c0STejun Heo  * Process @work.  This function contains all the logic necessary to
631a62428c0STejun Heo  * process a single work including synchronization against and
632a62428c0STejun Heo  * interaction with other workers on the same cpu, queueing and
633a62428c0STejun Heo  * flushing.  As long as the context requirement is met, any worker can
634a62428c0STejun Heo  * call this function to process a work.
635a62428c0STejun Heo  *
636a62428c0STejun Heo  * CONTEXT:
637a62428c0STejun Heo  * spin_lock_irq(cwq->lock) which is released and regrabbed.
638a62428c0STejun Heo  */
639c34056a3STejun Heo static void process_one_work(struct worker *worker, struct work_struct *work)
6401da177e4SLinus Torvalds {
641c34056a3STejun Heo 	struct cpu_workqueue_struct *cwq = worker->cwq;
6426bb49e59SDavid Howells 	work_func_t f = work->func;
64373f53c4aSTejun Heo 	int work_color;
6444e6045f1SJohannes Berg #ifdef CONFIG_LOCKDEP
6454e6045f1SJohannes Berg 	/*
646a62428c0STejun Heo 	 * It is permissible to free the struct work_struct from
647a62428c0STejun Heo 	 * inside the function that is called from it; we need to take
648a62428c0STejun Heo 	 * this into account for lockdep too.  To avoid bogus "held
649a62428c0STejun Heo 	 * lock freed" warnings as well as problems when looking into
650a62428c0STejun Heo 	 * work->lockdep_map, make a copy and use that here.
6514e6045f1SJohannes Berg 	 */
6524e6045f1SJohannes Berg 	struct lockdep_map lockdep_map = work->lockdep_map;
6534e6045f1SJohannes Berg #endif
654a62428c0STejun Heo 	/* claim and process */
655dc186ad7SThomas Gleixner 	debug_work_deactivate(work);
656c34056a3STejun Heo 	worker->current_work = work;
65773f53c4aSTejun Heo 	work_color = get_work_color(work);
658a62428c0STejun Heo 	list_del_init(&work->entry);
659a62428c0STejun Heo 
660f293ea92SOleg Nesterov 	spin_unlock_irq(&cwq->lock);
6611da177e4SLinus Torvalds 
662365970a1SDavid Howells 	BUG_ON(get_wq_data(work) != cwq);
66323b2e599SOleg Nesterov 	work_clear_pending(work);
6643295f0efSIngo Molnar 	lock_map_acquire(&cwq->wq->lockdep_map);
6653295f0efSIngo Molnar 	lock_map_acquire(&lockdep_map);
66665f27f38SDavid Howells 	f(work);
6673295f0efSIngo Molnar 	lock_map_release(&lockdep_map);
6683295f0efSIngo Molnar 	lock_map_release(&cwq->wq->lockdep_map);
6691da177e4SLinus Torvalds 
670d5abe669SPeter Zijlstra 	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
671d5abe669SPeter Zijlstra 		printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
672d5abe669SPeter Zijlstra 		       "%s/0x%08x/%d\n",
673a62428c0STejun Heo 		       current->comm, preempt_count(), task_pid_nr(current));
674d5abe669SPeter Zijlstra 		printk(KERN_ERR "    last function: ");
675d5abe669SPeter Zijlstra 		print_symbol("%s\n", (unsigned long)f);
676d5abe669SPeter Zijlstra 		debug_show_held_locks(current);
677d5abe669SPeter Zijlstra 		dump_stack();
678d5abe669SPeter Zijlstra 	}
679d5abe669SPeter Zijlstra 
680f293ea92SOleg Nesterov 	spin_lock_irq(&cwq->lock);
681a62428c0STejun Heo 
682a62428c0STejun Heo 	/* we're done with it, release */
683c34056a3STejun Heo 	worker->current_work = NULL;
68473f53c4aSTejun Heo 	cwq_dec_nr_in_flight(cwq, work_color);
6851da177e4SLinus Torvalds }
686a62428c0STejun Heo 
687*affee4b2STejun Heo /**
688*affee4b2STejun Heo  * process_scheduled_works - process scheduled works
689*affee4b2STejun Heo  * @worker: self
690*affee4b2STejun Heo  *
691*affee4b2STejun Heo  * Process all scheduled works.  Please note that the scheduled list
692*affee4b2STejun Heo  * may change while processing a work, so this function repeatedly
693*affee4b2STejun Heo  * fetches a work from the top and executes it.
694*affee4b2STejun Heo  *
695*affee4b2STejun Heo  * CONTEXT:
696*affee4b2STejun Heo  * spin_lock_irq(cwq->lock) which may be released and regrabbed
697*affee4b2STejun Heo  * multiple times.
698*affee4b2STejun Heo  */
699*affee4b2STejun Heo static void process_scheduled_works(struct worker *worker)
700a62428c0STejun Heo {
701*affee4b2STejun Heo 	while (!list_empty(&worker->scheduled)) {
702*affee4b2STejun Heo 		struct work_struct *work = list_first_entry(&worker->scheduled,
703a62428c0STejun Heo 						struct work_struct, entry);
704c34056a3STejun Heo 		process_one_work(worker, work);
705a62428c0STejun Heo 	}
7061da177e4SLinus Torvalds }
7071da177e4SLinus Torvalds 
7084690c4abSTejun Heo /**
7094690c4abSTejun Heo  * worker_thread - the worker thread function
710c34056a3STejun Heo  * @__worker: self
7114690c4abSTejun Heo  *
7124690c4abSTejun Heo  * The cwq worker thread function.
7134690c4abSTejun Heo  */
714c34056a3STejun Heo static int worker_thread(void *__worker)
7151da177e4SLinus Torvalds {
716c34056a3STejun Heo 	struct worker *worker = __worker;
717c34056a3STejun Heo 	struct cpu_workqueue_struct *cwq = worker->cwq;
7183af24433SOleg Nesterov 	DEFINE_WAIT(wait);
7191da177e4SLinus Torvalds 
72097e37d7bSTejun Heo 	if (cwq->wq->flags & WQ_FREEZEABLE)
72183144186SRafael J. Wysocki 		set_freezable();
7221da177e4SLinus Torvalds 
7233af24433SOleg Nesterov 	for (;;) {
7243af24433SOleg Nesterov 		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
72514441960SOleg Nesterov 		if (!freezing(current) &&
72614441960SOleg Nesterov 		    !kthread_should_stop() &&
72714441960SOleg Nesterov 		    list_empty(&cwq->worklist))
7281da177e4SLinus Torvalds 			schedule();
7293af24433SOleg Nesterov 		finish_wait(&cwq->more_work, &wait);
7301da177e4SLinus Torvalds 
73185f4186aSOleg Nesterov 		try_to_freeze();
73285f4186aSOleg Nesterov 
73314441960SOleg Nesterov 		if (kthread_should_stop())
7343af24433SOleg Nesterov 			break;
7353af24433SOleg Nesterov 
736c34056a3STejun Heo 		if (unlikely(!cpumask_equal(&worker->task->cpus_allowed,
7371537663fSTejun Heo 					    get_cpu_mask(cwq->cpu))))
738c34056a3STejun Heo 			set_cpus_allowed_ptr(worker->task,
7391537663fSTejun Heo 					     get_cpu_mask(cwq->cpu));
740*affee4b2STejun Heo 
741*affee4b2STejun Heo 		spin_lock_irq(&cwq->lock);
742*affee4b2STejun Heo 
743*affee4b2STejun Heo 		while (!list_empty(&cwq->worklist)) {
744*affee4b2STejun Heo 			struct work_struct *work =
745*affee4b2STejun Heo 				list_first_entry(&cwq->worklist,
746*affee4b2STejun Heo 						 struct work_struct, entry);
747*affee4b2STejun Heo 
748*affee4b2STejun Heo 			if (likely(!(*work_data_bits(work) &
749*affee4b2STejun Heo 				     WORK_STRUCT_LINKED))) {
750*affee4b2STejun Heo 				/* optimization path, not strictly necessary */
751*affee4b2STejun Heo 				process_one_work(worker, work);
752*affee4b2STejun Heo 				if (unlikely(!list_empty(&worker->scheduled)))
753*affee4b2STejun Heo 					process_scheduled_works(worker);
754*affee4b2STejun Heo 			} else {
755*affee4b2STejun Heo 				move_linked_works(work, &worker->scheduled,
756*affee4b2STejun Heo 						  NULL);
757*affee4b2STejun Heo 				process_scheduled_works(worker);
758*affee4b2STejun Heo 			}
759*affee4b2STejun Heo 		}
760*affee4b2STejun Heo 
761*affee4b2STejun Heo 		spin_unlock_irq(&cwq->lock);
7621da177e4SLinus Torvalds 	}
7633af24433SOleg Nesterov 
7641da177e4SLinus Torvalds 	return 0;
7651da177e4SLinus Torvalds }
7661da177e4SLinus Torvalds 
767fc2e4d70SOleg Nesterov struct wq_barrier {
768fc2e4d70SOleg Nesterov 	struct work_struct	work;
769fc2e4d70SOleg Nesterov 	struct completion	done;
770fc2e4d70SOleg Nesterov };
771fc2e4d70SOleg Nesterov 
772fc2e4d70SOleg Nesterov static void wq_barrier_func(struct work_struct *work)
773fc2e4d70SOleg Nesterov {
774fc2e4d70SOleg Nesterov 	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
775fc2e4d70SOleg Nesterov 	complete(&barr->done);
776fc2e4d70SOleg Nesterov }
777fc2e4d70SOleg Nesterov 
7784690c4abSTejun Heo /**
7794690c4abSTejun Heo  * insert_wq_barrier - insert a barrier work
7804690c4abSTejun Heo  * @cwq: cwq to insert barrier into
7814690c4abSTejun Heo  * @barr: wq_barrier to insert
782*affee4b2STejun Heo  * @target: target work to attach @barr to
783*affee4b2STejun Heo  * @worker: worker currently executing @target, NULL if @target is not executing
7844690c4abSTejun Heo  *
785*affee4b2STejun Heo  * @barr is linked to @target such that @barr is completed only after
786*affee4b2STejun Heo  * @target finishes execution.  Please note that the ordering
787*affee4b2STejun Heo  * guarantee is observed only with respect to @target and on the local
788*affee4b2STejun Heo  * cpu.
789*affee4b2STejun Heo  *
790*affee4b2STejun Heo  * Currently, a queued barrier can't be canceled.  This is because
791*affee4b2STejun Heo  * try_to_grab_pending() can't determine whether the work to be
792*affee4b2STejun Heo  * grabbed is at the head of the queue and thus can't clear LINKED
793*affee4b2STejun Heo  * flag of the previous work while there must be a valid next work
794*affee4b2STejun Heo  * after a work with LINKED flag set.
795*affee4b2STejun Heo  *
796*affee4b2STejun Heo  * Note that when @worker is non-NULL, @target may be modified
797*affee4b2STejun Heo  * underneath us, so we can't reliably determine cwq from @target.
7984690c4abSTejun Heo  *
7994690c4abSTejun Heo  * CONTEXT:
8004690c4abSTejun Heo  * spin_lock_irq(cwq->lock).
8014690c4abSTejun Heo  */
80283c22520SOleg Nesterov static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
803*affee4b2STejun Heo 			      struct wq_barrier *barr,
804*affee4b2STejun Heo 			      struct work_struct *target, struct worker *worker)
805fc2e4d70SOleg Nesterov {
806*affee4b2STejun Heo 	struct list_head *head;
807*affee4b2STejun Heo 	unsigned int linked = 0;
808*affee4b2STejun Heo 
809dc186ad7SThomas Gleixner 	/*
810dc186ad7SThomas Gleixner 	 * debugobject calls are safe here even with cwq->lock locked
811dc186ad7SThomas Gleixner 	 * as we know for sure that this will not trigger any of the
812dc186ad7SThomas Gleixner 	 * checks and call back into the fixup functions where we
813dc186ad7SThomas Gleixner 	 * might deadlock.
814dc186ad7SThomas Gleixner 	 */
815dc186ad7SThomas Gleixner 	INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
81622df02bbSTejun Heo 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
817fc2e4d70SOleg Nesterov 	init_completion(&barr->done);
81883c22520SOleg Nesterov 
819*affee4b2STejun Heo 	/*
820*affee4b2STejun Heo 	 * If @target is currently being executed, schedule the
821*affee4b2STejun Heo 	 * barrier to the worker; otherwise, put it after @target.
822*affee4b2STejun Heo 	 */
823*affee4b2STejun Heo 	if (worker)
824*affee4b2STejun Heo 		head = worker->scheduled.next;
825*affee4b2STejun Heo 	else {
826*affee4b2STejun Heo 		unsigned long *bits = work_data_bits(target);
827*affee4b2STejun Heo 
828*affee4b2STejun Heo 		head = target->entry.next;
829*affee4b2STejun Heo 		/* there can already be other linked works, inherit and set */
830*affee4b2STejun Heo 		linked = *bits & WORK_STRUCT_LINKED;
831*affee4b2STejun Heo 		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
832*affee4b2STejun Heo 	}
833*affee4b2STejun Heo 
834dc186ad7SThomas Gleixner 	debug_work_activate(&barr->work);
835*affee4b2STejun Heo 	insert_work(cwq, &barr->work, head,
836*affee4b2STejun Heo 		    work_color_to_flags(WORK_NO_COLOR) | linked);
837fc2e4d70SOleg Nesterov }
838fc2e4d70SOleg Nesterov 
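/*
 * The barrier above is always used in the same on-stack pattern, as in
 * flush_work() and wait_on_cpu_work() below:
 *
 *	struct wq_barrier barr;
 *
 *	spin_lock_irq(&cwq->lock);
 *	insert_wq_barrier(cwq, &barr, work, worker);
 *	spin_unlock_irq(&cwq->lock);
 *
 *	wait_for_completion(&barr.done);
 *	destroy_work_on_stack(&barr.work);
 */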
83973f53c4aSTejun Heo /**
84073f53c4aSTejun Heo  * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
84173f53c4aSTejun Heo  * @wq: workqueue being flushed
84273f53c4aSTejun Heo  * @flush_color: new flush color, < 0 for no-op
84373f53c4aSTejun Heo  * @work_color: new work color, < 0 for no-op
84473f53c4aSTejun Heo  *
84573f53c4aSTejun Heo  * Prepare cwqs for workqueue flushing.
84673f53c4aSTejun Heo  *
84773f53c4aSTejun Heo  * If @flush_color is non-negative, flush_color on all cwqs should be
84873f53c4aSTejun Heo  * -1.  If no cwq has in-flight commands at the specified color, all
84973f53c4aSTejun Heo  * cwq->flush_color's stay at -1 and %false is returned.  If any cwq
85073f53c4aSTejun Heo  * has in flight commands, its cwq->flush_color is set to
85173f53c4aSTejun Heo  * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
85273f53c4aSTejun Heo  * wakeup logic is armed and %true is returned.
85373f53c4aSTejun Heo  *
85473f53c4aSTejun Heo  * The caller should have initialized @wq->first_flusher prior to
85573f53c4aSTejun Heo  * calling this function with non-negative @flush_color.  If
85673f53c4aSTejun Heo  * @flush_color is negative, no flush color update is done and %false
85773f53c4aSTejun Heo  * is returned.
85873f53c4aSTejun Heo  *
85973f53c4aSTejun Heo  * If @work_color is non-negative, all cwqs should have the same
86073f53c4aSTejun Heo  * work_color which is previous to @work_color and all will be
86173f53c4aSTejun Heo  * advanced to @work_color.
86273f53c4aSTejun Heo  *
86373f53c4aSTejun Heo  * CONTEXT:
86473f53c4aSTejun Heo  * mutex_lock(wq->flush_mutex).
86573f53c4aSTejun Heo  *
86673f53c4aSTejun Heo  * RETURNS:
86773f53c4aSTejun Heo  * %true if @flush_color >= 0 and there's something to flush.  %false
86873f53c4aSTejun Heo  * otherwise.
86973f53c4aSTejun Heo  */
87073f53c4aSTejun Heo static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
87173f53c4aSTejun Heo 				      int flush_color, int work_color)
8721da177e4SLinus Torvalds {
87373f53c4aSTejun Heo 	bool wait = false;
87473f53c4aSTejun Heo 	unsigned int cpu;
8751da177e4SLinus Torvalds 
87673f53c4aSTejun Heo 	if (flush_color >= 0) {
87773f53c4aSTejun Heo 		BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
87873f53c4aSTejun Heo 		atomic_set(&wq->nr_cwqs_to_flush, 1);
87973f53c4aSTejun Heo 	}
88073f53c4aSTejun Heo 
88173f53c4aSTejun Heo 	for_each_possible_cpu(cpu) {
88273f53c4aSTejun Heo 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
8832355b70fSLai Jiangshan 
88483c22520SOleg Nesterov 		spin_lock_irq(&cwq->lock);
88573f53c4aSTejun Heo 
88673f53c4aSTejun Heo 		if (flush_color >= 0) {
88773f53c4aSTejun Heo 			BUG_ON(cwq->flush_color != -1);
88873f53c4aSTejun Heo 
88973f53c4aSTejun Heo 			if (cwq->nr_in_flight[flush_color]) {
89073f53c4aSTejun Heo 				cwq->flush_color = flush_color;
89173f53c4aSTejun Heo 				atomic_inc(&wq->nr_cwqs_to_flush);
89273f53c4aSTejun Heo 				wait = true;
89383c22520SOleg Nesterov 			}
89473f53c4aSTejun Heo 		}
89573f53c4aSTejun Heo 
89673f53c4aSTejun Heo 		if (work_color >= 0) {
89773f53c4aSTejun Heo 			BUG_ON(work_color != work_next_color(cwq->work_color));
89873f53c4aSTejun Heo 			cwq->work_color = work_color;
89973f53c4aSTejun Heo 		}
90073f53c4aSTejun Heo 
90183c22520SOleg Nesterov 		spin_unlock_irq(&cwq->lock);
902dc186ad7SThomas Gleixner 	}
90314441960SOleg Nesterov 
90473f53c4aSTejun Heo 	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
90573f53c4aSTejun Heo 		complete(&wq->first_flusher->done);
90673f53c4aSTejun Heo 
90773f53c4aSTejun Heo 	return wait;
90883c22520SOleg Nesterov }
9091da177e4SLinus Torvalds 
9100fcb78c2SRolf Eike Beer /**
9111da177e4SLinus Torvalds  * flush_workqueue - ensure that any scheduled work has run to completion.
9120fcb78c2SRolf Eike Beer  * @wq: workqueue to flush
9131da177e4SLinus Torvalds  *
9141da177e4SLinus Torvalds  * Forces execution of the workqueue and blocks until its completion.
9151da177e4SLinus Torvalds  * This is typically used in driver shutdown handlers.
9161da177e4SLinus Torvalds  *
917fc2e4d70SOleg Nesterov  * We sleep until all works which were queued on entry have been handled,
918fc2e4d70SOleg Nesterov  * but we are not livelocked by new incoming ones.
9191da177e4SLinus Torvalds  */
9207ad5b3a5SHarvey Harrison void flush_workqueue(struct workqueue_struct *wq)
9211da177e4SLinus Torvalds {
92273f53c4aSTejun Heo 	struct wq_flusher this_flusher = {
92373f53c4aSTejun Heo 		.list = LIST_HEAD_INIT(this_flusher.list),
92473f53c4aSTejun Heo 		.flush_color = -1,
92573f53c4aSTejun Heo 		.done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
92673f53c4aSTejun Heo 	};
92773f53c4aSTejun Heo 	int next_color;
928b1f4ec17SOleg Nesterov 
9293295f0efSIngo Molnar 	lock_map_acquire(&wq->lockdep_map);
9303295f0efSIngo Molnar 	lock_map_release(&wq->lockdep_map);
93173f53c4aSTejun Heo 
93273f53c4aSTejun Heo 	mutex_lock(&wq->flush_mutex);
93373f53c4aSTejun Heo 
93473f53c4aSTejun Heo 	/*
93573f53c4aSTejun Heo 	 * Start-to-wait phase
93673f53c4aSTejun Heo 	 */
93773f53c4aSTejun Heo 	next_color = work_next_color(wq->work_color);
93873f53c4aSTejun Heo 
93973f53c4aSTejun Heo 	if (next_color != wq->flush_color) {
94073f53c4aSTejun Heo 		/*
94173f53c4aSTejun Heo 		 * Color space is not full.  The current work_color
94273f53c4aSTejun Heo 		 * becomes our flush_color and work_color is advanced
94373f53c4aSTejun Heo 		 * by one.
94473f53c4aSTejun Heo 		 */
94573f53c4aSTejun Heo 		BUG_ON(!list_empty(&wq->flusher_overflow));
94673f53c4aSTejun Heo 		this_flusher.flush_color = wq->work_color;
94773f53c4aSTejun Heo 		wq->work_color = next_color;
94873f53c4aSTejun Heo 
94973f53c4aSTejun Heo 		if (!wq->first_flusher) {
95073f53c4aSTejun Heo 			/* no flush in progress, become the first flusher */
95173f53c4aSTejun Heo 			BUG_ON(wq->flush_color != this_flusher.flush_color);
95273f53c4aSTejun Heo 
95373f53c4aSTejun Heo 			wq->first_flusher = &this_flusher;
95473f53c4aSTejun Heo 
95573f53c4aSTejun Heo 			if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
95673f53c4aSTejun Heo 						       wq->work_color)) {
95773f53c4aSTejun Heo 				/* nothing to flush, done */
95873f53c4aSTejun Heo 				wq->flush_color = next_color;
95973f53c4aSTejun Heo 				wq->first_flusher = NULL;
96073f53c4aSTejun Heo 				goto out_unlock;
96173f53c4aSTejun Heo 			}
96273f53c4aSTejun Heo 		} else {
96373f53c4aSTejun Heo 			/* wait in queue */
96473f53c4aSTejun Heo 			BUG_ON(wq->flush_color == this_flusher.flush_color);
96573f53c4aSTejun Heo 			list_add_tail(&this_flusher.list, &wq->flusher_queue);
96673f53c4aSTejun Heo 			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
96773f53c4aSTejun Heo 		}
96873f53c4aSTejun Heo 	} else {
96973f53c4aSTejun Heo 		/*
97073f53c4aSTejun Heo 		 * Oops, color space is full, wait on overflow queue.
97173f53c4aSTejun Heo 		 * The next flush completion will assign us
97273f53c4aSTejun Heo 		 * flush_color and transfer to flusher_queue.
97373f53c4aSTejun Heo 		 */
97473f53c4aSTejun Heo 		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
97573f53c4aSTejun Heo 	}
97673f53c4aSTejun Heo 
97773f53c4aSTejun Heo 	mutex_unlock(&wq->flush_mutex);
97873f53c4aSTejun Heo 
97973f53c4aSTejun Heo 	wait_for_completion(&this_flusher.done);
98073f53c4aSTejun Heo 
98173f53c4aSTejun Heo 	/*
98273f53c4aSTejun Heo 	 * Wake-up-and-cascade phase
98373f53c4aSTejun Heo 	 *
98473f53c4aSTejun Heo 	 * First flushers are responsible for cascading flushes and
98573f53c4aSTejun Heo 	 * handling overflow.  Non-first flushers can simply return.
98673f53c4aSTejun Heo 	 */
98773f53c4aSTejun Heo 	if (wq->first_flusher != &this_flusher)
98873f53c4aSTejun Heo 		return;
98973f53c4aSTejun Heo 
99073f53c4aSTejun Heo 	mutex_lock(&wq->flush_mutex);
99173f53c4aSTejun Heo 
99273f53c4aSTejun Heo 	wq->first_flusher = NULL;
99373f53c4aSTejun Heo 
99473f53c4aSTejun Heo 	BUG_ON(!list_empty(&this_flusher.list));
99573f53c4aSTejun Heo 	BUG_ON(wq->flush_color != this_flusher.flush_color);
99673f53c4aSTejun Heo 
99773f53c4aSTejun Heo 	while (true) {
99873f53c4aSTejun Heo 		struct wq_flusher *next, *tmp;
99973f53c4aSTejun Heo 
100073f53c4aSTejun Heo 		/* complete all the flushers sharing the current flush color */
100173f53c4aSTejun Heo 		list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
100273f53c4aSTejun Heo 			if (next->flush_color != wq->flush_color)
100373f53c4aSTejun Heo 				break;
100473f53c4aSTejun Heo 			list_del_init(&next->list);
100573f53c4aSTejun Heo 			complete(&next->done);
100673f53c4aSTejun Heo 		}
100773f53c4aSTejun Heo 
100873f53c4aSTejun Heo 		BUG_ON(!list_empty(&wq->flusher_overflow) &&
100973f53c4aSTejun Heo 		       wq->flush_color != work_next_color(wq->work_color));
101073f53c4aSTejun Heo 
101173f53c4aSTejun Heo 		/* this flush_color is finished, advance by one */
101273f53c4aSTejun Heo 		wq->flush_color = work_next_color(wq->flush_color);
101373f53c4aSTejun Heo 
101473f53c4aSTejun Heo 		/* one color has been freed, handle overflow queue */
101573f53c4aSTejun Heo 		if (!list_empty(&wq->flusher_overflow)) {
101673f53c4aSTejun Heo 			/*
101773f53c4aSTejun Heo 			 * Assign the same color to all overflowed
101873f53c4aSTejun Heo 			 * flushers, advance work_color and append to
101973f53c4aSTejun Heo 			 * flusher_queue.  This is the start-to-wait
102073f53c4aSTejun Heo 			 * phase for these overflowed flushers.
102173f53c4aSTejun Heo 			 */
102273f53c4aSTejun Heo 			list_for_each_entry(tmp, &wq->flusher_overflow, list)
102373f53c4aSTejun Heo 				tmp->flush_color = wq->work_color;
102473f53c4aSTejun Heo 
102573f53c4aSTejun Heo 			wq->work_color = work_next_color(wq->work_color);
102673f53c4aSTejun Heo 
102773f53c4aSTejun Heo 			list_splice_tail_init(&wq->flusher_overflow,
102873f53c4aSTejun Heo 					      &wq->flusher_queue);
102973f53c4aSTejun Heo 			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
103073f53c4aSTejun Heo 		}
103173f53c4aSTejun Heo 
103273f53c4aSTejun Heo 		if (list_empty(&wq->flusher_queue)) {
103373f53c4aSTejun Heo 			BUG_ON(wq->flush_color != wq->work_color);
103473f53c4aSTejun Heo 			break;
103573f53c4aSTejun Heo 		}
103673f53c4aSTejun Heo 
103773f53c4aSTejun Heo 		/*
103873f53c4aSTejun Heo 		 * Need to flush more colors.  Make the next flusher
103973f53c4aSTejun Heo 		 * the new first flusher and arm cwqs.
104073f53c4aSTejun Heo 		 */
104173f53c4aSTejun Heo 		BUG_ON(wq->flush_color == wq->work_color);
104273f53c4aSTejun Heo 		BUG_ON(wq->flush_color != next->flush_color);
104373f53c4aSTejun Heo 
104473f53c4aSTejun Heo 		list_del_init(&next->list);
104573f53c4aSTejun Heo 		wq->first_flusher = next;
104673f53c4aSTejun Heo 
104773f53c4aSTejun Heo 		if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
104873f53c4aSTejun Heo 			break;
104973f53c4aSTejun Heo 
105073f53c4aSTejun Heo 		/*
105173f53c4aSTejun Heo 		 * Meh... this color is already done, clear first
105273f53c4aSTejun Heo 		 * flusher and repeat cascading.
105373f53c4aSTejun Heo 		 */
105473f53c4aSTejun Heo 		wq->first_flusher = NULL;
105573f53c4aSTejun Heo 	}
105673f53c4aSTejun Heo 
105773f53c4aSTejun Heo out_unlock:
105873f53c4aSTejun Heo 	mutex_unlock(&wq->flush_mutex);
10591da177e4SLinus Torvalds }
1060ae90dd5dSDave Jones EXPORT_SYMBOL_GPL(flush_workqueue);
10611da177e4SLinus Torvalds 
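/*
 * Typical shutdown-path sketch for flush_workqueue(); the workqueue and
 * work item names are hypothetical:
 *
 *	my_wq = create_workqueue("mydev");
 *	...
 *	queue_work(my_wq, &dev->reset_work);
 *	...
 *	on remove/shutdown:
 *	flush_workqueue(my_wq);		wait for everything queued so far
 *	destroy_workqueue(my_wq);
 */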
1062db700897SOleg Nesterov /**
1063db700897SOleg Nesterov  * flush_work - block until a work_struct's callback has terminated
1064db700897SOleg Nesterov  * @work: the work which is to be flushed
1065db700897SOleg Nesterov  *
1066a67da70dSOleg Nesterov  * Returns false if @work has already terminated.
1067a67da70dSOleg Nesterov  *
1068db700897SOleg Nesterov  * It is expected that, prior to calling flush_work(), the caller has
1069db700897SOleg Nesterov  * arranged for the work to not be requeued, otherwise it doesn't make
1070db700897SOleg Nesterov  * sense to use this function.
1071db700897SOleg Nesterov  */
1072db700897SOleg Nesterov int flush_work(struct work_struct *work)
1073db700897SOleg Nesterov {
1074*affee4b2STejun Heo 	struct worker *worker = NULL;
1075db700897SOleg Nesterov 	struct cpu_workqueue_struct *cwq;
1076db700897SOleg Nesterov 	struct wq_barrier barr;
1077db700897SOleg Nesterov 
1078db700897SOleg Nesterov 	might_sleep();
1079db700897SOleg Nesterov 	cwq = get_wq_data(work);
1080db700897SOleg Nesterov 	if (!cwq)
1081db700897SOleg Nesterov 		return 0;
1082db700897SOleg Nesterov 
10833295f0efSIngo Molnar 	lock_map_acquire(&cwq->wq->lockdep_map);
10843295f0efSIngo Molnar 	lock_map_release(&cwq->wq->lockdep_map);
1085a67da70dSOleg Nesterov 
1086db700897SOleg Nesterov 	spin_lock_irq(&cwq->lock);
1087db700897SOleg Nesterov 	if (!list_empty(&work->entry)) {
1088db700897SOleg Nesterov 		/*
1089db700897SOleg Nesterov 		 * See the comment near try_to_grab_pending()->smp_rmb().
1090db700897SOleg Nesterov 		 * If it was re-queued under us we are not going to wait.
1091db700897SOleg Nesterov 		 */
1092db700897SOleg Nesterov 		smp_rmb();
1093db700897SOleg Nesterov 		if (unlikely(cwq != get_wq_data(work)))
10944690c4abSTejun Heo 			goto already_gone;
1095db700897SOleg Nesterov 	} else {
1096*affee4b2STejun Heo 		if (cwq->worker && cwq->worker->current_work == work)
1097*affee4b2STejun Heo 			worker = cwq->worker;
1098*affee4b2STejun Heo 		if (!worker)
10994690c4abSTejun Heo 			goto already_gone;
1100db700897SOleg Nesterov 	}
1101db700897SOleg Nesterov 
1102*affee4b2STejun Heo 	insert_wq_barrier(cwq, &barr, work, worker);
11034690c4abSTejun Heo 	spin_unlock_irq(&cwq->lock);
1104db700897SOleg Nesterov 	wait_for_completion(&barr.done);
1105dc186ad7SThomas Gleixner 	destroy_work_on_stack(&barr.work);
1106db700897SOleg Nesterov 	return 1;
11074690c4abSTejun Heo already_gone:
11084690c4abSTejun Heo 	spin_unlock_irq(&cwq->lock);
11094690c4abSTejun Heo 	return 0;
1110db700897SOleg Nesterov }
1111db700897SOleg Nesterov EXPORT_SYMBOL_GPL(flush_work);
1112db700897SOleg Nesterov 
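/*
 * Sketch of the calling convention described above for flush_work(): stop
 * the work from requeueing itself first, then flush.  dev->stop_requeue
 * and dev->work are hypothetical:
 *
 *	dev->stop_requeue = true;	checked by the handler before requeueing
 *	if (!flush_work(&dev->work))
 *		the callback had already terminated
 */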
11136e84d644SOleg Nesterov /*
11141f1f642eSOleg Nesterov  * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
11156e84d644SOleg Nesterov  * so this work can't be re-armed in any way.
11166e84d644SOleg Nesterov  */
11176e84d644SOleg Nesterov static int try_to_grab_pending(struct work_struct *work)
11186e84d644SOleg Nesterov {
11196e84d644SOleg Nesterov 	struct cpu_workqueue_struct *cwq;
11201f1f642eSOleg Nesterov 	int ret = -1;
11216e84d644SOleg Nesterov 
112222df02bbSTejun Heo 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
11231f1f642eSOleg Nesterov 		return 0;
11246e84d644SOleg Nesterov 
11256e84d644SOleg Nesterov 	/*
11266e84d644SOleg Nesterov 	 * The queueing is in progress, or it is already queued. Try to
11276e84d644SOleg Nesterov 	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
11286e84d644SOleg Nesterov 	 */
11296e84d644SOleg Nesterov 
11306e84d644SOleg Nesterov 	cwq = get_wq_data(work);
11316e84d644SOleg Nesterov 	if (!cwq)
11326e84d644SOleg Nesterov 		return ret;
11336e84d644SOleg Nesterov 
11346e84d644SOleg Nesterov 	spin_lock_irq(&cwq->lock);
11356e84d644SOleg Nesterov 	if (!list_empty(&work->entry)) {
11366e84d644SOleg Nesterov 		/*
11376e84d644SOleg Nesterov 		 * This work is queued, but perhaps we locked the wrong cwq.
11386e84d644SOleg Nesterov 		 * In that case we must see the new value after rmb(), see
11396e84d644SOleg Nesterov 		 * insert_work()->wmb().
11406e84d644SOleg Nesterov 		 */
11416e84d644SOleg Nesterov 		smp_rmb();
11426e84d644SOleg Nesterov 		if (cwq == get_wq_data(work)) {
1143dc186ad7SThomas Gleixner 			debug_work_deactivate(work);
11446e84d644SOleg Nesterov 			list_del_init(&work->entry);
114573f53c4aSTejun Heo 			cwq_dec_nr_in_flight(cwq, get_work_color(work));
11466e84d644SOleg Nesterov 			ret = 1;
11476e84d644SOleg Nesterov 		}
11486e84d644SOleg Nesterov 	}
11496e84d644SOleg Nesterov 	spin_unlock_irq(&cwq->lock);
11506e84d644SOleg Nesterov 
11516e84d644SOleg Nesterov 	return ret;
11526e84d644SOleg Nesterov }
11536e84d644SOleg Nesterov 
11546e84d644SOleg Nesterov static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
1155b89deed3SOleg Nesterov 				struct work_struct *work)
1156b89deed3SOleg Nesterov {
1157b89deed3SOleg Nesterov 	struct wq_barrier barr;
1158*affee4b2STejun Heo 	struct worker *worker;
1159b89deed3SOleg Nesterov 
1160b89deed3SOleg Nesterov 	spin_lock_irq(&cwq->lock);
1161*affee4b2STejun Heo 
1162*affee4b2STejun Heo 	worker = NULL;
1163c34056a3STejun Heo 	if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
1164*affee4b2STejun Heo 		worker = cwq->worker;
1165*affee4b2STejun Heo 		insert_wq_barrier(cwq, &barr, work, worker);
1166b89deed3SOleg Nesterov 	}
1167*affee4b2STejun Heo 
1168b89deed3SOleg Nesterov 	spin_unlock_irq(&cwq->lock);
1169b89deed3SOleg Nesterov 
1170*affee4b2STejun Heo 	if (unlikely(worker)) {
1171b89deed3SOleg Nesterov 		wait_for_completion(&barr.done);
1172dc186ad7SThomas Gleixner 		destroy_work_on_stack(&barr.work);
1173dc186ad7SThomas Gleixner 	}
1174b89deed3SOleg Nesterov }
1175b89deed3SOleg Nesterov 
11766e84d644SOleg Nesterov static void wait_on_work(struct work_struct *work)
1177b89deed3SOleg Nesterov {
1178b89deed3SOleg Nesterov 	struct cpu_workqueue_struct *cwq;
117928e53bddSOleg Nesterov 	struct workqueue_struct *wq;
1180b1f4ec17SOleg Nesterov 	int cpu;
1181b89deed3SOleg Nesterov 
1182f293ea92SOleg Nesterov 	might_sleep();
1183f293ea92SOleg Nesterov 
11843295f0efSIngo Molnar 	lock_map_acquire(&work->lockdep_map);
11853295f0efSIngo Molnar 	lock_map_release(&work->lockdep_map);
11864e6045f1SJohannes Berg 
1187b89deed3SOleg Nesterov 	cwq = get_wq_data(work);
1188b89deed3SOleg Nesterov 	if (!cwq)
11893af24433SOleg Nesterov 		return;
1190b89deed3SOleg Nesterov 
119128e53bddSOleg Nesterov 	wq = cwq->wq;
119228e53bddSOleg Nesterov 
11931537663fSTejun Heo 	for_each_possible_cpu(cpu)
11944690c4abSTejun Heo 		wait_on_cpu_work(get_cwq(cpu, wq), work);
11956e84d644SOleg Nesterov }
11966e84d644SOleg Nesterov 
11971f1f642eSOleg Nesterov static int __cancel_work_timer(struct work_struct *work,
11981f1f642eSOleg Nesterov 				struct timer_list* timer)
11991f1f642eSOleg Nesterov {
12001f1f642eSOleg Nesterov 	int ret;
12011f1f642eSOleg Nesterov 
12021f1f642eSOleg Nesterov 	do {
12031f1f642eSOleg Nesterov 		ret = (timer && likely(del_timer(timer)));
12041f1f642eSOleg Nesterov 		if (!ret)
12051f1f642eSOleg Nesterov 			ret = try_to_grab_pending(work);
12061f1f642eSOleg Nesterov 		wait_on_work(work);
12071f1f642eSOleg Nesterov 	} while (unlikely(ret < 0));
12081f1f642eSOleg Nesterov 
12094d707b9fSOleg Nesterov 	clear_wq_data(work);
12101f1f642eSOleg Nesterov 	return ret;
12111f1f642eSOleg Nesterov }
12121f1f642eSOleg Nesterov 
12136e84d644SOleg Nesterov /**
12146e84d644SOleg Nesterov  * cancel_work_sync - block until a work_struct's callback has terminated
12156e84d644SOleg Nesterov  * @work: the work to cancel
12166e84d644SOleg Nesterov  *
12171f1f642eSOleg Nesterov  * Returns true if @work was pending.
12181f1f642eSOleg Nesterov  *
12196e84d644SOleg Nesterov  * cancel_work_sync() will cancel the work if it is queued. If the work's
12206e84d644SOleg Nesterov  * callback appears to be running, cancel_work_sync() will block until it
12216e84d644SOleg Nesterov  * has completed.
12226e84d644SOleg Nesterov  *
12236e84d644SOleg Nesterov  * It is possible to use this function if the work re-queues itself. It can
12246e84d644SOleg Nesterov  * cancel the work even if it migrates to another workqueue, however in that
12256e84d644SOleg Nesterov  * case it only guarantees that work->func() has completed on the last queued
12266e84d644SOleg Nesterov  * workqueue.
12276e84d644SOleg Nesterov  *
12286e84d644SOleg Nesterov  * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
12296e84d644SOleg Nesterov  * pending; otherwise it goes into a busy-wait loop until the timer expires.
12306e84d644SOleg Nesterov  *
12316e84d644SOleg Nesterov  * The caller must ensure that workqueue_struct on which this work was last
12326e84d644SOleg Nesterov  * queued can't be destroyed before this function returns.
12336e84d644SOleg Nesterov  */
12341f1f642eSOleg Nesterov int cancel_work_sync(struct work_struct *work)
12356e84d644SOleg Nesterov {
12361f1f642eSOleg Nesterov 	return __cancel_work_timer(work, NULL);
1237b89deed3SOleg Nesterov }
123828e53bddSOleg Nesterov EXPORT_SYMBOL_GPL(cancel_work_sync);
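
/*
 * Editor's sketch (illustrative only, not part of the original file):
 * tearing down a self-requeueing work item with cancel_work_sync().
 * The example_* names are hypothetical.
 */
static void example_poll_fn(struct work_struct *work)
{
	/* ... poll the hardware ... */
	schedule_work(work);			/* rearms itself */
}

static DECLARE_WORK(example_poll_work, example_poll_fn);

static void example_poll_stop(void)
{
	/*
	 * Removes any queued instance and waits for a running
	 * example_poll_fn(); safe despite the self-requeueing above.
	 */
	if (cancel_work_sync(&example_poll_work))
		pr_debug("example: poll work was still pending\n");
}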
1239b89deed3SOleg Nesterov 
12406e84d644SOleg Nesterov /**
1241f5a421a4SOleg Nesterov  * cancel_delayed_work_sync - reliably kill off a delayed work.
12426e84d644SOleg Nesterov  * @dwork: the delayed work struct
12436e84d644SOleg Nesterov  *
12441f1f642eSOleg Nesterov  * Returns true if @dwork was pending.
12451f1f642eSOleg Nesterov  *
12466e84d644SOleg Nesterov  * It is possible to use this function if @dwork rearms itself via queue_work()
12476e84d644SOleg Nesterov  * or queue_delayed_work(). See also the comment for cancel_work_sync().
12486e84d644SOleg Nesterov  */
12491f1f642eSOleg Nesterov int cancel_delayed_work_sync(struct delayed_work *dwork)
12506e84d644SOleg Nesterov {
12511f1f642eSOleg Nesterov 	return __cancel_work_timer(&dwork->work, &dwork->timer);
12526e84d644SOleg Nesterov }
1253f5a421a4SOleg Nesterov EXPORT_SYMBOL(cancel_delayed_work_sync);
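
/*
 * Editor's sketch (illustrative only, not part of the original file): a
 * delayed work that rearms itself and is reliably killed on teardown with
 * cancel_delayed_work_sync().  The example_* names are hypothetical.
 */
static void example_heartbeat_fn(struct work_struct *work)
{
	struct delayed_work *dwork =
		container_of(work, struct delayed_work, work);

	/* ... send a heartbeat ... */
	schedule_delayed_work(dwork, HZ);	/* rearm one second later */
}

static DECLARE_DELAYED_WORK(example_heartbeat_work, example_heartbeat_fn);

static void example_heartbeat_stop(void)
{
	/* kills the timer, waits for a running callback, prevents rearming */
	cancel_delayed_work_sync(&example_heartbeat_work);
}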
12541da177e4SLinus Torvalds 
12556e84d644SOleg Nesterov static struct workqueue_struct *keventd_wq __read_mostly;
12561da177e4SLinus Torvalds 
12570fcb78c2SRolf Eike Beer /**
12580fcb78c2SRolf Eike Beer  * schedule_work - put work task in global workqueue
12590fcb78c2SRolf Eike Beer  * @work: job to be done
12600fcb78c2SRolf Eike Beer  *
12615b0f437dSBart Van Assche  * Returns zero if @work was already on the kernel-global workqueue and
12625b0f437dSBart Van Assche  * non-zero otherwise.
12635b0f437dSBart Van Assche  *
12645b0f437dSBart Van Assche  * This puts a job in the kernel-global workqueue if it was not already
12655b0f437dSBart Van Assche  * queued and leaves it in the same position on the kernel-global
12665b0f437dSBart Van Assche  * workqueue otherwise.
12670fcb78c2SRolf Eike Beer  */
12687ad5b3a5SHarvey Harrison int schedule_work(struct work_struct *work)
12691da177e4SLinus Torvalds {
12701da177e4SLinus Torvalds 	return queue_work(keventd_wq, work);
12711da177e4SLinus Torvalds }
1272ae90dd5dSDave Jones EXPORT_SYMBOL(schedule_work);
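
/*
 * Editor's sketch (illustrative only, not part of the original file):
 * deferring heavy work from a possibly atomic context to keventd via
 * schedule_work().  The example_* names are hypothetical.
 */
struct example_adapter {
	struct work_struct	reset_work;
};

static void example_reset_fn(struct work_struct *work)
{
	struct example_adapter *ad =
		container_of(work, struct example_adapter, reset_work);

	/* heavy reset of @ad; sleeping is fine here */
}

static void example_adapter_init(struct example_adapter *ad)
{
	INIT_WORK(&ad->reset_work, example_reset_fn);
}

static void example_adapter_error(struct example_adapter *ad)
{
	/* may be called from atomic context; the reset runs later in keventd */
	schedule_work(&ad->reset_work);
}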
12731da177e4SLinus Torvalds 
1274c1a220e7SZhang Rui /**
1275c1a220e7SZhang Rui  * schedule_work_on - put work task on a specific cpu
1276c1a220e7SZhang Rui  * @cpu: cpu to put the work task on
1277c1a220e7SZhang Rui  * @work: job to be done
1278c1a220e7SZhang Rui  *
1279c1a220e7SZhang Rui  * This puts a job on a specific cpu.
1280c1a220e7SZhang Rui  */
1281c1a220e7SZhang Rui int schedule_work_on(int cpu, struct work_struct *work)
1282c1a220e7SZhang Rui {
1283c1a220e7SZhang Rui 	return queue_work_on(cpu, keventd_wq, work);
1284c1a220e7SZhang Rui }
1285c1a220e7SZhang Rui EXPORT_SYMBOL(schedule_work_on);
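
/*
 * Editor's sketch (illustrative only, not part of the original file):
 * like schedule_work() but pinned to a particular CPU.  The example_*
 * names are hypothetical.
 */
static void example_calib_fn(struct work_struct *work)
{
	/* runs in keventd context on the CPU it was queued on */
}

static DECLARE_WORK(example_calib_work, example_calib_fn);

static void example_calibrate_cpu(int cpu)
{
	schedule_work_on(cpu, &example_calib_work);
}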
1286c1a220e7SZhang Rui 
12870fcb78c2SRolf Eike Beer /**
12880fcb78c2SRolf Eike Beer  * schedule_delayed_work - put work task in global workqueue after delay
128952bad64dSDavid Howells  * @dwork: job to be done
129052bad64dSDavid Howells  * @delay: number of jiffies to wait or 0 for immediate execution
12910fcb78c2SRolf Eike Beer  *
12920fcb78c2SRolf Eike Beer  * After waiting for a given time this puts a job in the kernel-global
12930fcb78c2SRolf Eike Beer  * workqueue.
12940fcb78c2SRolf Eike Beer  */
12957ad5b3a5SHarvey Harrison int schedule_delayed_work(struct delayed_work *dwork,
129682f67cd9SIngo Molnar 					unsigned long delay)
12971da177e4SLinus Torvalds {
129852bad64dSDavid Howells 	return queue_delayed_work(keventd_wq, dwork, delay);
12991da177e4SLinus Torvalds }
1300ae90dd5dSDave Jones EXPORT_SYMBOL(schedule_delayed_work);
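
/*
 * Editor's sketch (illustrative only, not part of the original file):
 * arming a one-shot retry roughly 500ms in the future on the global
 * workqueue.  The example_* names are hypothetical.
 */
static void example_retry_fn(struct work_struct *work)
{
	/* retry the failed operation here */
}

static DECLARE_DELAYED_WORK(example_retry_work, example_retry_fn);

static void example_arm_retry(void)
{
	/* returns 0 if the retry was already pending, non-zero otherwise */
	schedule_delayed_work(&example_retry_work, msecs_to_jiffies(500));
}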
13011da177e4SLinus Torvalds 
13020fcb78c2SRolf Eike Beer /**
13038c53e463SLinus Torvalds  * flush_delayed_work - block until a delayed_work's callback has terminated
13048c53e463SLinus Torvalds  * @dwork: the delayed work which is to be flushed
13058c53e463SLinus Torvalds  *
13068c53e463SLinus Torvalds  * Any timeout is cancelled, and any pending work is run immediately.
13078c53e463SLinus Torvalds  */
13088c53e463SLinus Torvalds void flush_delayed_work(struct delayed_work *dwork)
13098c53e463SLinus Torvalds {
13108c53e463SLinus Torvalds 	if (del_timer_sync(&dwork->timer)) {
13114690c4abSTejun Heo 		__queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
13124690c4abSTejun Heo 			     &dwork->work);
13138c53e463SLinus Torvalds 		put_cpu();
13148c53e463SLinus Torvalds 	}
13158c53e463SLinus Torvalds 	flush_work(&dwork->work);
13168c53e463SLinus Torvalds }
13178c53e463SLinus Torvalds EXPORT_SYMBOL(flush_delayed_work);
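
/*
 * Editor's sketch (illustrative only, not part of the original file):
 * forcing a pending delayed work to run now instead of waiting out the
 * remaining delay, e.g. before suspend.  The example_* names are
 * hypothetical.
 */
static void example_log_fn(struct work_struct *work)
{
	/* write out buffered log data */
}

static DECLARE_DELAYED_WORK(example_log_work, example_log_fn);

static void example_log_sync(void)
{
	/* cancels the timer, queues the work immediately and waits for it */
	flush_delayed_work(&example_log_work);
}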
13188c53e463SLinus Torvalds 
13198c53e463SLinus Torvalds /**
13200fcb78c2SRolf Eike Beer  * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
13210fcb78c2SRolf Eike Beer  * @cpu: cpu to use
132252bad64dSDavid Howells  * @dwork: job to be done
13230fcb78c2SRolf Eike Beer  * @delay: number of jiffies to wait
13240fcb78c2SRolf Eike Beer  *
13250fcb78c2SRolf Eike Beer  * After waiting for a given time this puts a job in the kernel-global
13260fcb78c2SRolf Eike Beer  * workqueue on the specified CPU.
13270fcb78c2SRolf Eike Beer  */
13281da177e4SLinus Torvalds int schedule_delayed_work_on(int cpu,
132952bad64dSDavid Howells 			struct delayed_work *dwork, unsigned long delay)
13301da177e4SLinus Torvalds {
133152bad64dSDavid Howells 	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
13321da177e4SLinus Torvalds }
1333ae90dd5dSDave Jones EXPORT_SYMBOL(schedule_delayed_work_on);
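
/*
 * Editor's sketch (illustrative only, not part of the original file):
 * the delayed variant of schedule_work_on().  The example_* names are
 * hypothetical.
 */
static void example_probe_fn(struct work_struct *work)
{
	/* per-CPU probing; runs on the CPU it was queued on */
}

static DECLARE_DELAYED_WORK(example_probe_work, example_probe_fn);

static void example_probe_later(int cpu)
{
	/* run example_probe_fn() on @cpu roughly two seconds from now */
	schedule_delayed_work_on(cpu, &example_probe_work, 2 * HZ);
}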
13341da177e4SLinus Torvalds 
1335b6136773SAndrew Morton /**
1336b6136773SAndrew Morton  * schedule_on_each_cpu - call a function on each online CPU from keventd
1337b6136773SAndrew Morton  * @func: the function to call
1338b6136773SAndrew Morton  *
1339b6136773SAndrew Morton  * Returns zero on success.
1340b6136773SAndrew Morton  * Returns a negative errno on failure.
1341b6136773SAndrew Morton  *
1342b6136773SAndrew Morton  * schedule_on_each_cpu() is very slow: it waits for @func to finish on every online CPU.
1343b6136773SAndrew Morton  */
134465f27f38SDavid Howells int schedule_on_each_cpu(work_func_t func)
134515316ba8SChristoph Lameter {
134615316ba8SChristoph Lameter 	int cpu;
134765a64464SAndi Kleen 	int orig = -1;
1348b6136773SAndrew Morton 	struct work_struct *works;
134915316ba8SChristoph Lameter 
1350b6136773SAndrew Morton 	works = alloc_percpu(struct work_struct);
1351b6136773SAndrew Morton 	if (!works)
135215316ba8SChristoph Lameter 		return -ENOMEM;
1353b6136773SAndrew Morton 
135495402b38SGautham R Shenoy 	get_online_cpus();
135593981800STejun Heo 
135693981800STejun Heo 	/*
135793981800STejun Heo 	 * When running in keventd don't schedule a work item on
135893981800STejun Heo 	 * itself.  Just call the function directly because the workqueue
135993981800STejun Heo 	 * is already bound to this CPU.  This is also faster.
136093981800STejun Heo 	 */
136193981800STejun Heo 	if (current_is_keventd())
136293981800STejun Heo 		orig = raw_smp_processor_id();
136393981800STejun Heo 
136415316ba8SChristoph Lameter 	for_each_online_cpu(cpu) {
13659bfb1839SIngo Molnar 		struct work_struct *work = per_cpu_ptr(works, cpu);
13669bfb1839SIngo Molnar 
13679bfb1839SIngo Molnar 		INIT_WORK(work, func);
136893981800STejun Heo 		if (cpu != orig)
13698de6d308SOleg Nesterov 			schedule_work_on(cpu, work);
137015316ba8SChristoph Lameter 	}
137193981800STejun Heo 	if (orig >= 0)
137293981800STejun Heo 		func(per_cpu_ptr(works, orig));
137393981800STejun Heo 
137493981800STejun Heo 	for_each_online_cpu(cpu)
13758616a89aSOleg Nesterov 		flush_work(per_cpu_ptr(works, cpu));
137693981800STejun Heo 
137795402b38SGautham R Shenoy 	put_online_cpus();
1378b6136773SAndrew Morton 	free_percpu(works);
137915316ba8SChristoph Lameter 	return 0;
138015316ba8SChristoph Lameter }
138115316ba8SChristoph Lameter 
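/*
 * Editor's sketch (illustrative only, not part of the original file):
 * running a callback once on every online CPU and waiting for all of
 * them, as schedule_on_each_cpu() provides.  The example_* names are
 * hypothetical.
 */
static void example_drain_fn(struct work_struct *unused)
{
	/* drain this CPU's private caches */
}

static int example_drain_all_cpus(void)
{
	/* blocks until example_drain_fn() has run on every online CPU */
	return schedule_on_each_cpu(example_drain_fn);
}
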
1382eef6a7d5SAlan Stern /**
1383eef6a7d5SAlan Stern  * flush_scheduled_work - ensure that any scheduled work has run to completion.
1384eef6a7d5SAlan Stern  *
1385eef6a7d5SAlan Stern  * Forces execution of the kernel-global workqueue and blocks until its
1386eef6a7d5SAlan Stern  * completion.
1387eef6a7d5SAlan Stern  *
1388eef6a7d5SAlan Stern  * Think twice before calling this function!  It's very easy to get into
1389eef6a7d5SAlan Stern  * trouble if you don't take great care.  Either of the following situations
1390eef6a7d5SAlan Stern  * will lead to deadlock:
1391eef6a7d5SAlan Stern  *
1392eef6a7d5SAlan Stern  *	One of the work items currently on the workqueue needs to acquire
1393eef6a7d5SAlan Stern  *	a lock held by your code or its caller.
1394eef6a7d5SAlan Stern  *
1395eef6a7d5SAlan Stern  *	Your code is running in the context of a work routine.
1396eef6a7d5SAlan Stern  *
1397eef6a7d5SAlan Stern  * They will be detected by lockdep when they occur, but the first might not
1398eef6a7d5SAlan Stern  * occur very often.  It depends on what work items are on the workqueue and
1399eef6a7d5SAlan Stern  * what locks they need, which you have no control over.
1400eef6a7d5SAlan Stern  *
1401eef6a7d5SAlan Stern  * In most situations flushing the entire workqueue is overkill; you merely
1402eef6a7d5SAlan Stern  * need to know that a particular work item isn't queued and isn't running.
1403eef6a7d5SAlan Stern  * In such cases you should use cancel_delayed_work_sync() or
1404eef6a7d5SAlan Stern  * cancel_work_sync() instead.
1405eef6a7d5SAlan Stern  */
14061da177e4SLinus Torvalds void flush_scheduled_work(void)
14071da177e4SLinus Torvalds {
14081da177e4SLinus Torvalds 	flush_workqueue(keventd_wq);
14091da177e4SLinus Torvalds }
1410ae90dd5dSDave Jones EXPORT_SYMBOL(flush_scheduled_work);
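
/*
 * Editor's sketch (illustrative only, not part of the original file): per
 * the warning above, a teardown path is usually better served by
 * cancelling its own items than by flushing the whole global workqueue.
 * The example_* names are hypothetical.
 */
static void example_io_fn(struct work_struct *work)
{
	/* write back cached state; may sleep */
}

static DECLARE_WORK(example_io_work, example_io_fn);
static DECLARE_DELAYED_WORK(example_stats_work, example_io_fn);

static void example_module_exit(void)
{
	/* preferred: waits only for this module's own work items */
	cancel_work_sync(&example_io_work);
	cancel_delayed_work_sync(&example_stats_work);

	/*
	 * flush_scheduled_work() would also wait for them, but it blocks on
	 * everything queued on keventd and is deadlock-prone as described
	 * in the comment above.
	 */
}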
14111da177e4SLinus Torvalds 
14121da177e4SLinus Torvalds /**
14131fa44ecaSJames Bottomley  * execute_in_process_context - reliably execute the routine with user context
14141fa44ecaSJames Bottomley  * @fn:		the function to execute
14151fa44ecaSJames Bottomley  * @ew:		guaranteed storage for the execute work structure (must
14161fa44ecaSJames Bottomley  *		be available when the work executes)
14171fa44ecaSJames Bottomley  *
14181fa44ecaSJames Bottomley  * Executes the function immediately if process context is available;
14191fa44ecaSJames Bottomley  * otherwise it schedules the function to run later from the global workqueue.
14201fa44ecaSJames Bottomley  *
14211fa44ecaSJames Bottomley  * Returns:	0 - function was executed
14221fa44ecaSJames Bottomley  *		1 - function was scheduled for execution
14231fa44ecaSJames Bottomley  */
142465f27f38SDavid Howells int execute_in_process_context(work_func_t fn, struct execute_work *ew)
14251fa44ecaSJames Bottomley {
14261fa44ecaSJames Bottomley 	if (!in_interrupt()) {
142765f27f38SDavid Howells 		fn(&ew->work);
14281fa44ecaSJames Bottomley 		return 0;
14291fa44ecaSJames Bottomley 	}
14301fa44ecaSJames Bottomley 
143165f27f38SDavid Howells 	INIT_WORK(&ew->work, fn);
14321fa44ecaSJames Bottomley 	schedule_work(&ew->work);
14331fa44ecaSJames Bottomley 
14341fa44ecaSJames Bottomley 	return 1;
14351fa44ecaSJames Bottomley }
14361fa44ecaSJames Bottomley EXPORT_SYMBOL_GPL(execute_in_process_context);
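
/*
 * Editor's sketch (illustrative only, not part of the original file):
 * releasing an object either right away (process context) or via the
 * global workqueue, using the caller-provided execute_work storage.
 * The example_* names are hypothetical.
 */
struct example_release_ctx {
	struct execute_work	ew;
	/* ... resources to free ... */
};

static void example_release_fn(struct work_struct *work)
{
	struct example_release_ctx *ctx =
		container_of(work, struct example_release_ctx, ew.work);

	kfree(ctx);
}

static void example_release(struct example_release_ctx *ctx)
{
	/* runs example_release_fn() now if possible, otherwise via keventd */
	execute_in_process_context(example_release_fn, &ctx->ew);
}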
14371fa44ecaSJames Bottomley 
14381da177e4SLinus Torvalds int keventd_up(void)
14391da177e4SLinus Torvalds {
14401da177e4SLinus Torvalds 	return keventd_wq != NULL;
14411da177e4SLinus Torvalds }
14421da177e4SLinus Torvalds 
14431da177e4SLinus Torvalds int current_is_keventd(void)
14441da177e4SLinus Torvalds {
14451da177e4SLinus Torvalds 	struct cpu_workqueue_struct *cwq;
1446d243769dSHugh Dickins 	int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
14471da177e4SLinus Torvalds 	int ret = 0;
14481da177e4SLinus Torvalds 
14491da177e4SLinus Torvalds 	BUG_ON(!keventd_wq);
14501da177e4SLinus Torvalds 
14511537663fSTejun Heo 	cwq = get_cwq(cpu, keventd_wq);
1452c34056a3STejun Heo 	if (current == cwq->worker->task)
14531da177e4SLinus Torvalds 		ret = 1;
14541da177e4SLinus Torvalds 
14551da177e4SLinus Torvalds 	return ret;
14561da177e4SLinus Torvalds 
14571da177e4SLinus Torvalds }
14581da177e4SLinus Torvalds 
14590f900049STejun Heo static struct cpu_workqueue_struct *alloc_cwqs(void)
14600f900049STejun Heo {
14610f900049STejun Heo 	/*
14620f900049STejun Heo 	 * cwqs are force-aligned according to WORK_STRUCT_FLAG_BITS.
14630f900049STejun Heo 	 * Make sure that the alignment isn't lower than that of
14640f900049STejun Heo 	 * unsigned long long.
14650f900049STejun Heo 	 */
14660f900049STejun Heo 	const size_t size = sizeof(struct cpu_workqueue_struct);
14670f900049STejun Heo 	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
14680f900049STejun Heo 				   __alignof__(unsigned long long));
14690f900049STejun Heo 	struct cpu_workqueue_struct *cwqs;
14700f900049STejun Heo #ifndef CONFIG_SMP
14710f900049STejun Heo 	void *ptr;
14720f900049STejun Heo 
14730f900049STejun Heo 	/*
14740f900049STejun Heo 	 * On UP, percpu allocator doesn't honor alignment parameter
14750f900049STejun Heo 	 * and simply uses arch-dependent default.  Allocate enough
14760f900049STejun Heo 	 * room to align cwq and put an extra pointer at the end
14770f900049STejun Heo 	 * pointing back to the originally allocated pointer which
14780f900049STejun Heo 	 * will be used for free.
14790f900049STejun Heo 	 *
14800f900049STejun Heo 	 * FIXME: This really belongs to UP percpu code.  Update UP
14810f900049STejun Heo 	 * percpu code to honor alignment and remove this ugliness.
14820f900049STejun Heo 	 */
14830f900049STejun Heo 	ptr = __alloc_percpu(size + align + sizeof(void *), 1);
14840f900049STejun Heo 	cwqs = PTR_ALIGN(ptr, align);
14850f900049STejun Heo 	*(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
14860f900049STejun Heo #else
14870f900049STejun Heo 	/* On SMP, percpu allocator can do it itself */
14880f900049STejun Heo 	cwqs = __alloc_percpu(size, align);
14890f900049STejun Heo #endif
14900f900049STejun Heo 	/* just in case, make sure it's actually aligned */
14910f900049STejun Heo 	BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
14920f900049STejun Heo 	return cwqs;
14930f900049STejun Heo }
14940f900049STejun Heo 
14950f900049STejun Heo static void free_cwqs(struct cpu_workqueue_struct *cwqs)
14960f900049STejun Heo {
14970f900049STejun Heo #ifndef CONFIG_SMP
14980f900049STejun Heo 	/* on UP, the pointer to free is stored right after the cwq */
14990f900049STejun Heo 	if (cwqs)
15000f900049STejun Heo 		free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
15010f900049STejun Heo #else
15020f900049STejun Heo 	free_percpu(cwqs);
15030f900049STejun Heo #endif
15040f900049STejun Heo }
15050f900049STejun Heo 
15064e6045f1SJohannes Berg struct workqueue_struct *__create_workqueue_key(const char *name,
150797e37d7bSTejun Heo 						unsigned int flags,
1508eb13ba87SJohannes Berg 						struct lock_class_key *key,
1509eb13ba87SJohannes Berg 						const char *lock_name)
15103af24433SOleg Nesterov {
15111537663fSTejun Heo 	bool singlethread = flags & WQ_SINGLE_THREAD;
15123af24433SOleg Nesterov 	struct workqueue_struct *wq;
1513c34056a3STejun Heo 	bool failed = false;
1514c34056a3STejun Heo 	unsigned int cpu;
15153af24433SOleg Nesterov 
15163af24433SOleg Nesterov 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
15173af24433SOleg Nesterov 	if (!wq)
15184690c4abSTejun Heo 		goto err;
15193af24433SOleg Nesterov 
15200f900049STejun Heo 	wq->cpu_wq = alloc_cwqs();
15214690c4abSTejun Heo 	if (!wq->cpu_wq)
15224690c4abSTejun Heo 		goto err;
15233af24433SOleg Nesterov 
152497e37d7bSTejun Heo 	wq->flags = flags;
152573f53c4aSTejun Heo 	mutex_init(&wq->flush_mutex);
152673f53c4aSTejun Heo 	atomic_set(&wq->nr_cwqs_to_flush, 0);
152773f53c4aSTejun Heo 	INIT_LIST_HEAD(&wq->flusher_queue);
152873f53c4aSTejun Heo 	INIT_LIST_HEAD(&wq->flusher_overflow);
15293af24433SOleg Nesterov 	wq->name = name;
1530eb13ba87SJohannes Berg 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
1531cce1a165SOleg Nesterov 	INIT_LIST_HEAD(&wq->list);
15323af24433SOleg Nesterov 
15333da1c84cSOleg Nesterov 	cpu_maps_update_begin();
15346af8bf3dSOleg Nesterov 	/*
15356af8bf3dSOleg Nesterov 	 * We must initialize cwqs for each possible cpu even if we
15356af8bf3dSOleg Nesterov 	 * are eventually going to call destroy_workqueue().  Otherwise
15376af8bf3dSOleg Nesterov 	 * cpu_up() can hit the uninitialized cwq once we drop the
15386af8bf3dSOleg Nesterov 	 * lock.
15396af8bf3dSOleg Nesterov 	 */
15403af24433SOleg Nesterov 	for_each_possible_cpu(cpu) {
15411537663fSTejun Heo 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
15421537663fSTejun Heo 
15430f900049STejun Heo 		BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
15441537663fSTejun Heo 		cwq->cpu = cpu;
1545c34056a3STejun Heo 		cwq->wq = wq;
154673f53c4aSTejun Heo 		cwq->flush_color = -1;
15471537663fSTejun Heo 		spin_lock_init(&cwq->lock);
15481537663fSTejun Heo 		INIT_LIST_HEAD(&cwq->worklist);
15491537663fSTejun Heo 		init_waitqueue_head(&cwq->more_work);
15501537663fSTejun Heo 
1551c34056a3STejun Heo 		if (failed)
15523af24433SOleg Nesterov 			continue;
1553c34056a3STejun Heo 		cwq->worker = create_worker(cwq,
1554c34056a3STejun Heo 					    cpu_online(cpu) && !singlethread);
1555c34056a3STejun Heo 		if (cwq->worker)
1556c34056a3STejun Heo 			start_worker(cwq->worker);
15571537663fSTejun Heo 		else
1558c34056a3STejun Heo 			failed = true;
15593af24433SOleg Nesterov 	}
15601537663fSTejun Heo 
15611537663fSTejun Heo 	spin_lock(&workqueue_lock);
15621537663fSTejun Heo 	list_add(&wq->list, &workqueues);
15631537663fSTejun Heo 	spin_unlock(&workqueue_lock);
15641537663fSTejun Heo 
15653da1c84cSOleg Nesterov 	cpu_maps_update_done();
15663af24433SOleg Nesterov 
1567c34056a3STejun Heo 	if (failed) {
15683af24433SOleg Nesterov 		destroy_workqueue(wq);
15693af24433SOleg Nesterov 		wq = NULL;
15703af24433SOleg Nesterov 	}
15713af24433SOleg Nesterov 	return wq;
15724690c4abSTejun Heo err:
15734690c4abSTejun Heo 	if (wq) {
15740f900049STejun Heo 		free_cwqs(wq->cpu_wq);
15754690c4abSTejun Heo 		kfree(wq);
15764690c4abSTejun Heo 	}
15774690c4abSTejun Heo 	return NULL;
15783af24433SOleg Nesterov }
15794e6045f1SJohannes Berg EXPORT_SYMBOL_GPL(__create_workqueue_key);
15803af24433SOleg Nesterov 
15813af24433SOleg Nesterov /**
15823af24433SOleg Nesterov  * destroy_workqueue - safely terminate a workqueue
15833af24433SOleg Nesterov  * @wq: target workqueue
15843af24433SOleg Nesterov  *
15853af24433SOleg Nesterov  * Safely destroy a workqueue. All work currently pending will be done first.
15863af24433SOleg Nesterov  */
15873af24433SOleg Nesterov void destroy_workqueue(struct workqueue_struct *wq)
15883af24433SOleg Nesterov {
15893af24433SOleg Nesterov 	int cpu;
15903af24433SOleg Nesterov 
15913da1c84cSOleg Nesterov 	cpu_maps_update_begin();
159295402b38SGautham R Shenoy 	spin_lock(&workqueue_lock);
15933af24433SOleg Nesterov 	list_del(&wq->list);
159495402b38SGautham R Shenoy 	spin_unlock(&workqueue_lock);
15953da1c84cSOleg Nesterov 	cpu_maps_update_done();
15963af24433SOleg Nesterov 
159773f53c4aSTejun Heo 	flush_workqueue(wq);
159873f53c4aSTejun Heo 
159973f53c4aSTejun Heo 	for_each_possible_cpu(cpu) {
160073f53c4aSTejun Heo 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
160173f53c4aSTejun Heo 		int i;
160273f53c4aSTejun Heo 
1603c34056a3STejun Heo 		if (cwq->worker) {
1604c34056a3STejun Heo 			destroy_worker(cwq->worker);
1605c34056a3STejun Heo 			cwq->worker = NULL;
160673f53c4aSTejun Heo 		}
160773f53c4aSTejun Heo 
160873f53c4aSTejun Heo 		for (i = 0; i < WORK_NR_COLORS; i++)
160973f53c4aSTejun Heo 			BUG_ON(cwq->nr_in_flight[i]);
161073f53c4aSTejun Heo 	}
16111537663fSTejun Heo 
16120f900049STejun Heo 	free_cwqs(wq->cpu_wq);
16133af24433SOleg Nesterov 	kfree(wq);
16143af24433SOleg Nesterov }
16153af24433SOleg Nesterov EXPORT_SYMBOL_GPL(destroy_workqueue);
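
/*
 * Editor's sketch (illustrative only, not part of the original file): the
 * usual lifecycle of a dedicated workqueue created with the
 * create_workqueue() wrapper around __create_workqueue_key().  The
 * example_* names are hypothetical.
 */
static struct workqueue_struct *example_wq;

static int example_init(void)
{
	example_wq = create_workqueue("example");
	if (!example_wq)
		return -ENOMEM;
	return 0;
}

static void example_exit(void)
{
	/*
	 * destroy_workqueue() flushes whatever is still queued and then
	 * frees the cwqs and workers; the caller must make sure nothing
	 * requeues work on example_wq after this point.
	 */
	destroy_workqueue(example_wq);
}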
16163af24433SOleg Nesterov 
16179c7b216dSChandra Seetharaman static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
16181da177e4SLinus Torvalds 						unsigned long action,
16191da177e4SLinus Torvalds 						void *hcpu)
16201da177e4SLinus Torvalds {
16213af24433SOleg Nesterov 	unsigned int cpu = (unsigned long)hcpu;
16223af24433SOleg Nesterov 	struct cpu_workqueue_struct *cwq;
16231da177e4SLinus Torvalds 	struct workqueue_struct *wq;
16241da177e4SLinus Torvalds 
16258bb78442SRafael J. Wysocki 	action &= ~CPU_TASKS_FROZEN;
16268bb78442SRafael J. Wysocki 
16271da177e4SLinus Torvalds 	list_for_each_entry(wq, &workqueues, list) {
16281537663fSTejun Heo 		if (wq->flags & WQ_SINGLE_THREAD)
16291537663fSTejun Heo 			continue;
16301537663fSTejun Heo 
16311537663fSTejun Heo 		cwq = get_cwq(cpu, wq);
16323af24433SOleg Nesterov 
16333af24433SOleg Nesterov 		switch (action) {
16343da1c84cSOleg Nesterov 		case CPU_POST_DEAD:
163573f53c4aSTejun Heo 			flush_workqueue(wq);
16361da177e4SLinus Torvalds 			break;
16371da177e4SLinus Torvalds 		}
16383af24433SOleg Nesterov 	}
16391da177e4SLinus Torvalds 
16401537663fSTejun Heo 	return notifier_from_errno(0);
16411da177e4SLinus Torvalds }
16421da177e4SLinus Torvalds 
16432d3854a3SRusty Russell #ifdef CONFIG_SMP
16448ccad40dSRusty Russell 
16452d3854a3SRusty Russell struct work_for_cpu {
16466b44003eSAndrew Morton 	struct completion completion;
16472d3854a3SRusty Russell 	long (*fn)(void *);
16482d3854a3SRusty Russell 	void *arg;
16492d3854a3SRusty Russell 	long ret;
16502d3854a3SRusty Russell };
16512d3854a3SRusty Russell 
16526b44003eSAndrew Morton static int do_work_for_cpu(void *_wfc)
16532d3854a3SRusty Russell {
16546b44003eSAndrew Morton 	struct work_for_cpu *wfc = _wfc;
16552d3854a3SRusty Russell 	wfc->ret = wfc->fn(wfc->arg);
16566b44003eSAndrew Morton 	complete(&wfc->completion);
16576b44003eSAndrew Morton 	return 0;
16582d3854a3SRusty Russell }
16592d3854a3SRusty Russell 
16602d3854a3SRusty Russell /**
16612d3854a3SRusty Russell  * work_on_cpu - run a function in user context on a particular cpu
16622d3854a3SRusty Russell  * @cpu: the cpu to run on
16632d3854a3SRusty Russell  * @fn: the function to run
16642d3854a3SRusty Russell  * @arg: the function arg
16652d3854a3SRusty Russell  *
166631ad9081SRusty Russell  * This will return the value @fn returns.
166731ad9081SRusty Russell  * It is up to the caller to ensure that the cpu doesn't go offline.
16686b44003eSAndrew Morton  * The caller must not hold any locks which would prevent @fn from completing.
16692d3854a3SRusty Russell  */
16702d3854a3SRusty Russell long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
16712d3854a3SRusty Russell {
16726b44003eSAndrew Morton 	struct task_struct *sub_thread;
16736b44003eSAndrew Morton 	struct work_for_cpu wfc = {
16746b44003eSAndrew Morton 		.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
16756b44003eSAndrew Morton 		.fn = fn,
16766b44003eSAndrew Morton 		.arg = arg,
16776b44003eSAndrew Morton 	};
16782d3854a3SRusty Russell 
16796b44003eSAndrew Morton 	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
16806b44003eSAndrew Morton 	if (IS_ERR(sub_thread))
16816b44003eSAndrew Morton 		return PTR_ERR(sub_thread);
16826b44003eSAndrew Morton 	kthread_bind(sub_thread, cpu);
16836b44003eSAndrew Morton 	wake_up_process(sub_thread);
16846b44003eSAndrew Morton 	wait_for_completion(&wfc.completion);
16852d3854a3SRusty Russell 	return wfc.ret;
16862d3854a3SRusty Russell }
16872d3854a3SRusty Russell EXPORT_SYMBOL_GPL(work_on_cpu);
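
/*
 * Editor's sketch (illustrative only, not part of the original file):
 * running a function synchronously on a chosen CPU with work_on_cpu().
 * The example_* names are hypothetical.
 */
static long example_read_counter(void *arg)
{
	/* runs in a kthread bound to the requested CPU, in process context */
	return 0;	/* value handed back to the work_on_cpu() caller */
}

static long example_query_cpu(unsigned int cpu)
{
	/* the caller must keep @cpu online, e.g. via get_online_cpus() */
	return work_on_cpu(cpu, example_read_counter, NULL);
}
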
16882d3854a3SRusty Russell #endif /* CONFIG_SMP */
16892d3854a3SRusty Russell 
1690c12920d1SOleg Nesterov void __init init_workqueues(void)
16911da177e4SLinus Torvalds {
1692c34056a3STejun Heo 	unsigned int cpu;
1693c34056a3STejun Heo 
1694c34056a3STejun Heo 	for_each_possible_cpu(cpu)
1695c34056a3STejun Heo 		ida_init(&per_cpu(worker_ida, cpu));
1696c34056a3STejun Heo 
1697e7577c50SRusty Russell 	singlethread_cpu = cpumask_first(cpu_possible_mask);
16981da177e4SLinus Torvalds 	hotcpu_notifier(workqueue_cpu_callback, 0);
16991da177e4SLinus Torvalds 	keventd_wq = create_workqueue("events");
17001da177e4SLinus Torvalds 	BUG_ON(!keventd_wq);
17011da177e4SLinus Torvalds }
1702