xref: /linux-6.15/kernel/sched/debug.c (revision 4ae0c2b9)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * kernel/sched/debug.c
4  *
5  * Print the CFS rbtree and other debugging details
6  *
7  * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
8  */
9 
/*
 * Formatted output helper used throughout this file: with a seq_file the
 * text goes to that file (e.g. /sys/kernel/debug/sched/debug), without
 * one (m == NULL) it is sent to the console through pr_cont().
 */
#define SEQ_printf(m, ...)				\
do {							\
	if (!(m))					\
		pr_cont(__VA_ARGS__);			\
	else						\
		seq_printf(m, __VA_ARGS__);		\
} while (0)
21 
22 /*
23  * Ease the printing of nsec fields:
24  */
25 static long long nsec_high(unsigned long long nsec)
26 {
27 	if ((long long)nsec < 0) {
28 		nsec = -nsec;
29 		do_div(nsec, 1000000);
30 		return -nsec;
31 	}
32 	do_div(nsec, 1000000);
33 
34 	return nsec;
35 }
36 
/*
 * Counterpart of nsec_high(): returns the remainder of the magnitude of
 * @nsec (interpreted as a signed 64-bit value) divided by 1000000.
 */
static unsigned long nsec_low(unsigned long long nsec)
{
	unsigned long long magnitude = nsec;

	if ((long long)magnitude < 0)
		magnitude = -magnitude;

	/* The value of the do_div() expression is what gets printed as the low part. */
	return do_div(magnitude, 1000000);
}
44 
/*
 * Expands to two printf arguments (high part, low part) for a
 * "%Ld.%06ld"-style format. Note that @x is evaluated twice.
 */
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)

/*
 * Names of all scheduler features: every SCHED_FEAT() entry of
 * features.h is turned into the string form of its first argument.
 */
#define SCHED_FEAT(name, enabled) #name ,

static const char * const sched_feat_names[] = {
#include "features.h"
};

#undef SCHED_FEAT
55 
56 static int sched_feat_show(struct seq_file *m, void *v)
57 {
58 	int i;
59 
60 	for (i = 0; i < __SCHED_FEAT_NR; i++) {
61 		if (!(sysctl_sched_features & (1UL << i)))
62 			seq_puts(m, "NO_");
63 		seq_printf(m, "%s ", sched_feat_names[i]);
64 	}
65 	seq_puts(m, "\n");
66 
67 	return 0;
68 }
69 
#ifdef CONFIG_JUMP_LABEL

/* Translate the "enabled" argument of SCHED_FEAT() into a key initializer. */
#define jump_label_key__true  STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

#define SCHED_FEAT(name, enabled) jump_label_key__##enabled ,

/* One static key per SCHED_FEAT() entry, in the order of features.h. */
struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
#include "features.h"
};

#undef SCHED_FEAT

/* Switch the static key of feature @i on. */
static void sched_feat_enable(int i)
{
	static_key_enable_cpuslocked(&sched_feat_keys[i]);
}

/* Switch the static key of feature @i off. */
static void sched_feat_disable(int i)
{
	static_key_disable_cpuslocked(&sched_feat_keys[i]);
}

#else /* !CONFIG_JUMP_LABEL */

/* Without jump labels there are no keys to flip. */
static void sched_feat_enable(int i) { }
static void sched_feat_disable(int i) { }

#endif /* CONFIG_JUMP_LABEL */
97 
98 static int sched_feat_set(char *cmp)
99 {
100 	int i;
101 	int neg = 0;
102 
103 	if (strncmp(cmp, "NO_", 3) == 0) {
104 		neg = 1;
105 		cmp += 3;
106 	}
107 
108 	i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
109 	if (i < 0)
110 		return i;
111 
112 	if (neg) {
113 		sysctl_sched_features &= ~(1UL << i);
114 		sched_feat_disable(i);
115 	} else {
116 		sysctl_sched_features |= (1UL << i);
117 		sched_feat_enable(i);
118 	}
119 
120 	return 0;
121 }
122 
123 static ssize_t
124 sched_feat_write(struct file *filp, const char __user *ubuf,
125 		size_t cnt, loff_t *ppos)
126 {
127 	char buf[64];
128 	char *cmp;
129 	int ret;
130 	struct inode *inode;
131 
132 	if (cnt > 63)
133 		cnt = 63;
134 
135 	if (copy_from_user(&buf, ubuf, cnt))
136 		return -EFAULT;
137 
138 	buf[cnt] = 0;
139 	cmp = strstrip(buf);
140 
141 	/* Ensure the static_key remains in a consistent state */
142 	inode = file_inode(filp);
143 	cpus_read_lock();
144 	inode_lock(inode);
145 	ret = sched_feat_set(cmp);
146 	inode_unlock(inode);
147 	cpus_read_unlock();
148 	if (ret < 0)
149 		return ret;
150 
151 	*ppos += cnt;
152 
153 	return cnt;
154 }
155 
156 static int sched_feat_open(struct inode *inode, struct file *filp)
157 {
158 	return single_open(filp, sched_feat_show, NULL);
159 }
160 
161 static const struct file_operations sched_feat_fops = {
162 	.open		= sched_feat_open,
163 	.write		= sched_feat_write,
164 	.read		= seq_read,
165 	.llseek		= seq_lseek,
166 	.release	= single_release,
167 };
168 
169 #ifdef CONFIG_SMP
170 
171 static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
172 				   size_t cnt, loff_t *ppos)
173 {
174 	char buf[16];
175 	unsigned int scaling;
176 
177 	if (cnt > 15)
178 		cnt = 15;
179 
180 	if (copy_from_user(&buf, ubuf, cnt))
181 		return -EFAULT;
182 	buf[cnt] = '\0';
183 
184 	if (kstrtouint(buf, 10, &scaling))
185 		return -EINVAL;
186 
187 	if (scaling >= SCHED_TUNABLESCALING_END)
188 		return -EINVAL;
189 
190 	sysctl_sched_tunable_scaling = scaling;
191 	if (sched_update_scaling())
192 		return -EINVAL;
193 
194 	*ppos += cnt;
195 	return cnt;
196 }
197 
198 static int sched_scaling_show(struct seq_file *m, void *v)
199 {
200 	seq_printf(m, "%d\n", sysctl_sched_tunable_scaling);
201 	return 0;
202 }
203 
204 static int sched_scaling_open(struct inode *inode, struct file *filp)
205 {
206 	return single_open(filp, sched_scaling_show, NULL);
207 }
208 
209 static const struct file_operations sched_scaling_fops = {
210 	.open		= sched_scaling_open,
211 	.write		= sched_scaling_write,
212 	.read		= seq_read,
213 	.llseek		= seq_lseek,
214 	.release	= single_release,
215 };
216 
217 #endif /* SMP */
218 
219 #ifdef CONFIG_PREEMPT_DYNAMIC
220 
221 static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
222 				   size_t cnt, loff_t *ppos)
223 {
224 	char buf[16];
225 	int mode;
226 
227 	if (cnt > 15)
228 		cnt = 15;
229 
230 	if (copy_from_user(&buf, ubuf, cnt))
231 		return -EFAULT;
232 
233 	buf[cnt] = 0;
234 	mode = sched_dynamic_mode(strstrip(buf));
235 	if (mode < 0)
236 		return mode;
237 
238 	sched_dynamic_update(mode);
239 
240 	*ppos += cnt;
241 
242 	return cnt;
243 }
244 
245 static int sched_dynamic_show(struct seq_file *m, void *v)
246 {
247 	static const char * preempt_modes[] = {
248 		"none", "voluntary", "full"
249 	};
250 	int i;
251 
252 	for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
253 		if (preempt_dynamic_mode == i)
254 			seq_puts(m, "(");
255 		seq_puts(m, preempt_modes[i]);
256 		if (preempt_dynamic_mode == i)
257 			seq_puts(m, ")");
258 
259 		seq_puts(m, " ");
260 	}
261 
262 	seq_puts(m, "\n");
263 	return 0;
264 }
265 
266 static int sched_dynamic_open(struct inode *inode, struct file *filp)
267 {
268 	return single_open(filp, sched_dynamic_show, NULL);
269 }
270 
271 static const struct file_operations sched_dynamic_fops = {
272 	.open		= sched_dynamic_open,
273 	.write		= sched_dynamic_write,
274 	.read		= seq_read,
275 	.llseek		= seq_lseek,
276 	.release	= single_release,
277 };
278 
279 #endif /* CONFIG_PREEMPT_DYNAMIC */
280 
281 __read_mostly bool sched_debug_verbose;
282 
283 #ifdef CONFIG_SMP
284 static struct dentry           *sd_dentry;
285 
286 
287 static ssize_t sched_verbose_write(struct file *filp, const char __user *ubuf,
288 				  size_t cnt, loff_t *ppos)
289 {
290 	ssize_t result;
291 	bool orig;
292 
293 	cpus_read_lock();
294 	mutex_lock(&sched_domains_mutex);
295 
296 	orig = sched_debug_verbose;
297 	result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
298 
299 	if (sched_debug_verbose && !orig)
300 		update_sched_domain_debugfs();
301 	else if (!sched_debug_verbose && orig) {
302 		debugfs_remove(sd_dentry);
303 		sd_dentry = NULL;
304 	}
305 
306 	mutex_unlock(&sched_domains_mutex);
307 	cpus_read_unlock();
308 
309 	return result;
310 }
311 #else
312 #define sched_verbose_write debugfs_write_file_bool
313 #endif
314 
315 static const struct file_operations sched_verbose_fops = {
316 	.read =         debugfs_read_file_bool,
317 	.write =        sched_verbose_write,
318 	.open =         simple_open,
319 	.llseek =       default_llseek,
320 };
321 
322 static const struct seq_operations sched_debug_sops;
323 
324 static int sched_debug_open(struct inode *inode, struct file *filp)
325 {
326 	return seq_open(filp, &sched_debug_sops);
327 }
328 
329 static const struct file_operations sched_debug_fops = {
330 	.open		= sched_debug_open,
331 	.read		= seq_read,
332 	.llseek		= seq_lseek,
333 	.release	= seq_release,
334 };
335 
336 enum dl_param {
337 	DL_RUNTIME = 0,
338 	DL_PERIOD,
339 };
340 
341 static unsigned long fair_server_period_max = (1UL << 22) * NSEC_PER_USEC; /* ~4 seconds */
342 static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC;     /* 100 us */
343 
344 static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf,
345 				       size_t cnt, loff_t *ppos, enum dl_param param)
346 {
347 	long cpu = (long) ((struct seq_file *) filp->private_data)->private;
348 	struct rq *rq = cpu_rq(cpu);
349 	u64 runtime, period;
350 	size_t err;
351 	int retval;
352 	u64 value;
353 
354 	err = kstrtoull_from_user(ubuf, cnt, 10, &value);
355 	if (err)
356 		return err;
357 
358 	scoped_guard (rq_lock_irqsave, rq) {
359 		runtime  = rq->fair_server.dl_runtime;
360 		period = rq->fair_server.dl_period;
361 
362 		switch (param) {
363 		case DL_RUNTIME:
364 			if (runtime == value)
365 				break;
366 			runtime = value;
367 			break;
368 		case DL_PERIOD:
369 			if (value == period)
370 				break;
371 			period = value;
372 			break;
373 		}
374 
375 		if (runtime > period ||
376 		    period > fair_server_period_max ||
377 		    period < fair_server_period_min) {
378 			return  -EINVAL;
379 		}
380 
381 		if (rq->cfs.h_nr_running) {
382 			update_rq_clock(rq);
383 			dl_server_stop(&rq->fair_server);
384 		}
385 
386 		retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
387 		if (retval)
388 			cnt = retval;
389 
390 		if (!runtime)
391 			printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n",
392 					cpu_of(rq));
393 
394 		if (rq->cfs.h_nr_running)
395 			dl_server_start(&rq->fair_server);
396 	}
397 
398 	*ppos += cnt;
399 	return cnt;
400 }
401 
402 static size_t sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param)
403 {
404 	unsigned long cpu = (unsigned long) m->private;
405 	struct rq *rq = cpu_rq(cpu);
406 	u64 value;
407 
408 	switch (param) {
409 	case DL_RUNTIME:
410 		value = rq->fair_server.dl_runtime;
411 		break;
412 	case DL_PERIOD:
413 		value = rq->fair_server.dl_period;
414 		break;
415 	}
416 
417 	seq_printf(m, "%llu\n", value);
418 	return 0;
419 
420 }
421 
422 static ssize_t
423 sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf,
424 				size_t cnt, loff_t *ppos)
425 {
426 	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME);
427 }
428 
429 static int sched_fair_server_runtime_show(struct seq_file *m, void *v)
430 {
431 	return sched_fair_server_show(m, v, DL_RUNTIME);
432 }
433 
434 static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp)
435 {
436 	return single_open(filp, sched_fair_server_runtime_show, inode->i_private);
437 }
438 
439 static const struct file_operations fair_server_runtime_fops = {
440 	.open		= sched_fair_server_runtime_open,
441 	.write		= sched_fair_server_runtime_write,
442 	.read		= seq_read,
443 	.llseek		= seq_lseek,
444 	.release	= single_release,
445 };
446 
447 static ssize_t
448 sched_fair_server_period_write(struct file *filp, const char __user *ubuf,
449 			       size_t cnt, loff_t *ppos)
450 {
451 	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD);
452 }
453 
454 static int sched_fair_server_period_show(struct seq_file *m, void *v)
455 {
456 	return sched_fair_server_show(m, v, DL_PERIOD);
457 }
458 
459 static int sched_fair_server_period_open(struct inode *inode, struct file *filp)
460 {
461 	return single_open(filp, sched_fair_server_period_show, inode->i_private);
462 }
463 
464 static const struct file_operations fair_server_period_fops = {
465 	.open		= sched_fair_server_period_open,
466 	.write		= sched_fair_server_period_write,
467 	.read		= seq_read,
468 	.llseek		= seq_lseek,
469 	.release	= single_release,
470 };
471 
472 static struct dentry *debugfs_sched;
473 
474 static void debugfs_fair_server_init(void)
475 {
476 	struct dentry *d_fair;
477 	unsigned long cpu;
478 
479 	d_fair = debugfs_create_dir("fair_server", debugfs_sched);
480 	if (!d_fair)
481 		return;
482 
483 	for_each_possible_cpu(cpu) {
484 		struct dentry *d_cpu;
485 		char buf[32];
486 
487 		snprintf(buf, sizeof(buf), "cpu%lu", cpu);
488 		d_cpu = debugfs_create_dir(buf, d_fair);
489 
490 		debugfs_create_file("runtime", 0644, d_cpu, (void *) cpu, &fair_server_runtime_fops);
491 		debugfs_create_file("period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops);
492 	}
493 }
494 
495 static __init int sched_init_debug(void)
496 {
497 	struct dentry __maybe_unused *numa;
498 
499 	debugfs_sched = debugfs_create_dir("sched", NULL);
500 
501 	debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops);
502 	debugfs_create_file_unsafe("verbose", 0644, debugfs_sched, &sched_debug_verbose, &sched_verbose_fops);
503 #ifdef CONFIG_PREEMPT_DYNAMIC
504 	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
505 #endif
506 
507 	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
508 
509 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
510 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
511 
512 #ifdef CONFIG_SMP
513 	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
514 	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
515 	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
516 
517 	mutex_lock(&sched_domains_mutex);
518 	update_sched_domain_debugfs();
519 	mutex_unlock(&sched_domains_mutex);
520 #endif
521 
522 #ifdef CONFIG_NUMA_BALANCING
523 	numa = debugfs_create_dir("numa_balancing", debugfs_sched);
524 
525 	debugfs_create_u32("scan_delay_ms", 0644, numa, &sysctl_numa_balancing_scan_delay);
526 	debugfs_create_u32("scan_period_min_ms", 0644, numa, &sysctl_numa_balancing_scan_period_min);
527 	debugfs_create_u32("scan_period_max_ms", 0644, numa, &sysctl_numa_balancing_scan_period_max);
528 	debugfs_create_u32("scan_size_mb", 0644, numa, &sysctl_numa_balancing_scan_size);
529 	debugfs_create_u32("hot_threshold_ms", 0644, numa, &sysctl_numa_balancing_hot_threshold);
530 #endif
531 
532 	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
533 
534 	debugfs_fair_server_init();
535 
536 	return 0;
537 }
538 late_initcall(sched_init_debug);
539 
540 #ifdef CONFIG_SMP
541 
542 static cpumask_var_t		sd_sysctl_cpus;
543 
544 static int sd_flags_show(struct seq_file *m, void *v)
545 {
546 	unsigned long flags = *(unsigned int *)m->private;
547 	int idx;
548 
549 	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
550 		seq_puts(m, sd_flag_debug[idx].name);
551 		seq_puts(m, " ");
552 	}
553 	seq_puts(m, "\n");
554 
555 	return 0;
556 }
557 
558 static int sd_flags_open(struct inode *inode, struct file *file)
559 {
560 	return single_open(file, sd_flags_show, inode->i_private);
561 }
562 
563 static const struct file_operations sd_flags_fops = {
564 	.open		= sd_flags_open,
565 	.read		= seq_read,
566 	.llseek		= seq_lseek,
567 	.release	= single_release,
568 };
569 
570 static void register_sd(struct sched_domain *sd, struct dentry *parent)
571 {
572 #define SDM(type, mode, member)	\
573 	debugfs_create_##type(#member, mode, parent, &sd->member)
574 
575 	SDM(ulong, 0644, min_interval);
576 	SDM(ulong, 0644, max_interval);
577 	SDM(u64,   0644, max_newidle_lb_cost);
578 	SDM(u32,   0644, busy_factor);
579 	SDM(u32,   0644, imbalance_pct);
580 	SDM(u32,   0644, cache_nice_tries);
581 	SDM(str,   0444, name);
582 
583 #undef SDM
584 
585 	debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
586 	debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
587 	debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
588 }
589 
590 void update_sched_domain_debugfs(void)
591 {
592 	int cpu, i;
593 
594 	/*
595 	 * This can unfortunately be invoked before sched_debug_init() creates
596 	 * the debug directory. Don't touch sd_sysctl_cpus until then.
597 	 */
598 	if (!debugfs_sched)
599 		return;
600 
601 	if (!sched_debug_verbose)
602 		return;
603 
604 	if (!cpumask_available(sd_sysctl_cpus)) {
605 		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
606 			return;
607 		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
608 	}
609 
610 	if (!sd_dentry) {
611 		sd_dentry = debugfs_create_dir("domains", debugfs_sched);
612 
613 		/* rebuild sd_sysctl_cpus if empty since it gets cleared below */
614 		if (cpumask_empty(sd_sysctl_cpus))
615 			cpumask_copy(sd_sysctl_cpus, cpu_online_mask);
616 	}
617 
618 	for_each_cpu(cpu, sd_sysctl_cpus) {
619 		struct sched_domain *sd;
620 		struct dentry *d_cpu;
621 		char buf[32];
622 
623 		snprintf(buf, sizeof(buf), "cpu%d", cpu);
624 		debugfs_lookup_and_remove(buf, sd_dentry);
625 		d_cpu = debugfs_create_dir(buf, sd_dentry);
626 
627 		i = 0;
628 		for_each_domain(cpu, sd) {
629 			struct dentry *d_sd;
630 
631 			snprintf(buf, sizeof(buf), "domain%d", i);
632 			d_sd = debugfs_create_dir(buf, d_cpu);
633 
634 			register_sd(sd, d_sd);
635 			i++;
636 		}
637 
638 		__cpumask_clear_cpu(cpu, sd_sysctl_cpus);
639 	}
640 }
641 
642 void dirty_sched_domain_sysctl(int cpu)
643 {
644 	if (cpumask_available(sd_sysctl_cpus))
645 		__cpumask_set_cpu(cpu, sd_sysctl_cpus);
646 }
647 
648 #endif /* CONFIG_SMP */
649 
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
 * Print the statistics of the scheduling entity that represents task
 * group @tg on @cpu. Nothing is printed when tg->se[cpu] is NULL.
 * The schedstat fields are only printed while schedstats are enabled.
 */
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
{
	struct sched_entity *se = tg->se[cpu];

	if (!se)
		return;

/* Plain value / nsec value, each in a direct and a schedstat flavour. */
#define P(F)		SEQ_printf(m, "  .%-30s: %lld\n",	#F, (long long)F)
#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n",	\
		#F, (long long)schedstat_val(stats->F))
#define PN(F)		SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", \
		#F, SPLIT_NS((long long)schedstat_val(stats->F)))

	PN(se->exec_start);
	PN(se->vruntime);
	PN(se->sum_exec_runtime);

	if (schedstat_enabled()) {
		struct sched_statistics *stats = __schedstats_from_se(se);

		PN_SCHEDSTAT(wait_start);
		PN_SCHEDSTAT(sleep_start);
		PN_SCHEDSTAT(block_start);
		PN_SCHEDSTAT(sleep_max);
		PN_SCHEDSTAT(block_max);
		PN_SCHEDSTAT(exec_max);
		PN_SCHEDSTAT(slice_max);
		PN_SCHEDSTAT(wait_max);
		PN_SCHEDSTAT(wait_sum);
		P_SCHEDSTAT(wait_count);
	}

	P(se->load.weight);
#ifdef CONFIG_SMP
	P(se->avg.load_avg);
	P(se->avg.util_avg);
	P(se->avg.runnable_avg);
#endif

#undef PN_SCHEDSTAT
#undef PN
#undef P_SCHEDSTAT
#undef P
}
#endif
698 
699 #ifdef CONFIG_CGROUP_SCHED
700 static DEFINE_SPINLOCK(sched_debug_lock);
701 static char group_path[PATH_MAX];
702 
703 static void task_group_path(struct task_group *tg, char *path, int plen)
704 {
705 	if (autogroup_path(tg, path, plen))
706 		return;
707 
708 	cgroup_path(tg->css.cgroup, path, plen);
709 }
710 
/*
 * Print the path of task group @tg using format @fmt, which must end
 * with the "%s" that receives the path.
 *
 * Only one caller at a time can use the full-length group_path[]
 * buffer (guarded by sched_debug_lock). A caller that fails to take
 * the lock falls back to a short on-stack buffer; "..." is placed at
 * the end of that buffer so that it shows up when the path fills the
 * buffer, indicating possible truncation.
 *
 * The macro expands to a brace-enclosed block, not to an expression.
 */
#define SEQ_printf_task_group_path(m, tg, fmt...)			\
{									\
	if (!spin_trylock(&sched_debug_lock)) {				\
		char buf[128];						\
		char *bufend = buf + sizeof(buf) - 3;			\
		task_group_path(tg, buf, bufend - buf);			\
		strcpy(bufend - 1, "...");				\
		SEQ_printf(m, fmt, buf);				\
	} else {							\
		task_group_path(tg, group_path, sizeof(group_path));	\
		SEQ_printf(m, fmt, group_path);				\
		spin_unlock(&sched_debug_lock);				\
	}								\
}
732 #endif
733 
734 static void
735 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
736 {
737 	if (task_current(rq, p))
738 		SEQ_printf(m, ">R");
739 	else
740 		SEQ_printf(m, " %c", task_state_to_char(p));
741 
742 	SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld.%06ld %9Ld %5d ",
743 		p->comm, task_pid_nr(p),
744 		SPLIT_NS(p->se.vruntime),
745 		entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N',
746 		SPLIT_NS(p->se.deadline),
747 		SPLIT_NS(p->se.slice),
748 		SPLIT_NS(p->se.sum_exec_runtime),
749 		(long long)(p->nvcsw + p->nivcsw),
750 		p->prio);
751 
752 	SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld %9lld.%06ld",
753 		SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
754 		SPLIT_NS(p->se.sum_exec_runtime),
755 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
756 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
757 
758 #ifdef CONFIG_NUMA_BALANCING
759 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
760 #endif
761 #ifdef CONFIG_CGROUP_SCHED
762 	SEQ_printf_task_group_path(m, task_group(p), " %s")
763 #endif
764 
765 	SEQ_printf(m, "\n");
766 }
767 
768 static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
769 {
770 	struct task_struct *g, *p;
771 
772 	SEQ_printf(m, "\n");
773 	SEQ_printf(m, "runnable tasks:\n");
774 	SEQ_printf(m, " S            task   PID         tree-key  switches  prio"
775 		   "     wait-time             sum-exec        sum-sleep\n");
776 	SEQ_printf(m, "-------------------------------------------------------"
777 		   "------------------------------------------------------\n");
778 
779 	rcu_read_lock();
780 	for_each_process_thread(g, p) {
781 		if (task_cpu(p) != rq_cpu)
782 			continue;
783 
784 		print_task(m, rq, p);
785 	}
786 	rcu_read_unlock();
787 }
788 
789 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
790 {
791 	s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, left_deadline = -1, spread;
792 	struct sched_entity *last, *first, *root;
793 	struct rq *rq = cpu_rq(cpu);
794 	unsigned long flags;
795 
796 #ifdef CONFIG_FAIR_GROUP_SCHED
797 	SEQ_printf(m, "\n");
798 	SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
799 #else
800 	SEQ_printf(m, "\n");
801 	SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
802 #endif
803 
804 	raw_spin_rq_lock_irqsave(rq, flags);
805 	root = __pick_root_entity(cfs_rq);
806 	if (root)
807 		left_vruntime = root->min_vruntime;
808 	first = __pick_first_entity(cfs_rq);
809 	if (first)
810 		left_deadline = first->deadline;
811 	last = __pick_last_entity(cfs_rq);
812 	if (last)
813 		right_vruntime = last->vruntime;
814 	min_vruntime = cfs_rq->min_vruntime;
815 	raw_spin_rq_unlock_irqrestore(rq, flags);
816 
817 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_deadline",
818 			SPLIT_NS(left_deadline));
819 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_vruntime",
820 			SPLIT_NS(left_vruntime));
821 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
822 			SPLIT_NS(min_vruntime));
823 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
824 			SPLIT_NS(avg_vruntime(cfs_rq)));
825 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "right_vruntime",
826 			SPLIT_NS(right_vruntime));
827 	spread = right_vruntime - left_vruntime;
828 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
829 	SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
830 	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
831 	SEQ_printf(m, "  .%-30s: %d\n", "idle_nr_running",
832 			cfs_rq->idle_nr_running);
833 	SEQ_printf(m, "  .%-30s: %d\n", "idle_h_nr_running",
834 			cfs_rq->idle_h_nr_running);
835 	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
836 #ifdef CONFIG_SMP
837 	SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
838 			cfs_rq->avg.load_avg);
839 	SEQ_printf(m, "  .%-30s: %lu\n", "runnable_avg",
840 			cfs_rq->avg.runnable_avg);
841 	SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
842 			cfs_rq->avg.util_avg);
843 	SEQ_printf(m, "  .%-30s: %u\n", "util_est",
844 			cfs_rq->avg.util_est);
845 	SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg",
846 			cfs_rq->removed.load_avg);
847 	SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg",
848 			cfs_rq->removed.util_avg);
849 	SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_avg",
850 			cfs_rq->removed.runnable_avg);
851 #ifdef CONFIG_FAIR_GROUP_SCHED
852 	SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib",
853 			cfs_rq->tg_load_avg_contrib);
854 	SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
855 			atomic_long_read(&cfs_rq->tg->load_avg));
856 #endif
857 #endif
858 #ifdef CONFIG_CFS_BANDWIDTH
859 	SEQ_printf(m, "  .%-30s: %d\n", "throttled",
860 			cfs_rq->throttled);
861 	SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
862 			cfs_rq->throttle_count);
863 #endif
864 
865 #ifdef CONFIG_FAIR_GROUP_SCHED
866 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
867 #endif
868 }
869 
870 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
871 {
872 #ifdef CONFIG_RT_GROUP_SCHED
873 	SEQ_printf(m, "\n");
874 	SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu);
875 #else
876 	SEQ_printf(m, "\n");
877 	SEQ_printf(m, "rt_rq[%d]:\n", cpu);
878 #endif
879 
880 #define P(x) \
881 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
882 #define PU(x) \
883 	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
884 #define PN(x) \
885 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
886 
887 	PU(rt_nr_running);
888 
889 #ifdef CONFIG_RT_GROUP_SCHED
890 	P(rt_throttled);
891 	PN(rt_time);
892 	PN(rt_runtime);
893 #endif
894 
895 #undef PN
896 #undef PU
897 #undef P
898 }
899 
900 void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
901 {
902 	struct dl_bw *dl_bw;
903 
904 	SEQ_printf(m, "\n");
905 	SEQ_printf(m, "dl_rq[%d]:\n", cpu);
906 
907 #define PU(x) \
908 	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
909 
910 	PU(dl_nr_running);
911 #ifdef CONFIG_SMP
912 	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
913 #else
914 	dl_bw = &dl_rq->dl_bw;
915 #endif
916 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
917 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
918 
919 #undef PU
920 }
921 
922 static void print_cpu(struct seq_file *m, int cpu)
923 {
924 	struct rq *rq = cpu_rq(cpu);
925 
926 #ifdef CONFIG_X86
927 	{
928 		unsigned int freq = cpu_khz ? : 1;
929 
930 		SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
931 			   cpu, freq / 1000, (freq % 1000));
932 	}
933 #else
934 	SEQ_printf(m, "cpu#%d\n", cpu);
935 #endif
936 
937 #define P(x)								\
938 do {									\
939 	if (sizeof(rq->x) == 4)						\
940 		SEQ_printf(m, "  .%-30s: %d\n", #x, (int)(rq->x));	\
941 	else								\
942 		SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
943 } while (0)
944 
945 #define PN(x) \
946 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
947 
948 	P(nr_running);
949 	P(nr_switches);
950 	P(nr_uninterruptible);
951 	PN(next_balance);
952 	SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
953 	PN(clock);
954 	PN(clock_task);
955 #undef P
956 #undef PN
957 
958 #ifdef CONFIG_SMP
959 #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
960 	P64(avg_idle);
961 	P64(max_idle_balance_cost);
962 #undef P64
963 #endif
964 
965 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
966 	if (schedstat_enabled()) {
967 		P(yld_count);
968 		P(sched_count);
969 		P(sched_goidle);
970 		P(ttwu_count);
971 		P(ttwu_local);
972 	}
973 #undef P
974 
975 	print_cfs_stats(m, cpu);
976 	print_rt_stats(m, cpu);
977 	print_dl_stats(m, cpu);
978 
979 	print_rq(m, rq, cpu);
980 	SEQ_printf(m, "\n");
981 }
982 
/*
 * Human-readable names of the tunable scaling modes, indexed by the
 * value of sysctl_sched_tunable_scaling. The table is read-only, so
 * both the strings and the pointers are const.
 */
static const char * const sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};
988 
989 static void sched_debug_header(struct seq_file *m)
990 {
991 	u64 ktime, sched_clk, cpu_clk;
992 	unsigned long flags;
993 
994 	local_irq_save(flags);
995 	ktime = ktime_to_ns(ktime_get());
996 	sched_clk = sched_clock();
997 	cpu_clk = local_clock();
998 	local_irq_restore(flags);
999 
1000 	SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
1001 		init_utsname()->release,
1002 		(int)strcspn(init_utsname()->version, " "),
1003 		init_utsname()->version);
1004 
1005 #define P(x) \
1006 	SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
1007 #define PN(x) \
1008 	SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
1009 	PN(ktime);
1010 	PN(sched_clk);
1011 	PN(cpu_clk);
1012 	P(jiffies);
1013 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
1014 	P(sched_clock_stable());
1015 #endif
1016 #undef PN
1017 #undef P
1018 
1019 	SEQ_printf(m, "\n");
1020 	SEQ_printf(m, "sysctl_sched\n");
1021 
1022 #define P(x) \
1023 	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
1024 #define PN(x) \
1025 	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
1026 	PN(sysctl_sched_base_slice);
1027 	P(sysctl_sched_features);
1028 #undef PN
1029 #undef P
1030 
1031 	SEQ_printf(m, "  .%-40s: %d (%s)\n",
1032 		"sysctl_sched_tunable_scaling",
1033 		sysctl_sched_tunable_scaling,
1034 		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
1035 	SEQ_printf(m, "\n");
1036 }
1037 
/*
 * seq_file show callback of the "debug" file. The iterator cookie @v
 * encodes the record: 1 is the header, cpu + 2 is the record of a CPU.
 */
static int sched_debug_show(struct seq_file *m, void *v)
{
	int cpu = (unsigned long)(v - 2);

	if (cpu == -1) {
		sched_debug_header(m);
		return 0;
	}

	print_cpu(m, cpu);
	return 0;
}
1049 
1050 void sysrq_sched_debug_show(void)
1051 {
1052 	int cpu;
1053 
1054 	sched_debug_header(NULL);
1055 	for_each_online_cpu(cpu) {
1056 		/*
1057 		 * Need to reset softlockup watchdogs on all CPUs, because
1058 		 * another CPU might be blocked waiting for us to process
1059 		 * an IPI or stop_machine.
1060 		 */
1061 		touch_nmi_watchdog();
1062 		touch_all_softlockup_watchdogs();
1063 		print_cpu(NULL, cpu);
1064 	}
1065 }
1066 
1067 /*
1068  * This iterator needs some explanation.
1069  * It returns 1 for the header position.
1070  * This means 2 is CPU 0.
1071  * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
1072  * to use cpumask_* to iterate over the CPUs.
1073  */
1074 static void *sched_debug_start(struct seq_file *file, loff_t *offset)
1075 {
1076 	unsigned long n = *offset;
1077 
1078 	if (n == 0)
1079 		return (void *) 1;
1080 
1081 	n--;
1082 
1083 	if (n > 0)
1084 		n = cpumask_next(n - 1, cpu_online_mask);
1085 	else
1086 		n = cpumask_first(cpu_online_mask);
1087 
1088 	*offset = n + 1;
1089 
1090 	if (n < nr_cpu_ids)
1091 		return (void *)(unsigned long)(n + 2);
1092 
1093 	return NULL;
1094 }
1095 
1096 static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
1097 {
1098 	(*offset)++;
1099 	return sched_debug_start(file, offset);
1100 }
1101 
1102 static void sched_debug_stop(struct seq_file *file, void *data)
1103 {
1104 }
1105 
1106 static const struct seq_operations sched_debug_sops = {
1107 	.start		= sched_debug_start,
1108 	.next		= sched_debug_next,
1109 	.stop		= sched_debug_stop,
1110 	.show		= sched_debug_show,
1111 };
1112 
/*
 * Printing helpers for the per-task output below. They expect a
 * seq_file "m" in scope; P(), PM() and PN() additionally expect a
 * task pointer "p".
 *
 *   __PS(S, F)   label S, value F as an integer
 *   __P(F)       like __PS with the expression itself as label
 *   P(F)         like __P for the task field p->F
 *   PM(F, M)     like P with the value masked by M
 *   __PSN/__PN/PN  the same for nanosecond values printed via SPLIT_NS
 */
#define __PS(S, F)	SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
#define __P(F)		__PS(#F, F)
#define P(F)		__PS(#F, p->F)
#define PM(F, M)	__PS(#F, p->F & (M))
#define __PSN(S, F)	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
#define __PN(F)		__PSN(#F, F)
#define PN(F)		__PSN(#F, p->F)
1120 
1121 
#ifdef CONFIG_NUMA_BALANCING
/*
 * Print one "numa_faults" line for @node.
 *
 * @tsf/@tpf: task shared/private fault counts
 * @gsf/@gpf: group shared/private fault counts
 *
 * Note that the private count is printed before the shared one.
 */
void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
		unsigned long tpf, unsigned long gsf, unsigned long gpf)
{
	SEQ_printf(m, "numa_faults node=%d ", node);

	SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);

	SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
}
#endif
1131 
1132 
/*
 * Print the NUMA balancing state of task @p. Compiles to an empty
 * function without CONFIG_NUMA_BALANCING. The scan sequence number is
 * only printed for tasks that have an mm.
 */
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
	if (p->mm)
		__PS("mm->numa_scan_seq", p->mm->numa_scan_seq);

	__PS("numa_pages_migrated", p->numa_pages_migrated);
	__PS("numa_preferred_nid", p->numa_preferred_nid);
	__PS("total_numa_faults", p->total_numa_faults);

	SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
			task_node(p), task_numa_group_id(p));

	show_numa_stats(p, m);
#endif
}
1147 
/*
 * Dump the scheduler state of task @p as "label : value" lines.
 *
 * @p:  task to describe.
 * @ns: pid namespace used to translate the task's pid for the header line.
 * @m:  seq_file to print into; when NULL, SEQ_printf() prints to the
 *      console via pr_cont() instead.
 *
 * Output order: header, core sched_entity times, (if schedstats are enabled)
 * the per-task schedstat counters and two derived averages, context-switch
 * counts, load/PELT averages (CONFIG_SMP), uclamp values
 * (CONFIG_UCLAMP_TASK), policy/priority, deadline parameters for deadline
 * tasks, a clock-read delta, and finally the NUMA fields.
 *
 * NOTE: nr_switches, avg_atom and avg_per_cpu are passed to __P()/__PN(),
 * which stringify their argument, so these local names appear verbatim in
 * the output and must not be renamed.
 */
void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
						  struct seq_file *m)
{
	unsigned long nr_switches;

	/* Header: command name, pid as seen from @ns, thread count. */
	SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
						get_nr_threads(p));
	SEQ_printf(m,
		"---------------------------------------------------------"
		"----------\n");

/*
 * Function-local variants of __PS()/__PSN() that read p->stats.F through
 * schedstat_val(); both are #undef'd again further down.
 */
#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->stats.F))
#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F))

	PN(se.exec_start);
	PN(se.vruntime);
	PN(se.sum_exec_runtime);

	/* Total context switches: voluntary plus involuntary. */
	nr_switches = p->nvcsw + p->nivcsw;

	P(se.nr_migrations);

	if (schedstat_enabled()) {
		u64 avg_atom, avg_per_cpu;

		PN_SCHEDSTAT(sum_sleep_runtime);
		PN_SCHEDSTAT(sum_block_runtime);
		PN_SCHEDSTAT(wait_start);
		PN_SCHEDSTAT(sleep_start);
		PN_SCHEDSTAT(block_start);
		PN_SCHEDSTAT(sleep_max);
		PN_SCHEDSTAT(block_max);
		PN_SCHEDSTAT(exec_max);
		PN_SCHEDSTAT(slice_max);
		PN_SCHEDSTAT(wait_max);
		PN_SCHEDSTAT(wait_sum);
		P_SCHEDSTAT(wait_count);
		PN_SCHEDSTAT(iowait_sum);
		P_SCHEDSTAT(iowait_count);
		P_SCHEDSTAT(nr_migrations_cold);
		P_SCHEDSTAT(nr_failed_migrations_affine);
		P_SCHEDSTAT(nr_failed_migrations_running);
		P_SCHEDSTAT(nr_failed_migrations_hot);
		P_SCHEDSTAT(nr_forced_migrations);
		P_SCHEDSTAT(nr_wakeups);
		P_SCHEDSTAT(nr_wakeups_sync);
		P_SCHEDSTAT(nr_wakeups_migrate);
		P_SCHEDSTAT(nr_wakeups_local);
		P_SCHEDSTAT(nr_wakeups_remote);
		P_SCHEDSTAT(nr_wakeups_affine);
		P_SCHEDSTAT(nr_wakeups_affine_attempts);
		P_SCHEDSTAT(nr_wakeups_passive);
		P_SCHEDSTAT(nr_wakeups_idle);

		/*
		 * Total execution time divided by the number of context
		 * switches; -1 is stored when there have been no switches
		 * so the division is never done by zero.
		 */
		avg_atom = p->se.sum_exec_runtime;
		if (nr_switches)
			avg_atom = div64_ul(avg_atom, nr_switches);
		else
			avg_atom = -1LL;

		/*
		 * Total execution time divided by the number of migrations,
		 * with the same -1 placeholder when the task never migrated.
		 */
		avg_per_cpu = p->se.sum_exec_runtime;
		if (p->se.nr_migrations) {
			avg_per_cpu = div64_u64(avg_per_cpu,
						p->se.nr_migrations);
		} else {
			avg_per_cpu = -1LL;
		}

		__PN(avg_atom);
		__PN(avg_per_cpu);

#ifdef CONFIG_SCHED_CORE
		PN_SCHEDSTAT(core_forceidle_sum);
#endif
	}

	__P(nr_switches);
	__PS("nr_voluntary_switches", p->nvcsw);
	__PS("nr_involuntary_switches", p->nivcsw);

	P(se.load.weight);
#ifdef CONFIG_SMP
	P(se.avg.load_sum);
	P(se.avg.runnable_sum);
	P(se.avg.util_sum);
	P(se.avg.load_avg);
	P(se.avg.runnable_avg);
	P(se.avg.util_avg);
	P(se.avg.last_update_time);
	/* Print util_est with the UTIL_AVG_UNCHANGED bit(s) cleared. */
	PM(se.avg.util_est, ~UTIL_AVG_UNCHANGED);
#endif
#ifdef CONFIG_UCLAMP_TASK
	/* Requested clamp values first, then the effective ones. */
	__PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
	__PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value);
	__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
	__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
#endif
	P(policy);
	P(prio);
	/* Deadline parameters are only printed for deadline-policy tasks. */
	if (task_has_dl_policy(p)) {
		P(dl.runtime);
		P(dl.deadline);
	}
#undef PN_SCHEDSTAT
#undef P_SCHEDSTAT

	{
		/*
		 * Difference between two back-to-back cpu_clock() reads on
		 * the CPU we are currently running on.
		 * NOTE(review): presumably meant as an estimate of the cost
		 * of reading the clock -- confirm.
		 */
		unsigned int this_cpu = raw_smp_processor_id();
		u64 t0, t1;

		t0 = cpu_clock(this_cpu);
		t1 = cpu_clock(this_cpu);
		__PS("clock-delta", t1-t0);
	}

	sched_show_numa(p, m);
}
1265 
/*
 * Reset the scheduler statistics of task @p by zeroing p->stats in place.
 * Does nothing when CONFIG_SCHEDSTATS is not set.
 */
void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	memset(&p->stats, 0, sizeof p->stats);
#endif /* CONFIG_SCHEDSTATS */
}
1272 
1273 void resched_latency_warn(int cpu, u64 latency)
1274 {
1275 	static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1);
1276 
1277 	WARN(__ratelimit(&latency_check_ratelimit),
1278 	     "sched: CPU %d need_resched set for > %llu ns (%d ticks) "
1279 	     "without schedule\n",
1280 	     cpu, latency, cpu_rq(cpu)->ticks_without_resched);
1281 }
1282