xref: /linux-6.15/kernel/trace/trace_stack.c (revision 4285f2fc)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Steven Rostedt <[email protected]>
 *
 */
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/init.h>

#include <asm/setup.h>

#include "trace.h"

static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES + 1];
unsigned stack_trace_index[STACK_TRACE_ENTRIES];

/*
 * Reserve one entry for the passed in ip. This will allow
 * us to remove most or all of the stack size overhead
 * added by the stack tracer itself.
 */
struct stack_trace stack_trace_max = {
	.max_entries		= STACK_TRACE_ENTRIES - 1,
	.entries		= &stack_dump_trace[0],
};

unsigned long stack_trace_max_size;
arch_spinlock_t stack_trace_max_lock =
	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

DEFINE_PER_CPU(int, disable_stack_tracer);
static DEFINE_MUTEX(stack_sysctl_mutex);

int stack_tracer_enabled;
static int last_stack_tracer_enabled;
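
/*
 * Overview: the stack tracer hooks every traced function entry via
 * ftrace (stack_trace_call) and measures how much of the current
 * task's stack is in use.  Whenever a new maximum is seen,
 * check_stack() records the backtrace and the per-frame stack usage,
 * which user space can read back through tracefs.  A typical session
 * (paths may vary; tracefs may also be under /sys/kernel/debug/tracing)
 * looks something like:
 *
 *   echo 1 > /proc/sys/kernel/stack_tracer_enabled
 *   cat /sys/kernel/tracing/stack_trace
 *   cat /sys/kernel/tracing/stack_max_size
 */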

void stack_trace_print(void)
{
	long i;
	int size;

	pr_emerg("        Depth    Size   Location    (%d entries)\n"
			   "        -----    ----   --------\n",
			   stack_trace_max.nr_entries);

	for (i = 0; i < stack_trace_max.nr_entries; i++) {
		if (i + 1 == stack_trace_max.nr_entries)
			size = stack_trace_index[i];
		else
			size = stack_trace_index[i] - stack_trace_index[i+1];

		pr_emerg("%3ld) %8d   %5d   %pS\n", i, stack_trace_index[i],
				size, (void *)stack_dump_trace[i]);
	}
}
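
/*
 * The table printed above (and by t_show() for the stack_trace file)
 * reads as follows: Depth is the number of bytes between the top of
 * the stack and the location where that entry's return address was
 * found, and Size is the difference between adjacent depths, i.e. an
 * estimate of that function's own stack usage.  Illustrative output
 * (addresses and sizes are made up):
 *
 *         Depth    Size   Location    (3 entries)
 *         -----    ----   --------
 *   0)     1296     256   __schedule+0x2a0/0x8b0
 *   1)     1040     160   schedule+0x3c/0xd0
 *   2)      880     880   worker_thread+0xc5/0x3c0
 */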

/*
 * When arch-specific code overrides this function, the following
 * data should be filled in, with stack_trace_max_lock held to
 * prevent concurrent updates:
 *     stack_trace_index[]
 *     stack_trace_max
 *     stack_trace_max_size
 */
void __weak
check_stack(unsigned long ip, unsigned long *stack)
{
	unsigned long this_size, flags;
	unsigned long *p, *top, *start;
	static int tracer_frame;
	int frame_size = READ_ONCE(tracer_frame);
	int i, x;

	this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
	this_size = THREAD_SIZE - this_size;
	/* Remove the frame of the tracer */
	this_size -= frame_size;

	if (this_size <= stack_trace_max_size)
		return;

	/* we do not handle interrupt stacks yet */
	if (!object_is_on_stack(stack))
		return;

	/* Can't do this from NMI context (can cause deadlocks) */
	if (in_nmi())
		return;

	local_irq_save(flags);
	arch_spin_lock(&stack_trace_max_lock);

	/* In case another CPU set the tracer_frame on us */
	if (unlikely(!frame_size))
		this_size -= tracer_frame;

	/* a race could have already updated it */
	if (this_size <= stack_trace_max_size)
		goto out;

	stack_trace_max_size = this_size;

	stack_trace_max.nr_entries = 0;
	stack_trace_max.skip = 0;

	save_stack_trace(&stack_trace_max);

	/* Skip over the overhead of the stack tracer itself */
	for (i = 0; i < stack_trace_max.nr_entries; i++) {
		if (stack_dump_trace[i] == ip)
			break;
	}

	/*
	 * Some archs may not have the passed in ip in the dump.
	 * If that happens, we need to show everything.
	 */
	if (i == stack_trace_max.nr_entries)
		i = 0;

	/*
	 * Now find where in the stack these are.
	 */
	x = 0;
	start = stack;
	top = (unsigned long *)
		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);

	/*
	 * Loop through all the entries. For some reason an entry may
	 * be missing from the stack, so we have to account for that.
	 * If all entries are present, this loop only runs once. This
	 * code only executes on a new max, so it is far from a fast
	 * path.
	 */
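
	/*
	 * Worked example with made-up values: if save_stack_trace()
	 * recorded { funcA, funcB, funcC } starting at index i, and
	 * their return addresses are found on the stack 1024, 2048 and
	 * 4096 bytes below 'top', then after this loop:
	 *
	 *   stack_dump_trace[]  = { funcA, funcB, funcC }
	 *   stack_trace_index[] = { 1024, 2048, 4096 }
	 *
	 * i.e. the entries are compacted to the front of the arrays
	 * and each index holds the distance in bytes from the top of
	 * the stack to where that return address lives.
	 */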
	while (i < stack_trace_max.nr_entries) {
		int found = 0;

		stack_trace_index[x] = this_size;
		p = start;

		for (; p < top && i < stack_trace_max.nr_entries; p++) {
			/*
			 * The READ_ONCE_NOCHECK is used to let KASAN know that
			 * this is not a stack-out-of-bounds error.
			 */
			if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) {
				stack_dump_trace[x] = stack_dump_trace[i++];
				this_size = stack_trace_index[x++] =
					(top - p) * sizeof(unsigned long);
				found = 1;
				/* Start the search from here */
				start = p + 1;
				/*
				 * We do not want to show the overhead
				 * of the stack tracer stack in the
				 * max stack. If we haven't figured
				 * out what that is, then figure it out
				 * now.
				 */
				if (unlikely(!tracer_frame)) {
					tracer_frame = (p - stack) *
						sizeof(unsigned long);
					stack_trace_max_size -= tracer_frame;
				}
			}
		}

		if (!found)
			i++;
	}

	stack_trace_max.nr_entries = x;

	if (task_stack_end_corrupted(current)) {
		stack_trace_print();
		BUG();
	}

 out:
	arch_spin_unlock(&stack_trace_max_lock);
	local_irq_restore(flags);
}

static void
stack_trace_call(unsigned long ip, unsigned long parent_ip,
		 struct ftrace_ops *op, struct pt_regs *pt_regs)
{
	unsigned long stack;

	preempt_disable_notrace();

	/* No atomic needed; this variable is only modified from this CPU */
	__this_cpu_inc(disable_stack_tracer);
	if (__this_cpu_read(disable_stack_tracer) != 1)
		goto out;

	/* If RCU is not watching, then saving the stack trace can fail */
	if (!rcu_is_watching())
		goto out;

	ip += MCOUNT_INSN_SIZE;

	check_stack(ip, &stack);

 out:
	__this_cpu_dec(disable_stack_tracer);
	/* prevent recursion in schedule */
	preempt_enable_notrace();
}
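
/*
 * Note that stack_trace_call() runs on every traced function entry,
 * so it handles its own recursion: the per-CPU disable_stack_tracer
 * counter is non-zero while the callback (or anything it calls that
 * happens to be traced) is running, and nested calls bail out early.
 * The ip passed in points at the traced function's mcount/fentry call
 * site; adding MCOUNT_INSN_SIZE makes it match the return address
 * that the stack walk records for that function, which is what
 * check_stack() searches for when skipping the tracer's own frames.
 */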

static struct ftrace_ops trace_ops __read_mostly =
{
	.func = stack_trace_call,
	.flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
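
/*
 * The FTRACE_OPS_FL_RECURSION_SAFE flag above tells ftrace that this
 * callback provides its own recursion protection (the per-CPU
 * disable_stack_tracer counter), so the core does not need to wrap it
 * in the generic recursion guard.
 */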

static ssize_t
stack_max_size_read(struct file *filp, char __user *ubuf,
		    size_t count, loff_t *ppos)
{
	unsigned long *ptr = filp->private_data;
	char buf[64];
	int r;

	r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
	if (r > sizeof(buf))
		r = sizeof(buf);
	return simple_read_from_buffer(ubuf, count, ppos, buf, r);
}

static ssize_t
stack_max_size_write(struct file *filp, const char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	long *ptr = filp->private_data;
	unsigned long val, flags;
	int ret;

	ret = kstrtoul_from_user(ubuf, count, 10, &val);
	if (ret)
		return ret;

	local_irq_save(flags);

	/*
	 * If we trace inside arch_spin_lock() (or from an NMI after it),
	 * we would deadlock on the lock we are about to take, so the
	 * per-CPU disable_stack_tracer also needs to be bumped here.
	 */
	__this_cpu_inc(disable_stack_tracer);

	arch_spin_lock(&stack_trace_max_lock);
	*ptr = val;
	arch_spin_unlock(&stack_trace_max_lock);

	__this_cpu_dec(disable_stack_tracer);
	local_irq_restore(flags);

	return count;
}
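
/*
 * Writing to stack_max_size replaces the recorded maximum under the
 * same lock that check_stack() takes, so for example
 * "echo 0 > stack_max_size" (from the tracing directory) lets a new
 * maximum be captured from scratch.
 */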

static const struct file_operations stack_max_size_fops = {
	.open		= tracing_open_generic,
	.read		= stack_max_size_read,
	.write		= stack_max_size_write,
	.llseek		= default_llseek,
};

static void *
__next(struct seq_file *m, loff_t *pos)
{
	long n = *pos - 1;

	if (n >= stack_trace_max.nr_entries)
		return NULL;

	m->private = (void *)n;
	return &m->private;
}

static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return __next(m, pos);
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
	local_irq_disable();

	__this_cpu_inc(disable_stack_tracer);

	arch_spin_lock(&stack_trace_max_lock);

	if (*pos == 0)
		return SEQ_START_TOKEN;

	return __next(m, pos);
}

static void t_stop(struct seq_file *m, void *p)
{
	arch_spin_unlock(&stack_trace_max_lock);

	__this_cpu_dec(disable_stack_tracer);

	local_irq_enable();
}
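
/*
 * The seq_file iterator above pins the snapshot while it is being
 * dumped: t_start() bumps disable_stack_tracer on this CPU (so the
 * dump itself cannot recurse into check_stack()) and takes
 * stack_trace_max_lock (so other CPUs cannot update the snapshot),
 * and t_stop() releases both.
 */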

static void trace_lookup_stack(struct seq_file *m, long i)
{
	unsigned long addr = stack_dump_trace[i];

	seq_printf(m, "%pS\n", (void *)addr);
}

static void print_disabled(struct seq_file *m)
{
	seq_puts(m, "#\n"
		 "#  Stack tracer disabled\n"
		 "#\n"
		 "# To enable the stack tracer, either add 'stacktrace' to the\n"
		 "# kernel command line\n"
		 "# or 'echo 1 > /proc/sys/kernel/stack_tracer_enabled'\n"
		 "#\n");
}

static int t_show(struct seq_file *m, void *v)
{
	long i;
	int size;

	if (v == SEQ_START_TOKEN) {
		seq_printf(m, "        Depth    Size   Location"
			   "    (%d entries)\n"
			   "        -----    ----   --------\n",
			   stack_trace_max.nr_entries);

		if (!stack_tracer_enabled && !stack_trace_max_size)
			print_disabled(m);

		return 0;
	}

	i = *(long *)v;

	if (i >= stack_trace_max.nr_entries)
		return 0;

	if (i + 1 == stack_trace_max.nr_entries)
		size = stack_trace_index[i];
	else
		size = stack_trace_index[i] - stack_trace_index[i+1];

	seq_printf(m, "%3ld) %8d   %5d   ", i, stack_trace_index[i], size);

	trace_lookup_stack(m, i);

	return 0;
}
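
/*
 * t_show() emits the same Depth/Size/Location table as
 * stack_trace_print(), one entry per seq_file iteration, plus a hint
 * when the tracer has never been enabled.
 */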

static const struct seq_operations stack_trace_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};

static int stack_trace_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &stack_trace_seq_ops);
}

static const struct file_operations stack_trace_fops = {
	.open		= stack_trace_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

#ifdef CONFIG_DYNAMIC_FTRACE

static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
	struct ftrace_ops *ops = inode->i_private;

	return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
				 inode, file);
}

static const struct file_operations stack_trace_filter_fops = {
	.open = stack_trace_filter_open,
	.read = seq_read,
	.write = ftrace_filter_write,
	.llseek = tracing_lseek,
	.release = ftrace_regex_release,
};

#endif /* CONFIG_DYNAMIC_FTRACE */
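
/*
 * With CONFIG_DYNAMIC_FTRACE, stack_trace_filter accepts the same
 * syntax as set_ftrace_filter and limits which functions trigger the
 * stack check, for example:
 *
 *   echo 'ext4_*' > /sys/kernel/tracing/stack_trace_filter
 */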

int
stack_trace_sysctl(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp,
		   loff_t *ppos)
{
	int ret;

	mutex_lock(&stack_sysctl_mutex);

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (ret || !write ||
	    (last_stack_tracer_enabled == !!stack_tracer_enabled))
		goto out;

	last_stack_tracer_enabled = !!stack_tracer_enabled;

	if (stack_tracer_enabled)
		register_ftrace_function(&trace_ops);
	else
		unregister_ftrace_function(&trace_ops);

 out:
	mutex_unlock(&stack_sysctl_mutex);
	return ret;
}
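
/*
 * This handler backs /proc/sys/kernel/stack_tracer_enabled.  The
 * ftrace callback is only (un)registered when the value actually
 * changes, which is what last_stack_tracer_enabled tracks, e.g.:
 *
 *   sysctl kernel.stack_tracer_enabled=1
 */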

static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;

static __init int enable_stacktrace(char *str)
{
	int len;

	if ((len = str_has_prefix(str, "_filter=")))
		strncpy(stack_trace_filter_buf, str + len, COMMAND_LINE_SIZE);

	stack_tracer_enabled = 1;
	last_stack_tracer_enabled = 1;
	return 1;
}
__setup("stacktrace", enable_stacktrace);
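
/*
 * Because __setup() matches boot parameters by prefix, this handler
 * sees both "stacktrace" (str is empty) and "stacktrace_filter=..."
 * (str is the "_filter=..." remainder), which is why it checks for
 * the "_filter=" prefix above.  Example kernel command line:
 *
 *   stacktrace stacktrace_filter=kmem_cache_*
 */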

static __init int stack_trace_init(void)
{
	struct dentry *d_tracer;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	trace_create_file("stack_max_size", 0644, d_tracer,
			&stack_trace_max_size, &stack_max_size_fops);

	trace_create_file("stack_trace", 0444, d_tracer,
			NULL, &stack_trace_fops);

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("stack_trace_filter", 0644, d_tracer,
			  &trace_ops, &stack_trace_filter_fops);
#endif

	if (stack_trace_filter_buf[0])
		ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);

	if (stack_tracer_enabled)
		register_ftrace_function(&trace_ops);

	return 0;
}

device_initcall(stack_trace_init);