xref: /linux-6.15/kernel/trace/trace.c (revision 00387808)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
6  * Copyright (C) 2008 Ingo Molnar <[email protected]>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <[email protected]>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 
54 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
55 
56 #include "trace.h"
57 #include "trace_output.h"
58 
59 #ifdef CONFIG_FTRACE_STARTUP_TEST
60 /*
61  * We need to change this state when a selftest is running.
62  * A selftest will look into the ring buffer to count the
63  * entries inserted during the selftest, although concurrent
64  * insertions into the ring buffer, such as trace_printk(), could occur
65  * at the same time, giving false positive or negative results.
66  */
67 static bool __read_mostly tracing_selftest_running;
68 
69 /*
70  * If boot-time tracing including tracers/events via kernel cmdline
71  * is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 void __init disable_tracing_selftest(const char *reason)
76 {
77 	if (!tracing_selftest_disabled) {
78 		tracing_selftest_disabled = true;
79 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
80 	}
81 }
82 #else
83 #define tracing_selftest_running	0
84 #define tracing_selftest_disabled	0
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 static struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static bool traceoff_after_boot __initdata;
92 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
93 
94 /* For tracers that don't implement custom flags */
95 static struct tracer_opt dummy_tracer_opt[] = {
96 	{ }
97 };
98 
99 static int
100 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
101 {
102 	return 0;
103 }
104 
105 /*
106  * To prevent the comm cache from being overwritten when no
107  * tracing is active, only save the comm when a trace event
108  * occurred.
109  */
110 DEFINE_PER_CPU(bool, trace_taskinfo_save);
111 
112 /*
113  * Kill all tracing for good (never come back).
114  * It is initialized to 1 but will turn to zero if the initialization
115  * of the tracer is successful. But that is the only place that sets
116  * this back to zero.
117  */
118 static int tracing_disabled = 1;
119 
120 cpumask_var_t __read_mostly	tracing_buffer_mask;
121 
122 /*
123  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124  *
125  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126  * is set, then ftrace_dump is called. This will output the contents
127  * of the ftrace buffers to the console.  This is very useful for
128  * capturing traces that lead to crashes and outputting them to a
129  * serial console.
130  *
131  * It is off by default, but you can enable it either by specifying
132  * "ftrace_dump_on_oops" on the kernel command line, or by setting
133  * /proc/sys/kernel/ftrace_dump_on_oops.
134  * Set it to 1 to dump the buffers of all CPUs,
135  * set it to 2 to dump the buffer of the CPU that triggered the oops,
136  * or set it to an instance name to dump a specific trace instance.
137  * Dumping multiple instances is also supported; instances are separated
138  * by commas.
139  */
140 /* Defaults to the string "0", i.e. dumping is disabled by default */
141 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145 
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149 	struct module			*mod;
150 	unsigned long			length;
151 };
152 
153 union trace_eval_map_item;
154 
155 struct trace_eval_map_tail {
156 	/*
157 	 * "end" is first and points to NULL as it must be different
158 	 * than "mod" or "eval_string"
159 	 */
160 	union trace_eval_map_item	*next;
161 	const char			*end;	/* points to NULL */
162 };
163 
164 static DEFINE_MUTEX(trace_eval_mutex);
165 
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174 	struct trace_eval_map		map;
175 	struct trace_eval_map_head	head;
176 	struct trace_eval_map_tail	tail;
177 };
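
/*
 * Illustrative layout of one saved array (a sketch of the description
 * above, not generated from the code):
 *
 *   trace_eval_maps -> [ head: mod, length ][ map 0 ] ... [ map N-1 ][ tail: next ]
 *                                                                          |
 *                                                                          v
 *                                                              next saved array or NULL
 */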
178 
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181 
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184 				   struct trace_buffer *buffer,
185 				   unsigned int trace_ctx);
186 
187 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
188 static char *default_bootup_tracer;
189 
190 static bool allocate_snapshot;
191 static bool snapshot_at_boot;
192 
193 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_instance_index;
195 
196 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
197 static int boot_snapshot_index;
198 
199 static int __init set_cmdline_ftrace(char *str)
200 {
201 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
202 	default_bootup_tracer = bootup_tracer_buf;
203 	/* We are using ftrace early, expand it */
204 	trace_set_ring_buffer_expanded(NULL);
205 	return 1;
206 }
207 __setup("ftrace=", set_cmdline_ftrace);
208 
209 int ftrace_dump_on_oops_enabled(void)
210 {
211 	if (!strcmp("0", ftrace_dump_on_oops))
212 		return 0;
213 	else
214 		return 1;
215 }
216 
217 static int __init set_ftrace_dump_on_oops(char *str)
218 {
219 	if (!*str) {
220 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
221 		return 1;
222 	}
223 
224 	if (*str == ',') {
225 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
226 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
227 		return 1;
228 	}
229 
230 	if (*str++ == '=') {
231 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
232 		return 1;
233 	}
234 
235 	return 0;
236 }
237 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
238 
239 static int __init stop_trace_on_warning(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		__disable_trace_on_warning = 1;
243 	return 1;
244 }
245 __setup("traceoff_on_warning", stop_trace_on_warning);
246 
247 static int __init boot_alloc_snapshot(char *str)
248 {
249 	char *slot = boot_snapshot_info + boot_snapshot_index;
250 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
251 	int ret;
252 
253 	if (str[0] == '=') {
254 		str++;
255 		if (strlen(str) >= left)
256 			return -1;
257 
258 		ret = snprintf(slot, left, "%s\t", str);
259 		boot_snapshot_index += ret;
260 	} else {
261 		allocate_snapshot = true;
262 		/* We also need the main ring buffer expanded */
263 		trace_set_ring_buffer_expanded(NULL);
264 	}
265 	return 1;
266 }
267 __setup("alloc_snapshot", boot_alloc_snapshot);
268 
269 
270 static int __init boot_snapshot(char *str)
271 {
272 	snapshot_at_boot = true;
273 	boot_alloc_snapshot(str);
274 	return 1;
275 }
276 __setup("ftrace_boot_snapshot", boot_snapshot);
277 
278 
279 static int __init boot_instance(char *str)
280 {
281 	char *slot = boot_instance_info + boot_instance_index;
282 	int left = sizeof(boot_instance_info) - boot_instance_index;
283 	int ret;
284 
285 	if (strlen(str) >= left)
286 		return -1;
287 
288 	ret = snprintf(slot, left, "%s\t", str);
289 	boot_instance_index += ret;
290 
291 	return 1;
292 }
293 __setup("trace_instance=", boot_instance);
294 
295 
296 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
297 
298 static int __init set_trace_boot_options(char *str)
299 {
300 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
301 	return 1;
302 }
303 __setup("trace_options=", set_trace_boot_options);
304 
305 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
306 static char *trace_boot_clock __initdata;
307 
308 static int __init set_trace_boot_clock(char *str)
309 {
310 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
311 	trace_boot_clock = trace_boot_clock_buf;
312 	return 1;
313 }
314 __setup("trace_clock=", set_trace_boot_clock);
315 
316 static int __init set_tracepoint_printk(char *str)
317 {
318 	/* Ignore the "tp_printk_stop_on_boot" param */
319 	if (*str == '_')
320 		return 0;
321 
322 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
323 		tracepoint_printk = 1;
324 	return 1;
325 }
326 __setup("tp_printk", set_tracepoint_printk);
327 
328 static int __init set_tracepoint_printk_stop(char *str)
329 {
330 	tracepoint_printk_stop_on_boot = true;
331 	return 1;
332 }
333 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
334 
335 static int __init set_traceoff_after_boot(char *str)
336 {
337 	traceoff_after_boot = true;
338 	return 1;
339 }
340 __setup("traceoff_after_boot", set_traceoff_after_boot);
341 
342 unsigned long long ns2usecs(u64 nsec)
343 {
344 	nsec += 500;
345 	do_div(nsec, 1000);
346 	return nsec;
347 }
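
/*
 * Worked example (illustrative): ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 * The "+ 500" rounds to the nearest microsecond instead of truncating.
 */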
348 
349 static void
350 trace_process_export(struct trace_export *export,
351 	       struct ring_buffer_event *event, int flag)
352 {
353 	struct trace_entry *entry;
354 	unsigned int size = 0;
355 
356 	if (export->flags & flag) {
357 		entry = ring_buffer_event_data(event);
358 		size = ring_buffer_event_length(event);
359 		export->write(export, entry, size);
360 	}
361 }
362 
363 static DEFINE_MUTEX(ftrace_export_lock);
364 
365 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
366 
367 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
368 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
369 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
370 
371 static inline void ftrace_exports_enable(struct trace_export *export)
372 {
373 	if (export->flags & TRACE_EXPORT_FUNCTION)
374 		static_branch_inc(&trace_function_exports_enabled);
375 
376 	if (export->flags & TRACE_EXPORT_EVENT)
377 		static_branch_inc(&trace_event_exports_enabled);
378 
379 	if (export->flags & TRACE_EXPORT_MARKER)
380 		static_branch_inc(&trace_marker_exports_enabled);
381 }
382 
383 static inline void ftrace_exports_disable(struct trace_export *export)
384 {
385 	if (export->flags & TRACE_EXPORT_FUNCTION)
386 		static_branch_dec(&trace_function_exports_enabled);
387 
388 	if (export->flags & TRACE_EXPORT_EVENT)
389 		static_branch_dec(&trace_event_exports_enabled);
390 
391 	if (export->flags & TRACE_EXPORT_MARKER)
392 		static_branch_dec(&trace_marker_exports_enabled);
393 }
394 
395 static void ftrace_exports(struct ring_buffer_event *event, int flag)
396 {
397 	struct trace_export *export;
398 
399 	preempt_disable_notrace();
400 
401 	export = rcu_dereference_raw_check(ftrace_exports_list);
402 	while (export) {
403 		trace_process_export(export, event, flag);
404 		export = rcu_dereference_raw_check(export->next);
405 	}
406 
407 	preempt_enable_notrace();
408 }
409 
410 static inline void
411 add_trace_export(struct trace_export **list, struct trace_export *export)
412 {
413 	rcu_assign_pointer(export->next, *list);
414 	/*
415 	 * We are inserting the export into the list, but another
416 	 * CPU might be walking that list. We need to make sure
417 	 * the export->next pointer is valid before another CPU sees
418 	 * the export pointer included in the list.
419 	 */
420 	rcu_assign_pointer(*list, export);
421 }
422 
423 static inline int
424 rm_trace_export(struct trace_export **list, struct trace_export *export)
425 {
426 	struct trace_export **p;
427 
428 	for (p = list; *p != NULL; p = &(*p)->next)
429 		if (*p == export)
430 			break;
431 
432 	if (*p != export)
433 		return -1;
434 
435 	rcu_assign_pointer(*p, (*p)->next);
436 
437 	return 0;
438 }
439 
440 static inline void
441 add_ftrace_export(struct trace_export **list, struct trace_export *export)
442 {
443 	ftrace_exports_enable(export);
444 
445 	add_trace_export(list, export);
446 }
447 
448 static inline int
449 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
450 {
451 	int ret;
452 
453 	ret = rm_trace_export(list, export);
454 	ftrace_exports_disable(export);
455 
456 	return ret;
457 }
458 
459 int register_ftrace_export(struct trace_export *export)
460 {
461 	if (WARN_ON_ONCE(!export->write))
462 		return -1;
463 
464 	mutex_lock(&ftrace_export_lock);
465 
466 	add_ftrace_export(&ftrace_exports_list, export);
467 
468 	mutex_unlock(&ftrace_export_lock);
469 
470 	return 0;
471 }
472 EXPORT_SYMBOL_GPL(register_ftrace_export);
473 
474 int unregister_ftrace_export(struct trace_export *export)
475 {
476 	int ret;
477 
478 	mutex_lock(&ftrace_export_lock);
479 
480 	ret = rm_ftrace_export(&ftrace_exports_list, export);
481 
482 	mutex_unlock(&ftrace_export_lock);
483 
484 	return ret;
485 }
486 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
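
/*
 * Example (illustrative only, not part of this file): a module could hook
 * into the export list above roughly like this. The callback name and the
 * sink it forwards to are hypothetical; struct trace_export and the
 * TRACE_EXPORT_* flags come from <linux/trace.h>.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		// forward the raw trace entry (len bytes) to some sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *		.flags = TRACE_EXPORT_EVENT | TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	// module init:  register_ftrace_export(&my_export);
 *	// module exit:  unregister_ftrace_export(&my_export);
 */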
487 
488 /* trace_flags holds trace_options default values */
489 #define TRACE_DEFAULT_FLAGS						\
490 	(FUNCTION_DEFAULT_FLAGS |					\
491 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
492 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
493 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
494 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
495 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
496 
497 /* trace_options that are only supported by global_trace */
498 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
499 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
500 
501 /* trace_flags that are default zero for instances */
502 #define ZEROED_TRACE_FLAGS \
503 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
504 
505 /*
506  * The global_trace is the descriptor that holds the top-level tracing
507  * buffers for the live tracing.
508  */
509 static struct trace_array global_trace = {
510 	.trace_flags = TRACE_DEFAULT_FLAGS,
511 };
512 
513 static struct trace_array *printk_trace = &global_trace;
514 
515 static __always_inline bool printk_binsafe(struct trace_array *tr)
516 {
517 	/*
518 	 * The binary format of trace_printk can cause a crash if used
519 	 * by a buffer from another boot. Force the use of the
520 	 * non-binary version of trace_printk if the trace_printk
521 	 * buffer is a boot-mapped ring buffer.
522 	 */
523 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
524 }
525 
526 static void update_printk_trace(struct trace_array *tr)
527 {
528 	if (printk_trace == tr)
529 		return;
530 
531 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
532 	printk_trace = tr;
533 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
534 }
535 
536 void trace_set_ring_buffer_expanded(struct trace_array *tr)
537 {
538 	if (!tr)
539 		tr = &global_trace;
540 	tr->ring_buffer_expanded = true;
541 }
542 
543 LIST_HEAD(ftrace_trace_arrays);
544 
545 int trace_array_get(struct trace_array *this_tr)
546 {
547 	struct trace_array *tr;
548 
549 	guard(mutex)(&trace_types_lock);
550 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
551 		if (tr == this_tr) {
552 			tr->ref++;
553 			return 0;
554 		}
555 	}
556 
557 	return -ENODEV;
558 }
559 
560 static void __trace_array_put(struct trace_array *this_tr)
561 {
562 	WARN_ON(!this_tr->ref);
563 	this_tr->ref--;
564 }
565 
566 /**
567  * trace_array_put - Decrement the reference counter for this trace array.
568  * @this_tr : pointer to the trace array
569  *
570  * NOTE: Use this when we no longer need the trace array returned by
571  * trace_array_get_by_name(). This ensures the trace array can be later
572  * destroyed.
573  *
574  */
575 void trace_array_put(struct trace_array *this_tr)
576 {
577 	if (!this_tr)
578 		return;
579 
580 	mutex_lock(&trace_types_lock);
581 	__trace_array_put(this_tr);
582 	mutex_unlock(&trace_types_lock);
583 }
584 EXPORT_SYMBOL_GPL(trace_array_put);
585 
586 int tracing_check_open_get_tr(struct trace_array *tr)
587 {
588 	int ret;
589 
590 	ret = security_locked_down(LOCKDOWN_TRACEFS);
591 	if (ret)
592 		return ret;
593 
594 	if (tracing_disabled)
595 		return -ENODEV;
596 
597 	if (tr && trace_array_get(tr) < 0)
598 		return -ENODEV;
599 
600 	return 0;
601 }
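
/*
 * Example (illustrative only): the tracefs file handlers later in this file
 * pair the check above with trace_array_put() in ->release(). A hypothetical
 * open/release pair would look roughly like:
 *
 *	static int my_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);	// lockdown check + ref
 *		if (ret)
 *			return ret;
 *		filp->private_data = tr;
 *		return 0;
 *	}
 *
 *	static int my_release(struct inode *inode, struct file *filp)
 *	{
 *		trace_array_put(inode->i_private);	// drop the reference
 *		return 0;
 *	}
 */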
602 
603 /**
604  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
605  * @filtered_pids: The list of pids to check
606  * @search_pid: The PID to find in @filtered_pids
607  *
608  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
609  */
610 bool
611 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
612 {
613 	return trace_pid_list_is_set(filtered_pids, search_pid);
614 }
615 
616 /**
617  * trace_ignore_this_task - should a task be ignored for tracing
618  * @filtered_pids: The list of pids to check
619  * @filtered_no_pids: The list of pids not to be traced
620  * @task: The task that should be ignored if not filtered
621  *
622  * Checks if @task should be traced or not from @filtered_pids.
623  * Returns true if @task should *NOT* be traced.
624  * Returns false if @task should be traced.
625  */
626 bool
627 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
628 		       struct trace_pid_list *filtered_no_pids,
629 		       struct task_struct *task)
630 {
631 	/*
632 	 * If filtered_no_pids is not empty, and the task's pid is listed
633 	 * in filtered_no_pids, then return true.
634 	 * Otherwise, if filtered_pids is empty, that means we can
635 	 * trace all tasks. If it has content, then only trace pids
636 	 * within filtered_pids.
637 	 */
638 
639 	return (filtered_pids &&
640 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
641 		(filtered_no_pids &&
642 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
643 }
644 
645 /**
646  * trace_filter_add_remove_task - Add or remove a task from a pid_list
647  * @pid_list: The list to modify
648  * @self: The current task for fork or NULL for exit
649  * @task: The task to add or remove
650  *
651  * If adding a task, if @self is defined, the task is only added if @self
652  * is also included in @pid_list. This happens on fork and tasks should
653  * only be added when the parent is listed. If @self is NULL, then the
654  * @task pid will be removed from the list, which would happen on exit
655  * of a task.
656  */
657 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
658 				  struct task_struct *self,
659 				  struct task_struct *task)
660 {
661 	if (!pid_list)
662 		return;
663 
664 	/* For forks, we only add if the forking task is listed */
665 	if (self) {
666 		if (!trace_find_filtered_pid(pid_list, self->pid))
667 			return;
668 	}
669 
670 	/* "self" is set for forks, and NULL for exits */
671 	if (self)
672 		trace_pid_list_set(pid_list, task->pid);
673 	else
674 		trace_pid_list_clear(pid_list, task->pid);
675 }
676 
677 /**
678  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
679  * @pid_list: The pid list to show
680  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
681  * @pos: The position of the file
682  *
683  * This is used by the seq_file "next" operation to iterate the pids
684  * listed in a trace_pid_list structure.
685  *
686  * Returns the pid+1 as we want to display pid of zero, but NULL would
687  * stop the iteration.
688  */
689 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
690 {
691 	long pid = (unsigned long)v;
692 	unsigned int next;
693 
694 	(*pos)++;
695 
696 	/* pid already is +1 of the actual previous bit */
697 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
698 		return NULL;
699 
700 	pid = next;
701 
702 	/* Return pid + 1 to allow zero to be represented */
703 	return (void *)(pid + 1);
704 }
705 
706 /**
707  * trace_pid_start - Used for seq_file to start reading pid lists
708  * @pid_list: The pid list to show
709  * @pos: The position of the file
710  *
711  * This is used by seq_file "start" operation to start the iteration
712  * of listing pids.
713  *
714  * Returns the pid+1 as we want to display pid of zero, but NULL would
715  * stop the iteration.
716  */
717 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
718 {
719 	unsigned long pid;
720 	unsigned int first;
721 	loff_t l = 0;
722 
723 	if (trace_pid_list_first(pid_list, &first) < 0)
724 		return NULL;
725 
726 	pid = first;
727 
728 	/* Return pid + 1 so that zero can be the exit value */
729 	for (pid++; pid && l < *pos;
730 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
731 		;
732 	return (void *)pid;
733 }
734 
735 /**
736  * trace_pid_show - show the current pid in seq_file processing
737  * @m: The seq_file structure to write into
738  * @v: A void pointer of the pid (+1) value to display
739  *
740  * Can be directly used by seq_file operations to display the current
741  * pid value.
742  */
743 int trace_pid_show(struct seq_file *m, void *v)
744 {
745 	unsigned long pid = (unsigned long)v - 1;
746 
747 	seq_printf(m, "%lu\n", pid);
748 	return 0;
749 }
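
/*
 * Example (illustrative only): these three helpers are meant to be wired
 * into seq_file operations. A hypothetical "pids" file, assuming its
 * pid_list (my_pid_list) can be reached from the seq_file, would look
 * roughly like:
 *
 *	static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void my_pids_stop(struct seq_file *m, void *v) { }
 *
 *	static const struct seq_operations my_pids_seq_ops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,
 *		.stop	= my_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */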
750 
751 /* 128 should be much more than enough */
752 #define PID_BUF_SIZE		127
753 
754 int trace_pid_write(struct trace_pid_list *filtered_pids,
755 		    struct trace_pid_list **new_pid_list,
756 		    const char __user *ubuf, size_t cnt)
757 {
758 	struct trace_pid_list *pid_list;
759 	struct trace_parser parser;
760 	unsigned long val;
761 	int nr_pids = 0;
762 	ssize_t read = 0;
763 	ssize_t ret;
764 	loff_t pos;
765 	pid_t pid;
766 
767 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
768 		return -ENOMEM;
769 
770 	/*
771 	 * Always create a new array. The write is an all or nothing
772 	 * operation: a new array is always built when the user adds
773 	 * new pids. If the operation fails, then the current list is
774 	 * not modified.
775 	 */
776 	pid_list = trace_pid_list_alloc();
777 	if (!pid_list) {
778 		trace_parser_put(&parser);
779 		return -ENOMEM;
780 	}
781 
782 	if (filtered_pids) {
783 		/* copy the current bits to the new max */
784 		ret = trace_pid_list_first(filtered_pids, &pid);
785 		while (!ret) {
786 			trace_pid_list_set(pid_list, pid);
787 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
788 			nr_pids++;
789 		}
790 	}
791 
792 	ret = 0;
793 	while (cnt > 0) {
794 
795 		pos = 0;
796 
797 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
798 		if (ret < 0)
799 			break;
800 
801 		read += ret;
802 		ubuf += ret;
803 		cnt -= ret;
804 
805 		if (!trace_parser_loaded(&parser))
806 			break;
807 
808 		ret = -EINVAL;
809 		if (kstrtoul(parser.buffer, 0, &val))
810 			break;
811 
812 		pid = (pid_t)val;
813 
814 		if (trace_pid_list_set(pid_list, pid) < 0) {
815 			ret = -1;
816 			break;
817 		}
818 		nr_pids++;
819 
820 		trace_parser_clear(&parser);
821 		ret = 0;
822 	}
823 	trace_parser_put(&parser);
824 
825 	if (ret < 0) {
826 		trace_pid_list_free(pid_list);
827 		return ret;
828 	}
829 
830 	if (!nr_pids) {
831 		/* Cleared the list of pids */
832 		trace_pid_list_free(pid_list);
833 		pid_list = NULL;
834 	}
835 
836 	*new_pid_list = pid_list;
837 
838 	return read;
839 }
840 
841 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
842 {
843 	u64 ts;
844 
845 	/* Early boot up does not have a buffer yet */
846 	if (!buf->buffer)
847 		return trace_clock_local();
848 
849 	ts = ring_buffer_time_stamp(buf->buffer);
850 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
851 
852 	return ts;
853 }
854 
855 u64 ftrace_now(int cpu)
856 {
857 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
858 }
859 
860 /**
861  * tracing_is_enabled - Show if global_trace has been enabled
862  *
863  * Shows if the global trace has been enabled or not. It uses the
864  * mirror flag "buffer_disabled", which can be used in fast paths such as
865  * the irqsoff tracer. But it may be inaccurate due to races. If you
866  * need to know the accurate state, use tracing_is_on() which is a little
867  * slower, but accurate.
868  */
869 int tracing_is_enabled(void)
870 {
871 	/*
872 	 * For quick access (irqsoff uses this in fast path), just
873 	 * return the mirror variable of the state of the ring buffer.
874 	 * It's a little racy, but we don't really care.
875 	 */
876 	smp_rmb();
877 	return !global_trace.buffer_disabled;
878 }
879 
880 /*
881  * trace_buf_size is the size in bytes that is allocated
882  * for a buffer. Note, the number of bytes is always rounded
883  * to page size.
884  *
885  * This number is purposely set to a low number of 16384.
886  * If a dump on oops happens, it is much appreciated
887  * not to have to wait for all that output. In any case, this is
888  * configurable at both boot time and run time.
889  */
890 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
891 
892 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
893 
894 /* trace_types holds a link list of available tracers. */
895 static struct tracer		*trace_types __read_mostly;
896 
897 /*
898  * trace_types_lock is used to protect the trace_types list.
899  */
900 DEFINE_MUTEX(trace_types_lock);
901 
902 /*
903  * Serialize access to the ring buffer.
904  *
905  * The ring buffer serializes readers, but that is only low-level protection.
906  * The validity of the events (returned by ring_buffer_peek() etc.)
907  * is not protected by the ring buffer.
908  *
909  * The content of events may become garbage if we allow other processes to
910  * consume these events concurrently:
911  *   A) the page of the consumed events may become a normal page
912  *      (not a reader page) in the ring buffer, and this page will be
913  *      rewritten by the events producer.
914  *   B) the page of the consumed events may become a page for splice_read,
915  *      and this page will be returned to the system.
916  *
917  * These primitives allow multiple processes to access different per-CPU
918  * ring buffers concurrently.
919  *
920  * These primitives don't distinguish read-only and read-consume access.
921  * Multiple read-only accesses are also serialized.
922  */
923 
924 #ifdef CONFIG_SMP
925 static DECLARE_RWSEM(all_cpu_access_lock);
926 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
927 
928 static inline void trace_access_lock(int cpu)
929 {
930 	if (cpu == RING_BUFFER_ALL_CPUS) {
931 		/* gain it for accessing the whole ring buffer. */
932 		down_write(&all_cpu_access_lock);
933 	} else {
934 		/* gain it for accessing a cpu ring buffer. */
935 
936 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
937 		down_read(&all_cpu_access_lock);
938 
939 		/* Secondly block other access to this @cpu ring buffer. */
940 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
941 	}
942 }
943 
944 static inline void trace_access_unlock(int cpu)
945 {
946 	if (cpu == RING_BUFFER_ALL_CPUS) {
947 		up_write(&all_cpu_access_lock);
948 	} else {
949 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
950 		up_read(&all_cpu_access_lock);
951 	}
952 }
953 
954 static inline void trace_access_lock_init(void)
955 {
956 	int cpu;
957 
958 	for_each_possible_cpu(cpu)
959 		mutex_init(&per_cpu(cpu_access_lock, cpu));
960 }
961 
962 #else
963 
964 static DEFINE_MUTEX(access_lock);
965 
966 static inline void trace_access_lock(int cpu)
967 {
968 	(void)cpu;
969 	mutex_lock(&access_lock);
970 }
971 
972 static inline void trace_access_unlock(int cpu)
973 {
974 	(void)cpu;
975 	mutex_unlock(&access_lock);
976 }
977 
978 static inline void trace_access_lock_init(void)
979 {
980 }
981 
982 #endif
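
/*
 * Example (illustrative only): a reader that consumes events from one CPU
 * would hold the access lock for that CPU, roughly like:
 *
 *	static void my_drain_cpu(struct trace_array *tr, int cpu)
 *	{
 *		u64 ts;
 *
 *		trace_access_lock(cpu);
 *		while (ring_buffer_consume(tr->array_buffer.buffer, cpu, &ts, NULL))
 *			;	// process or discard one event per iteration
 *		trace_access_unlock(cpu);
 *	}
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the exclusive (write side) lock.
 */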
983 
984 #ifdef CONFIG_STACKTRACE
985 static void __ftrace_trace_stack(struct trace_array *tr,
986 				 struct trace_buffer *buffer,
987 				 unsigned int trace_ctx,
988 				 int skip, struct pt_regs *regs);
989 static inline void ftrace_trace_stack(struct trace_array *tr,
990 				      struct trace_buffer *buffer,
991 				      unsigned int trace_ctx,
992 				      int skip, struct pt_regs *regs);
993 
994 #else
995 static inline void __ftrace_trace_stack(struct trace_array *tr,
996 					struct trace_buffer *buffer,
997 					unsigned int trace_ctx,
998 					int skip, struct pt_regs *regs)
999 {
1000 }
1001 static inline void ftrace_trace_stack(struct trace_array *tr,
1002 				      struct trace_buffer *buffer,
1003 				      unsigned int trace_ctx,
1004 				      int skip, struct pt_regs *regs)
1005 {
1006 }
1007 
1008 #endif
1009 
1010 static __always_inline void
1011 trace_event_setup(struct ring_buffer_event *event,
1012 		  int type, unsigned int trace_ctx)
1013 {
1014 	struct trace_entry *ent = ring_buffer_event_data(event);
1015 
1016 	tracing_generic_entry_update(ent, type, trace_ctx);
1017 }
1018 
1019 static __always_inline struct ring_buffer_event *
1020 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1021 			  int type,
1022 			  unsigned long len,
1023 			  unsigned int trace_ctx)
1024 {
1025 	struct ring_buffer_event *event;
1026 
1027 	event = ring_buffer_lock_reserve(buffer, len);
1028 	if (event != NULL)
1029 		trace_event_setup(event, type, trace_ctx);
1030 
1031 	return event;
1032 }
1033 
1034 void tracer_tracing_on(struct trace_array *tr)
1035 {
1036 	if (tr->array_buffer.buffer)
1037 		ring_buffer_record_on(tr->array_buffer.buffer);
1038 	/*
1039 	 * This flag is looked at when buffers haven't been allocated
1040 	 * yet, or by some tracers (like irqsoff), that just want to
1041 	 * know if the ring buffer has been disabled, but it can handle
1042 	 * races where it gets disabled while we still do a record.
1043 	 * As the check is in the fast path of the tracers, it is more
1044 	 * important to be fast than accurate.
1045 	 */
1046 	tr->buffer_disabled = 0;
1047 	/* Make the flag seen by readers */
1048 	smp_wmb();
1049 }
1050 
1051 /**
1052  * tracing_on - enable tracing buffers
1053  *
1054  * This function enables tracing buffers that may have been
1055  * disabled with tracing_off.
1056  */
1057 void tracing_on(void)
1058 {
1059 	tracer_tracing_on(&global_trace);
1060 }
1061 EXPORT_SYMBOL_GPL(tracing_on);
1062 
1063 
1064 static __always_inline void
1065 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1066 {
1067 	__this_cpu_write(trace_taskinfo_save, true);
1068 
1069 	/* If this is the temp buffer, we need to commit fully */
1070 	if (this_cpu_read(trace_buffered_event) == event) {
1071 		/* Length is in event->array[0] */
1072 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1073 		/* Release the temp buffer */
1074 		this_cpu_dec(trace_buffered_event_cnt);
1075 		/* ring_buffer_unlock_commit() enables preemption */
1076 		preempt_enable_notrace();
1077 	} else
1078 		ring_buffer_unlock_commit(buffer);
1079 }
1080 
1081 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1082 		       const char *str, int size)
1083 {
1084 	struct ring_buffer_event *event;
1085 	struct trace_buffer *buffer;
1086 	struct print_entry *entry;
1087 	unsigned int trace_ctx;
1088 	int alloc;
1089 
1090 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1091 		return 0;
1092 
1093 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1094 		return 0;
1095 
1096 	if (unlikely(tracing_disabled))
1097 		return 0;
1098 
1099 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1100 
1101 	trace_ctx = tracing_gen_ctx();
1102 	buffer = tr->array_buffer.buffer;
1103 	ring_buffer_nest_start(buffer);
1104 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1105 					    trace_ctx);
1106 	if (!event) {
1107 		size = 0;
1108 		goto out;
1109 	}
1110 
1111 	entry = ring_buffer_event_data(event);
1112 	entry->ip = ip;
1113 
1114 	memcpy(&entry->buf, str, size);
1115 
1116 	/* Add a newline if necessary */
1117 	if (entry->buf[size - 1] != '\n') {
1118 		entry->buf[size] = '\n';
1119 		entry->buf[size + 1] = '\0';
1120 	} else
1121 		entry->buf[size] = '\0';
1122 
1123 	__buffer_unlock_commit(buffer, event);
1124 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1125  out:
1126 	ring_buffer_nest_end(buffer);
1127 	return size;
1128 }
1129 EXPORT_SYMBOL_GPL(__trace_array_puts);
1130 
1131 /**
1132  * __trace_puts - write a constant string into the trace buffer.
1133  * @ip:	   The address of the caller
1134  * @str:   The constant string to write
1135  * @size:  The size of the string.
1136  */
1137 int __trace_puts(unsigned long ip, const char *str, int size)
1138 {
1139 	return __trace_array_puts(printk_trace, ip, str, size);
1140 }
1141 EXPORT_SYMBOL_GPL(__trace_puts);
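
/*
 * Example (illustrative only): kernel code does not normally call
 * __trace_puts()/__trace_bputs() directly; it uses the trace_puts() macro
 * (which picks the binary or non-binary variant depending on whether the
 * string is a build-time constant), or trace_printk() for formatted output.
 * Here "delta" is a hypothetical variable:
 *
 *	trace_puts("entering slow path\n");
 *	trace_printk("slow path took %llu ns\n", delta);
 */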
1142 
1143 /**
1144  * __trace_bputs - write the pointer to a constant string into trace buffer
1145  * @ip:	   The address of the caller
1146  * @str:   The constant string to write to the buffer
1147  */
1148 int __trace_bputs(unsigned long ip, const char *str)
1149 {
1150 	struct trace_array *tr = READ_ONCE(printk_trace);
1151 	struct ring_buffer_event *event;
1152 	struct trace_buffer *buffer;
1153 	struct bputs_entry *entry;
1154 	unsigned int trace_ctx;
1155 	int size = sizeof(struct bputs_entry);
1156 	int ret = 0;
1157 
1158 	if (!printk_binsafe(tr))
1159 		return __trace_puts(ip, str, strlen(str));
1160 
1161 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1162 		return 0;
1163 
1164 	if (unlikely(tracing_selftest_running || tracing_disabled))
1165 		return 0;
1166 
1167 	trace_ctx = tracing_gen_ctx();
1168 	buffer = tr->array_buffer.buffer;
1169 
1170 	ring_buffer_nest_start(buffer);
1171 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1172 					    trace_ctx);
1173 	if (!event)
1174 		goto out;
1175 
1176 	entry = ring_buffer_event_data(event);
1177 	entry->ip			= ip;
1178 	entry->str			= str;
1179 
1180 	__buffer_unlock_commit(buffer, event);
1181 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1182 
1183 	ret = 1;
1184  out:
1185 	ring_buffer_nest_end(buffer);
1186 	return ret;
1187 }
1188 EXPORT_SYMBOL_GPL(__trace_bputs);
1189 
1190 #ifdef CONFIG_TRACER_SNAPSHOT
1191 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1192 					   void *cond_data)
1193 {
1194 	struct tracer *tracer = tr->current_trace;
1195 	unsigned long flags;
1196 
1197 	if (in_nmi()) {
1198 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1199 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1200 		return;
1201 	}
1202 
1203 	if (!tr->allocated_snapshot) {
1204 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1205 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1206 		tracer_tracing_off(tr);
1207 		return;
1208 	}
1209 
1210 	/* Note, snapshot can not be used when the tracer uses it */
1211 	if (tracer->use_max_tr) {
1212 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1213 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1214 		return;
1215 	}
1216 
1217 	if (tr->mapped) {
1218 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1219 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1220 		return;
1221 	}
1222 
1223 	local_irq_save(flags);
1224 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1225 	local_irq_restore(flags);
1226 }
1227 
1228 void tracing_snapshot_instance(struct trace_array *tr)
1229 {
1230 	tracing_snapshot_instance_cond(tr, NULL);
1231 }
1232 
1233 /**
1234  * tracing_snapshot - take a snapshot of the current buffer.
1235  *
1236  * This causes a swap between the snapshot buffer and the current live
1237  * tracing buffer. You can use this to take snapshots of the live
1238  * trace when some condition is triggered, but continue to trace.
1239  *
1240  * Note, make sure to allocate the snapshot with either
1241  * a tracing_snapshot_alloc(), or by doing it manually
1242  * with: echo 1 > /sys/kernel/tracing/snapshot
1243  *
1244  * If the snapshot buffer is not allocated, it will stop tracing.
1245  * Basically making a permanent snapshot.
1246  */
1247 void tracing_snapshot(void)
1248 {
1249 	struct trace_array *tr = &global_trace;
1250 
1251 	tracing_snapshot_instance(tr);
1252 }
1253 EXPORT_SYMBOL_GPL(tracing_snapshot);
1254 
1255 /**
1256  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1257  * @tr:		The tracing instance to snapshot
1258  * @cond_data:	The data to be tested conditionally, and possibly saved
1259  *
1260  * This is the same as tracing_snapshot() except that the snapshot is
1261  * conditional - the snapshot will only happen if the
1262  * cond_snapshot.update() implementation receiving the cond_data
1263  * returns true, which means that the trace array's cond_snapshot
1264  * update() operation used the cond_data to determine whether the
1265  * snapshot should be taken, and if it was, presumably saved it along
1266  * with the snapshot.
1267  */
1268 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1269 {
1270 	tracing_snapshot_instance_cond(tr, cond_data);
1271 }
1272 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1273 
1274 /**
1275  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1276  * @tr:		The tracing instance
1277  *
1278  * When the user enables a conditional snapshot using
1279  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1280  * with the snapshot.  This accessor is used to retrieve it.
1281  *
1282  * Should not be called from cond_snapshot.update(), since it takes
1283  * the tr->max_lock lock, which the code calling
1284  * cond_snapshot.update() has already taken.
1285  *
1286  * Returns the cond_data associated with the trace array's snapshot.
1287  */
1288 void *tracing_cond_snapshot_data(struct trace_array *tr)
1289 {
1290 	void *cond_data = NULL;
1291 
1292 	local_irq_disable();
1293 	arch_spin_lock(&tr->max_lock);
1294 
1295 	if (tr->cond_snapshot)
1296 		cond_data = tr->cond_snapshot->cond_data;
1297 
1298 	arch_spin_unlock(&tr->max_lock);
1299 	local_irq_enable();
1300 
1301 	return cond_data;
1302 }
1303 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1304 
1305 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1306 					struct array_buffer *size_buf, int cpu_id);
1307 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1308 
1309 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1310 {
1311 	int order;
1312 	int ret;
1313 
1314 	if (!tr->allocated_snapshot) {
1315 
1316 		/* Make the snapshot buffer have the same order as main buffer */
1317 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1318 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1319 		if (ret < 0)
1320 			return ret;
1321 
1322 		/* allocate spare buffer */
1323 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1324 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1325 		if (ret < 0)
1326 			return ret;
1327 
1328 		tr->allocated_snapshot = true;
1329 	}
1330 
1331 	return 0;
1332 }
1333 
1334 static void free_snapshot(struct trace_array *tr)
1335 {
1336 	/*
1337 	 * We don't free the ring buffer; instead, we resize it because
1338 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1339 	 * we want to preserve it.
1340 	 */
1341 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1342 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1343 	set_buffer_entries(&tr->max_buffer, 1);
1344 	tracing_reset_online_cpus(&tr->max_buffer);
1345 	tr->allocated_snapshot = false;
1346 }
1347 
1348 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1349 {
1350 	int ret;
1351 
1352 	lockdep_assert_held(&trace_types_lock);
1353 
1354 	spin_lock(&tr->snapshot_trigger_lock);
1355 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1356 		spin_unlock(&tr->snapshot_trigger_lock);
1357 		return -EBUSY;
1358 	}
1359 
1360 	tr->snapshot++;
1361 	spin_unlock(&tr->snapshot_trigger_lock);
1362 
1363 	ret = tracing_alloc_snapshot_instance(tr);
1364 	if (ret) {
1365 		spin_lock(&tr->snapshot_trigger_lock);
1366 		tr->snapshot--;
1367 		spin_unlock(&tr->snapshot_trigger_lock);
1368 	}
1369 
1370 	return ret;
1371 }
1372 
1373 int tracing_arm_snapshot(struct trace_array *tr)
1374 {
1375 	int ret;
1376 
1377 	mutex_lock(&trace_types_lock);
1378 	ret = tracing_arm_snapshot_locked(tr);
1379 	mutex_unlock(&trace_types_lock);
1380 
1381 	return ret;
1382 }
1383 
1384 void tracing_disarm_snapshot(struct trace_array *tr)
1385 {
1386 	spin_lock(&tr->snapshot_trigger_lock);
1387 	if (!WARN_ON(!tr->snapshot))
1388 		tr->snapshot--;
1389 	spin_unlock(&tr->snapshot_trigger_lock);
1390 }
1391 
1392 /**
1393  * tracing_alloc_snapshot - allocate snapshot buffer.
1394  *
1395  * This only allocates the snapshot buffer if it isn't already
1396  * allocated - it doesn't also take a snapshot.
1397  *
1398  * This is meant to be used in cases where the snapshot buffer needs
1399  * to be set up for events that can't sleep but need to be able to
1400  * trigger a snapshot.
1401  */
1402 int tracing_alloc_snapshot(void)
1403 {
1404 	struct trace_array *tr = &global_trace;
1405 	int ret;
1406 
1407 	ret = tracing_alloc_snapshot_instance(tr);
1408 	WARN_ON(ret < 0);
1409 
1410 	return ret;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1413 
1414 /**
1415  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1416  *
1417  * This is similar to tracing_snapshot(), but it will allocate the
1418  * snapshot buffer if it isn't already allocated. Use this only
1419  * where it is safe to sleep, as the allocation may sleep.
1420  *
1421  * This causes a swap between the snapshot buffer and the current live
1422  * tracing buffer. You can use this to take snapshots of the live
1423  * trace when some condition is triggered, but continue to trace.
1424  */
1425 void tracing_snapshot_alloc(void)
1426 {
1427 	int ret;
1428 
1429 	ret = tracing_alloc_snapshot();
1430 	if (ret < 0)
1431 		return;
1432 
1433 	tracing_snapshot();
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
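
/*
 * Example (illustrative only): the usual pattern is to allocate the spare
 * buffer from a context that may sleep, then take snapshots at the point
 * of interest (the code above rejects NMI context). The trigger condition
 * here is hypothetical:
 *
 *	// setup, may sleep:
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, at the point of interest:
 *	if (my_condition_hit())
 *		tracing_snapshot();
 */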
1436 
1437 /**
1438  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1439  * @tr:		The tracing instance
1440  * @cond_data:	User data to associate with the snapshot
1441  * @update:	Implementation of the cond_snapshot update function
1442  *
1443  * Check whether the conditional snapshot for the given instance has
1444  * already been enabled, or if the current tracer is already using a
1445  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1446  * save the cond_data and update function inside.
1447  *
1448  * Returns 0 if successful, error otherwise.
1449  */
1450 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1451 				 cond_update_fn_t update)
1452 {
1453 	struct cond_snapshot *cond_snapshot __free(kfree) =
1454 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1455 	int ret;
1456 
1457 	if (!cond_snapshot)
1458 		return -ENOMEM;
1459 
1460 	cond_snapshot->cond_data = cond_data;
1461 	cond_snapshot->update = update;
1462 
1463 	guard(mutex)(&trace_types_lock);
1464 
1465 	if (tr->current_trace->use_max_tr)
1466 		return -EBUSY;
1467 
1468 	/*
1469 	 * The cond_snapshot can only change to NULL without the
1470 	 * trace_types_lock. We don't care if we race with it going
1471 	 * to NULL, but we want to make sure that it's not set to
1472 	 * something other than NULL when we get here, which we can
1473 	 * do safely with only holding the trace_types_lock and not
1474 	 * having to take the max_lock.
1475 	 */
1476 	if (tr->cond_snapshot)
1477 		return -EBUSY;
1478 
1479 	ret = tracing_arm_snapshot_locked(tr);
1480 	if (ret)
1481 		return ret;
1482 
1483 	local_irq_disable();
1484 	arch_spin_lock(&tr->max_lock);
1485 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1486 	arch_spin_unlock(&tr->max_lock);
1487 	local_irq_enable();
1488 
1489 	return 0;
1490 }
1491 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
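
/*
 * Example (illustrative only): a conditional snapshot needs an update
 * callback matching cond_update_fn_t. Assuming @tr was obtained elsewhere,
 * a user could snapshot only every 100th hit of a hypothetical trigger:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		atomic_t *hits = cond_data;	// same pointer passed to
 *						// tracing_snapshot_cond() below
 *		return atomic_inc_return(hits) % 100 == 0;
 *	}
 *
 *	static atomic_t my_hits;
 *
 *	// enable once:   tracing_snapshot_cond_enable(tr, &my_hits, my_update);
 *	// at each hit:   tracing_snapshot_cond(tr, &my_hits);
 *	// tear down:     tracing_snapshot_cond_disable(tr);
 */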
1492 
1493 /**
1494  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1495  * @tr:		The tracing instance
1496  *
1497  * Check whether the conditional snapshot for the given instance is
1498  * enabled; if so, free the cond_snapshot associated with it,
1499  * otherwise return -EINVAL.
1500  *
1501  * Returns 0 if successful, error otherwise.
1502  */
1503 int tracing_snapshot_cond_disable(struct trace_array *tr)
1504 {
1505 	int ret = 0;
1506 
1507 	local_irq_disable();
1508 	arch_spin_lock(&tr->max_lock);
1509 
1510 	if (!tr->cond_snapshot)
1511 		ret = -EINVAL;
1512 	else {
1513 		kfree(tr->cond_snapshot);
1514 		tr->cond_snapshot = NULL;
1515 	}
1516 
1517 	arch_spin_unlock(&tr->max_lock);
1518 	local_irq_enable();
1519 
1520 	tracing_disarm_snapshot(tr);
1521 
1522 	return ret;
1523 }
1524 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1525 #else
1526 void tracing_snapshot(void)
1527 {
1528 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1529 }
1530 EXPORT_SYMBOL_GPL(tracing_snapshot);
1531 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1532 {
1533 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1534 }
1535 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1536 int tracing_alloc_snapshot(void)
1537 {
1538 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1539 	return -ENODEV;
1540 }
1541 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1542 void tracing_snapshot_alloc(void)
1543 {
1544 	/* Give warning */
1545 	tracing_snapshot();
1546 }
1547 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1548 void *tracing_cond_snapshot_data(struct trace_array *tr)
1549 {
1550 	return NULL;
1551 }
1552 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1553 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1554 {
1555 	return -ENODEV;
1556 }
1557 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1558 int tracing_snapshot_cond_disable(struct trace_array *tr)
1559 {
1560 	return false;
1561 }
1562 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1563 #define free_snapshot(tr)	do { } while (0)
1564 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1565 #endif /* CONFIG_TRACER_SNAPSHOT */
1566 
1567 void tracer_tracing_off(struct trace_array *tr)
1568 {
1569 	if (tr->array_buffer.buffer)
1570 		ring_buffer_record_off(tr->array_buffer.buffer);
1571 	/*
1572 	 * This flag is looked at when buffers haven't been allocated
1573 	 * yet, or by some tracers (like irqsoff), that just want to
1574 	 * know if the ring buffer has been disabled, but it can handle
1575 	 * races where it gets disabled while we still do a record.
1576 	 * As the check is in the fast path of the tracers, it is more
1577 	 * important to be fast than accurate.
1578 	 */
1579 	tr->buffer_disabled = 1;
1580 	/* Make the flag seen by readers */
1581 	smp_wmb();
1582 }
1583 
1584 /**
1585  * tracing_off - turn off tracing buffers
1586  *
1587  * This function stops the tracing buffers from recording data.
1588  * It does not disable any overhead the tracers themselves may
1589  * be causing. This function simply causes all recording to
1590  * the ring buffers to fail.
1591  */
1592 void tracing_off(void)
1593 {
1594 	tracer_tracing_off(&global_trace);
1595 }
1596 EXPORT_SYMBOL_GPL(tracing_off);
1597 
1598 void disable_trace_on_warning(void)
1599 {
1600 	if (__disable_trace_on_warning) {
1601 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1602 			"Disabling tracing due to warning\n");
1603 		tracing_off();
1604 	}
1605 }
1606 
1607 /**
1608  * tracer_tracing_is_on - show real state of ring buffer enabled
1609  * @tr : the trace array to know if ring buffer is enabled
1610  *
1611  * Shows real state of the ring buffer if it is enabled or not.
1612  */
1613 bool tracer_tracing_is_on(struct trace_array *tr)
1614 {
1615 	if (tr->array_buffer.buffer)
1616 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1617 	return !tr->buffer_disabled;
1618 }
1619 
1620 /**
1621  * tracing_is_on - show state of ring buffers enabled
1622  */
1623 int tracing_is_on(void)
1624 {
1625 	return tracer_tracing_is_on(&global_trace);
1626 }
1627 EXPORT_SYMBOL_GPL(tracing_is_on);
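
/*
 * Example (illustrative only): a common debugging pattern is to freeze the
 * ring buffers when a problem is first detected, so the events leading up
 * to it are preserved. my_detect_problem() is hypothetical:
 *
 *	if (my_detect_problem() && tracing_is_on()) {
 *		trace_printk("problem detected, freezing trace\n");
 *		tracing_off();
 *	}
 */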
1628 
1629 static int __init set_buf_size(char *str)
1630 {
1631 	unsigned long buf_size;
1632 
1633 	if (!str)
1634 		return 0;
1635 	buf_size = memparse(str, &str);
1636 	/*
1637 	 * nr_entries can not be zero and the startup
1638 	 * tests require some buffer space. Therefore
1639 	 * ensure we have at least 4096 bytes of buffer.
1640 	 */
1641 	trace_buf_size = max(4096UL, buf_size);
1642 	return 1;
1643 }
1644 __setup("trace_buf_size=", set_buf_size);
1645 
1646 static int __init set_tracing_thresh(char *str)
1647 {
1648 	unsigned long threshold;
1649 	int ret;
1650 
1651 	if (!str)
1652 		return 0;
1653 	ret = kstrtoul(str, 0, &threshold);
1654 	if (ret < 0)
1655 		return 0;
1656 	tracing_thresh = threshold * 1000;
1657 	return 1;
1658 }
1659 __setup("tracing_thresh=", set_tracing_thresh);
1660 
1661 unsigned long nsecs_to_usecs(unsigned long nsecs)
1662 {
1663 	return nsecs / 1000;
1664 }
1665 
1666 /*
1667  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1668  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1669  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1670  * of strings in the order that the evals (enum) were defined.
1671  */
1672 #undef C
1673 #define C(a, b) b
1674 
1675 /* These must match the bit positions in trace_iterator_flags */
1676 static const char *trace_options[] = {
1677 	TRACE_FLAGS
1678 	NULL
1679 };
1680 
1681 static struct {
1682 	u64 (*func)(void);
1683 	const char *name;
1684 	int in_ns;		/* is this clock in nanoseconds? */
1685 } trace_clocks[] = {
1686 	{ trace_clock_local,		"local",	1 },
1687 	{ trace_clock_global,		"global",	1 },
1688 	{ trace_clock_counter,		"counter",	0 },
1689 	{ trace_clock_jiffies,		"uptime",	0 },
1690 	{ trace_clock,			"perf",		1 },
1691 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1692 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1693 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1694 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1695 	ARCH_TRACE_CLOCKS
1696 };
1697 
1698 bool trace_clock_in_ns(struct trace_array *tr)
1699 {
1700 	if (trace_clocks[tr->clock_id].in_ns)
1701 		return true;
1702 
1703 	return false;
1704 }
1705 
1706 /*
1707  * trace_parser_get_init - gets the buffer for trace parser
1708  */
1709 int trace_parser_get_init(struct trace_parser *parser, int size)
1710 {
1711 	memset(parser, 0, sizeof(*parser));
1712 
1713 	parser->buffer = kmalloc(size, GFP_KERNEL);
1714 	if (!parser->buffer)
1715 		return 1;
1716 
1717 	parser->size = size;
1718 	return 0;
1719 }
1720 
1721 /*
1722  * trace_parser_put - frees the buffer for trace parser
1723  */
1724 void trace_parser_put(struct trace_parser *parser)
1725 {
1726 	kfree(parser->buffer);
1727 	parser->buffer = NULL;
1728 }
1729 
1730 /*
1731  * trace_get_user - reads the user input string separated by space
1732  * (matched by isspace(ch))
1733  *
1734  * For each string found the 'struct trace_parser' is updated,
1735  * and the function returns.
1736  *
1737  * Returns number of bytes read.
1738  *
1739  * See kernel/trace/trace.h for 'struct trace_parser' details.
1740  */
1741 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1742 	size_t cnt, loff_t *ppos)
1743 {
1744 	char ch;
1745 	size_t read = 0;
1746 	ssize_t ret;
1747 
1748 	if (!*ppos)
1749 		trace_parser_clear(parser);
1750 
1751 	ret = get_user(ch, ubuf++);
1752 	if (ret)
1753 		goto out;
1754 
1755 	read++;
1756 	cnt--;
1757 
1758 	/*
1759 	 * The parser is not finished with the last write,
1760 	 * continue reading the user input without skipping spaces.
1761 	 */
1762 	if (!parser->cont) {
1763 		/* skip white space */
1764 		while (cnt && isspace(ch)) {
1765 			ret = get_user(ch, ubuf++);
1766 			if (ret)
1767 				goto out;
1768 			read++;
1769 			cnt--;
1770 		}
1771 
1772 		parser->idx = 0;
1773 
1774 		/* only spaces were written */
1775 		if (isspace(ch) || !ch) {
1776 			*ppos += read;
1777 			ret = read;
1778 			goto out;
1779 		}
1780 	}
1781 
1782 	/* read the non-space input */
1783 	while (cnt && !isspace(ch) && ch) {
1784 		if (parser->idx < parser->size - 1)
1785 			parser->buffer[parser->idx++] = ch;
1786 		else {
1787 			ret = -EINVAL;
1788 			goto out;
1789 		}
1790 		ret = get_user(ch, ubuf++);
1791 		if (ret)
1792 			goto out;
1793 		read++;
1794 		cnt--;
1795 	}
1796 
1797 	/* We either got finished input or we have to wait for another call. */
1798 	if (isspace(ch) || !ch) {
1799 		parser->buffer[parser->idx] = 0;
1800 		parser->cont = false;
1801 	} else if (parser->idx < parser->size - 1) {
1802 		parser->cont = true;
1803 		parser->buffer[parser->idx++] = ch;
1804 		/* Make sure the parsed string always terminates with '\0'. */
1805 		parser->buffer[parser->idx] = 0;
1806 	} else {
1807 		ret = -EINVAL;
1808 		goto out;
1809 	}
1810 
1811 	*ppos += read;
1812 	ret = read;
1813 
1814 out:
1815 	return ret;
1816 }
1817 
1818 /* TODO add a seq_buf_to_buffer() */
1819 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1820 {
1821 	int len;
1822 
1823 	if (trace_seq_used(s) <= s->readpos)
1824 		return -EBUSY;
1825 
1826 	len = trace_seq_used(s) - s->readpos;
1827 	if (cnt > len)
1828 		cnt = len;
1829 	memcpy(buf, s->buffer + s->readpos, cnt);
1830 
1831 	s->readpos += cnt;
1832 	return cnt;
1833 }
1834 
1835 unsigned long __read_mostly	tracing_thresh;
1836 
1837 #ifdef CONFIG_TRACER_MAX_TRACE
1838 static const struct file_operations tracing_max_lat_fops;
1839 
1840 #ifdef LATENCY_FS_NOTIFY
1841 
1842 static struct workqueue_struct *fsnotify_wq;
1843 
1844 static void latency_fsnotify_workfn(struct work_struct *work)
1845 {
1846 	struct trace_array *tr = container_of(work, struct trace_array,
1847 					      fsnotify_work);
1848 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1849 }
1850 
1851 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1852 {
1853 	struct trace_array *tr = container_of(iwork, struct trace_array,
1854 					      fsnotify_irqwork);
1855 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1856 }
1857 
1858 static void trace_create_maxlat_file(struct trace_array *tr,
1859 				     struct dentry *d_tracer)
1860 {
1861 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1862 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1863 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1864 					      TRACE_MODE_WRITE,
1865 					      d_tracer, tr,
1866 					      &tracing_max_lat_fops);
1867 }
1868 
1869 __init static int latency_fsnotify_init(void)
1870 {
1871 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1872 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1873 	if (!fsnotify_wq) {
1874 		pr_err("Unable to allocate tr_max_lat_wq\n");
1875 		return -ENOMEM;
1876 	}
1877 	return 0;
1878 }
1879 
1880 late_initcall_sync(latency_fsnotify_init);
1881 
1882 void latency_fsnotify(struct trace_array *tr)
1883 {
1884 	if (!fsnotify_wq)
1885 		return;
1886 	/*
1887 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1888 	 * possible that we are called from __schedule() or do_idle(), which
1889 	 * could cause a deadlock.
1890 	 */
1891 	irq_work_queue(&tr->fsnotify_irqwork);
1892 }
1893 
1894 #else /* !LATENCY_FS_NOTIFY */
1895 
1896 #define trace_create_maxlat_file(tr, d_tracer)				\
1897 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1898 			  d_tracer, tr, &tracing_max_lat_fops)
1899 
1900 #endif
1901 
1902 /*
1903  * Copy the new maximum trace into the separate maximum-trace
1904  * structure. (this way the maximum trace is permanently saved,
1905  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1906  */
1907 static void
1908 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1909 {
1910 	struct array_buffer *trace_buf = &tr->array_buffer;
1911 	struct array_buffer *max_buf = &tr->max_buffer;
1912 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1913 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1914 
1915 	max_buf->cpu = cpu;
1916 	max_buf->time_start = data->preempt_timestamp;
1917 
1918 	max_data->saved_latency = tr->max_latency;
1919 	max_data->critical_start = data->critical_start;
1920 	max_data->critical_end = data->critical_end;
1921 
1922 	strscpy(max_data->comm, tsk->comm);
1923 	max_data->pid = tsk->pid;
1924 	/*
1925 	 * If tsk == current, then use current_uid(), as that does not use
1926 	 * RCU. The irq tracer can be called out of RCU scope.
1927 	 */
1928 	if (tsk == current)
1929 		max_data->uid = current_uid();
1930 	else
1931 		max_data->uid = task_uid(tsk);
1932 
1933 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1934 	max_data->policy = tsk->policy;
1935 	max_data->rt_priority = tsk->rt_priority;
1936 
1937 	/* Record this task's comm */
1938 	tracing_record_cmdline(tsk);
1939 	latency_fsnotify(tr);
1940 }
1941 
1942 /**
1943  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1944  * @tr: tracer
1945  * @tsk: the task with the latency
1946  * @cpu: The cpu that initiated the trace.
1947  * @cond_data: User data associated with a conditional snapshot
1948  *
1949  * Flip the buffers between the @tr and the max_tr and record information
1950  * about which task was the cause of this latency.
1951  */
1952 void
1953 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1954 	      void *cond_data)
1955 {
1956 	if (tr->stop_count)
1957 		return;
1958 
1959 	WARN_ON_ONCE(!irqs_disabled());
1960 
1961 	if (!tr->allocated_snapshot) {
1962 		/* Only the nop tracer should hit this when disabling */
1963 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1964 		return;
1965 	}
1966 
1967 	arch_spin_lock(&tr->max_lock);
1968 
1969 	/* Inherit the recordable setting from array_buffer */
1970 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1971 		ring_buffer_record_on(tr->max_buffer.buffer);
1972 	else
1973 		ring_buffer_record_off(tr->max_buffer.buffer);
1974 
1975 #ifdef CONFIG_TRACER_SNAPSHOT
1976 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1977 		arch_spin_unlock(&tr->max_lock);
1978 		return;
1979 	}
1980 #endif
1981 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1982 
1983 	__update_max_tr(tr, tsk, cpu);
1984 
1985 	arch_spin_unlock(&tr->max_lock);
1986 
1987 	/* Any waiters on the old snapshot buffer need to wake up */
1988 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1989 }
1990 
1991 /**
1992  * update_max_tr_single - only copy one trace over, and reset the rest
1993  * @tr: tracer
1994  * @tsk: task with the latency
1995  * @cpu: the cpu of the buffer to copy.
1996  *
1997  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1998  */
1999 void
2000 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2001 {
2002 	int ret;
2003 
2004 	if (tr->stop_count)
2005 		return;
2006 
2007 	WARN_ON_ONCE(!irqs_disabled());
2008 	if (!tr->allocated_snapshot) {
2009 		/* Only the nop tracer should hit this when disabling */
2010 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2011 		return;
2012 	}
2013 
2014 	arch_spin_lock(&tr->max_lock);
2015 
2016 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2017 
2018 	if (ret == -EBUSY) {
2019 		/*
2020 		 * We failed to swap the buffer due to a commit taking
2021 		 * place on this CPU. We fail to record, but we reset
2022 		 * the max trace buffer (no one writes directly to it)
2023 		 * and flag that it failed.
2024 		 * The swap can also fail while a resize is in progress.
2025 		 */
2026 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2027 			"Failed to swap buffers due to commit or resize in progress\n");
2028 	}
2029 
2030 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2031 
2032 	__update_max_tr(tr, tsk, cpu);
2033 	arch_spin_unlock(&tr->max_lock);
2034 }
2035 
2036 #endif /* CONFIG_TRACER_MAX_TRACE */
2037 
2038 struct pipe_wait {
2039 	struct trace_iterator		*iter;
2040 	int				wait_index;
2041 };
2042 
2043 static bool wait_pipe_cond(void *data)
2044 {
2045 	struct pipe_wait *pwait = data;
2046 	struct trace_iterator *iter = pwait->iter;
2047 
2048 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2049 		return true;
2050 
2051 	return iter->closed;
2052 }
2053 
2054 static int wait_on_pipe(struct trace_iterator *iter, int full)
2055 {
2056 	struct pipe_wait pwait;
2057 	int ret;
2058 
2059 	/* Iterators are static; they should be either filled or empty */
2060 	if (trace_buffer_iter(iter, iter->cpu_file))
2061 		return 0;
2062 
2063 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2064 	pwait.iter = iter;
2065 
2066 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2067 			       wait_pipe_cond, &pwait);
2068 
2069 #ifdef CONFIG_TRACER_MAX_TRACE
2070 	/*
2071 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2072 	 * to happen, this would now be the main buffer.
2073 	 */
2074 	if (iter->snapshot)
2075 		iter->array_buffer = &iter->tr->max_buffer;
2076 #endif
2077 	return ret;
2078 }
2079 
2080 #ifdef CONFIG_FTRACE_STARTUP_TEST
2081 static bool selftests_can_run;
2082 
2083 struct trace_selftests {
2084 	struct list_head		list;
2085 	struct tracer			*type;
2086 };
2087 
2088 static LIST_HEAD(postponed_selftests);
2089 
2090 static int save_selftest(struct tracer *type)
2091 {
2092 	struct trace_selftests *selftest;
2093 
2094 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2095 	if (!selftest)
2096 		return -ENOMEM;
2097 
2098 	selftest->type = type;
2099 	list_add(&selftest->list, &postponed_selftests);
2100 	return 0;
2101 }
2102 
2103 static int run_tracer_selftest(struct tracer *type)
2104 {
2105 	struct trace_array *tr = &global_trace;
2106 	struct tracer *saved_tracer = tr->current_trace;
2107 	int ret;
2108 
2109 	if (!type->selftest || tracing_selftest_disabled)
2110 		return 0;
2111 
2112 	/*
2113 	 * If a tracer registers early in boot up (before scheduling is
2114 	 * initialized and such), then do not run its selftests yet.
2115 	 * Instead, run it a little later in the boot process.
2116 	 * Instead, run them a little later in the boot process.
2117 	if (!selftests_can_run)
2118 		return save_selftest(type);
2119 
2120 	if (!tracing_is_on()) {
2121 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2122 			type->name);
2123 		return 0;
2124 	}
2125 
2126 	/*
2127 	 * Run a selftest on this tracer.
2128 	 * Here we reset the trace buffer, and set the current
2129 	 * tracer to be this tracer. The tracer can then run some
2130 	 * internal tracing to verify that everything is in order.
2131 	 * If we fail, we do not register this tracer.
2132 	 */
2133 	tracing_reset_online_cpus(&tr->array_buffer);
2134 
2135 	tr->current_trace = type;
2136 
2137 #ifdef CONFIG_TRACER_MAX_TRACE
2138 	if (type->use_max_tr) {
2139 		/* If we expanded the buffers, make sure the max is expanded too */
2140 		if (tr->ring_buffer_expanded)
2141 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2142 					   RING_BUFFER_ALL_CPUS);
2143 		tr->allocated_snapshot = true;
2144 	}
2145 #endif
2146 
2147 	/* the test is responsible for initializing and enabling */
2148 	pr_info("Testing tracer %s: ", type->name);
2149 	ret = type->selftest(type, tr);
2150 	/* the test is responsible for resetting too */
2151 	tr->current_trace = saved_tracer;
2152 	if (ret) {
2153 		printk(KERN_CONT "FAILED!\n");
2154 		/* Add the warning after printing 'FAILED' */
2155 		WARN_ON(1);
2156 		return -1;
2157 	}
2158 	/* Only reset on passing, to avoid touching corrupted buffers */
2159 	tracing_reset_online_cpus(&tr->array_buffer);
2160 
2161 #ifdef CONFIG_TRACER_MAX_TRACE
2162 	if (type->use_max_tr) {
2163 		tr->allocated_snapshot = false;
2164 
2165 		/* Shrink the max buffer again */
2166 		if (tr->ring_buffer_expanded)
2167 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2168 					   RING_BUFFER_ALL_CPUS);
2169 	}
2170 #endif
2171 
2172 	printk(KERN_CONT "PASSED\n");
2173 	return 0;
2174 }
2175 
2176 static int do_run_tracer_selftest(struct tracer *type)
2177 {
2178 	int ret;
2179 
2180 	/*
2181 	 * Tests can take a long time, especially if they are run one after the
2182 	 * other, as happens during bootup when all the tracers are
2183 	 * registered. This could cause the soft lockup watchdog to trigger.
2184 	 */
2185 	cond_resched();
2186 
2187 	tracing_selftest_running = true;
2188 	ret = run_tracer_selftest(type);
2189 	tracing_selftest_running = false;
2190 
2191 	return ret;
2192 }
2193 
2194 static __init int init_trace_selftests(void)
2195 {
2196 	struct trace_selftests *p, *n;
2197 	struct tracer *t, **last;
2198 	int ret;
2199 
2200 	selftests_can_run = true;
2201 
2202 	guard(mutex)(&trace_types_lock);
2203 
2204 	if (list_empty(&postponed_selftests))
2205 		return 0;
2206 
2207 	pr_info("Running postponed tracer tests:\n");
2208 
2209 	tracing_selftest_running = true;
2210 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2211 		/* This loop can take minutes when sanitizers are enabled, so
2212 		 * let's make sure we allow RCU processing.
2213 		 */
2214 		cond_resched();
2215 		ret = run_tracer_selftest(p->type);
2216 		/* If the test fails, then warn and remove from available_tracers */
2217 		if (ret < 0) {
2218 			WARN(1, "tracer: %s failed selftest, disabling\n",
2219 			     p->type->name);
2220 			last = &trace_types;
2221 			for (t = trace_types; t; t = t->next) {
2222 				if (t == p->type) {
2223 					*last = t->next;
2224 					break;
2225 				}
2226 				last = &t->next;
2227 			}
2228 		}
2229 		list_del(&p->list);
2230 		kfree(p);
2231 	}
2232 	tracing_selftest_running = false;
2233 
2234 	return 0;
2235 }
2236 core_initcall(init_trace_selftests);
2237 #else
2238 static inline int do_run_tracer_selftest(struct tracer *type)
2239 {
2240 	return 0;
2241 }
2242 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2243 
2244 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2245 
2246 static void __init apply_trace_boot_options(void);
2247 
2248 /**
2249  * register_tracer - register a tracer with the ftrace system.
2250  * @type: the plugin for the tracer
2251  *
2252  * Register a new plugin tracer.
2253  */
2254 int __init register_tracer(struct tracer *type)
2255 {
2256 	struct tracer *t;
2257 	int ret = 0;
2258 
2259 	if (!type->name) {
2260 		pr_info("Tracer must have a name\n");
2261 		return -1;
2262 	}
2263 
2264 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2265 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2266 		return -1;
2267 	}
2268 
2269 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2270 		pr_warn("Can not register tracer %s due to lockdown\n",
2271 			   type->name);
2272 		return -EPERM;
2273 	}
2274 
2275 	mutex_lock(&trace_types_lock);
2276 
2277 	for (t = trace_types; t; t = t->next) {
2278 		if (strcmp(type->name, t->name) == 0) {
2279 			/* already found */
2280 			pr_info("Tracer %s already registered\n",
2281 				type->name);
2282 			ret = -1;
2283 			goto out;
2284 		}
2285 	}
2286 
2287 	if (!type->set_flag)
2288 		type->set_flag = &dummy_set_flag;
2289 	if (!type->flags) {
2290 		/* allocate a dummy tracer_flags */
2291 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2292 		if (!type->flags) {
2293 			ret = -ENOMEM;
2294 			goto out;
2295 		}
2296 		type->flags->val = 0;
2297 		type->flags->opts = dummy_tracer_opt;
2298 	} else
2299 		if (!type->flags->opts)
2300 			type->flags->opts = dummy_tracer_opt;
2301 
2302 	/* store the tracer for __set_tracer_option */
2303 	type->flags->trace = type;
2304 
2305 	ret = do_run_tracer_selftest(type);
2306 	if (ret < 0)
2307 		goto out;
2308 
2309 	type->next = trace_types;
2310 	trace_types = type;
2311 	add_tracer_options(&global_trace, type);
2312 
2313  out:
2314 	mutex_unlock(&trace_types_lock);
2315 
2316 	if (ret || !default_bootup_tracer)
2317 		goto out_unlock;
2318 
2319 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2320 		goto out_unlock;
2321 
2322 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2323 	/* Do we want this tracer to start on bootup? */
2324 	tracing_set_tracer(&global_trace, type->name);
2325 	default_bootup_tracer = NULL;
2326 
2327 	apply_trace_boot_options();
2328 
2329 	/* Disable other selftests, since running this tracer will break them. */
2330 	disable_tracing_selftest("running a tracer");
2331 
2332  out_unlock:
2333 	return ret;
2334 }
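/*
 * Illustrative sketch (not part of the original file): a minimal built-in
 * tracer registers itself at boot roughly like this. my_tracer_init() and
 * my_tracer_reset() are hypothetical; see the nop or function tracers for
 * real examples, and note that the exact initcall level varies by tracer.
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 *
 * Once registered, the tracer shows up in available_tracers and can be
 * selected by writing its name to current_tracer.
 */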
2335 
2336 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2337 {
2338 	struct trace_buffer *buffer = buf->buffer;
2339 
2340 	if (!buffer)
2341 		return;
2342 
2343 	ring_buffer_record_disable(buffer);
2344 
2345 	/* Make sure all commits have finished */
2346 	synchronize_rcu();
2347 	ring_buffer_reset_cpu(buffer, cpu);
2348 
2349 	ring_buffer_record_enable(buffer);
2350 }
2351 
2352 void tracing_reset_online_cpus(struct array_buffer *buf)
2353 {
2354 	struct trace_buffer *buffer = buf->buffer;
2355 
2356 	if (!buffer)
2357 		return;
2358 
2359 	ring_buffer_record_disable(buffer);
2360 
2361 	/* Make sure all commits have finished */
2362 	synchronize_rcu();
2363 
2364 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2365 
2366 	ring_buffer_reset_online_cpus(buffer);
2367 
2368 	ring_buffer_record_enable(buffer);
2369 }
2370 
2371 static void tracing_reset_all_cpus(struct array_buffer *buf)
2372 {
2373 	struct trace_buffer *buffer = buf->buffer;
2374 
2375 	if (!buffer)
2376 		return;
2377 
2378 	ring_buffer_record_disable(buffer);
2379 
2380 	/* Make sure all commits have finished */
2381 	synchronize_rcu();
2382 
2383 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2384 
2385 	ring_buffer_reset(buffer);
2386 
2387 	ring_buffer_record_enable(buffer);
2388 }
2389 
2390 /* Must have trace_types_lock held */
2391 void tracing_reset_all_online_cpus_unlocked(void)
2392 {
2393 	struct trace_array *tr;
2394 
2395 	lockdep_assert_held(&trace_types_lock);
2396 
2397 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2398 		if (!tr->clear_trace)
2399 			continue;
2400 		tr->clear_trace = false;
2401 		tracing_reset_online_cpus(&tr->array_buffer);
2402 #ifdef CONFIG_TRACER_MAX_TRACE
2403 		tracing_reset_online_cpus(&tr->max_buffer);
2404 #endif
2405 	}
2406 }
2407 
2408 void tracing_reset_all_online_cpus(void)
2409 {
2410 	mutex_lock(&trace_types_lock);
2411 	tracing_reset_all_online_cpus_unlocked();
2412 	mutex_unlock(&trace_types_lock);
2413 }
2414 
2415 int is_tracing_stopped(void)
2416 {
2417 	return global_trace.stop_count;
2418 }
2419 
2420 static void tracing_start_tr(struct trace_array *tr)
2421 {
2422 	struct trace_buffer *buffer;
2423 	unsigned long flags;
2424 
2425 	if (tracing_disabled)
2426 		return;
2427 
2428 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2429 	if (--tr->stop_count) {
2430 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2431 			/* Someone screwed up their debugging */
2432 			tr->stop_count = 0;
2433 		}
2434 		goto out;
2435 	}
2436 
2437 	/* Prevent the buffers from switching */
2438 	arch_spin_lock(&tr->max_lock);
2439 
2440 	buffer = tr->array_buffer.buffer;
2441 	if (buffer)
2442 		ring_buffer_record_enable(buffer);
2443 
2444 #ifdef CONFIG_TRACER_MAX_TRACE
2445 	buffer = tr->max_buffer.buffer;
2446 	if (buffer)
2447 		ring_buffer_record_enable(buffer);
2448 #endif
2449 
2450 	arch_spin_unlock(&tr->max_lock);
2451 
2452  out:
2453 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2454 }
2455 
2456 /**
2457  * tracing_start - quick start of the tracer
2458  *
2459  * If tracing is enabled but was stopped by tracing_stop,
2460  * this will start the tracer back up.
2461  */
2462 void tracing_start(void)
2463 
2464 {
2465 	return tracing_start_tr(&global_trace);
2466 }
2467 
2468 static void tracing_stop_tr(struct trace_array *tr)
2469 {
2470 	struct trace_buffer *buffer;
2471 	unsigned long flags;
2472 
2473 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2474 	if (tr->stop_count++)
2475 		goto out;
2476 
2477 	/* Prevent the buffers from switching */
2478 	arch_spin_lock(&tr->max_lock);
2479 
2480 	buffer = tr->array_buffer.buffer;
2481 	if (buffer)
2482 		ring_buffer_record_disable(buffer);
2483 
2484 #ifdef CONFIG_TRACER_MAX_TRACE
2485 	buffer = tr->max_buffer.buffer;
2486 	if (buffer)
2487 		ring_buffer_record_disable(buffer);
2488 #endif
2489 
2490 	arch_spin_unlock(&tr->max_lock);
2491 
2492  out:
2493 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2494 }
2495 
2496 /**
2497  * tracing_stop - quick stop of the tracer
2498  *
2499  * Lightweight way to stop tracing. Use in conjunction with
2500  * tracing_start.
2501  */
2502 void tracing_stop(void)
2503 {
2504 	return tracing_stop_tr(&global_trace);
2505 }
2506 
2507 /*
2508  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2509  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2510  * simplifies those functions and keeps them in sync.
2511  */
2512 enum print_line_t trace_handle_return(struct trace_seq *s)
2513 {
2514 	return trace_seq_has_overflowed(s) ?
2515 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2516 }
2517 EXPORT_SYMBOL_GPL(trace_handle_return);
2518 
2519 static unsigned short migration_disable_value(void)
2520 {
2521 #if defined(CONFIG_SMP)
2522 	return current->migration_disabled;
2523 #else
2524 	return 0;
2525 #endif
2526 }
2527 
2528 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2529 {
2530 	unsigned int trace_flags = irqs_status;
2531 	unsigned int pc;
2532 
2533 	pc = preempt_count();
2534 
2535 	if (pc & NMI_MASK)
2536 		trace_flags |= TRACE_FLAG_NMI;
2537 	if (pc & HARDIRQ_MASK)
2538 		trace_flags |= TRACE_FLAG_HARDIRQ;
2539 	if (in_serving_softirq())
2540 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2541 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2542 		trace_flags |= TRACE_FLAG_BH_OFF;
2543 
2544 	if (tif_need_resched())
2545 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2546 	if (test_preempt_need_resched())
2547 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2548 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2549 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2550 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2551 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2552 }
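/*
 * Editor's note (illustrative, derived from the return statement above): the
 * packed context word produced here is laid out as
 *
 *	bits  0- 3: low byte of preempt_count() (preempt-disable depth), clamped to 15
 *	bits  4- 7: migration_disable_value(), clamped to 15
 *	bits  8-15: left zero by this function
 *	bits 16-31: TRACE_FLAG_* bits (NMI/hardirq/softirq/resched state)
 *
 * So, for example, an event recorded from hard interrupt context with one
 * level of preempt_disable() carries TRACE_FLAG_HARDIRQ in the upper half and
 * 0x1 in the low nibble.
 */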
2553 
2554 struct ring_buffer_event *
2555 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2556 			  int type,
2557 			  unsigned long len,
2558 			  unsigned int trace_ctx)
2559 {
2560 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2561 }
2562 
2563 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2564 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2565 static int trace_buffered_event_ref;
2566 
2567 /**
2568  * trace_buffered_event_enable - enable buffering events
2569  *
2570  * When events are being filtered, it is quicker to use a temporary
2571  * buffer to write the event data into if there's a likely chance
2572  * that it will not be committed. Discarding an event from the ring buffer
2573  * is not as fast as committing one, and is much slower than copying
2574  * the data and then committing it.
2575  *
2576  * When an event is to be filtered, per-CPU buffers are allocated to
2577  * write the event data into. If the event is filtered and discarded,
2578  * it is simply dropped; otherwise, the entire data is committed
2579  * in one shot.
2580  */
2581 void trace_buffered_event_enable(void)
2582 {
2583 	struct ring_buffer_event *event;
2584 	struct page *page;
2585 	int cpu;
2586 
2587 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2588 
2589 	if (trace_buffered_event_ref++)
2590 		return;
2591 
2592 	for_each_tracing_cpu(cpu) {
2593 		page = alloc_pages_node(cpu_to_node(cpu),
2594 					GFP_KERNEL | __GFP_NORETRY, 0);
2595 		/* This is just an optimization and can handle failures */
2596 		if (!page) {
2597 			pr_err("Failed to allocate event buffer\n");
2598 			break;
2599 		}
2600 
2601 		event = page_address(page);
2602 		memset(event, 0, sizeof(*event));
2603 
2604 		per_cpu(trace_buffered_event, cpu) = event;
2605 
2606 		preempt_disable();
2607 		if (cpu == smp_processor_id() &&
2608 		    __this_cpu_read(trace_buffered_event) !=
2609 		    per_cpu(trace_buffered_event, cpu))
2610 			WARN_ON_ONCE(1);
2611 		preempt_enable();
2612 	}
2613 }
2614 
2615 static void enable_trace_buffered_event(void *data)
2616 {
2617 	/* Probably not needed, but do it anyway */
2618 	smp_rmb();
2619 	this_cpu_dec(trace_buffered_event_cnt);
2620 }
2621 
2622 static void disable_trace_buffered_event(void *data)
2623 {
2624 	this_cpu_inc(trace_buffered_event_cnt);
2625 }
2626 
2627 /**
2628  * trace_buffered_event_disable - disable buffering events
2629  *
2630  * When a filter is removed, it is faster to not use the buffered
2631  * events, and to commit directly into the ring buffer. Free up
2632  * the temp buffers when there are no more users. This requires
2633  * special synchronization with current events.
2634  */
2635 void trace_buffered_event_disable(void)
2636 {
2637 	int cpu;
2638 
2639 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2640 
2641 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2642 		return;
2643 
2644 	if (--trace_buffered_event_ref)
2645 		return;
2646 
2647 	/* For each CPU, set the buffer as used. */
2648 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2649 			 NULL, true);
2650 
2651 	/* Wait for all current users to finish */
2652 	synchronize_rcu();
2653 
2654 	for_each_tracing_cpu(cpu) {
2655 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2656 		per_cpu(trace_buffered_event, cpu) = NULL;
2657 	}
2658 
2659 	/*
2660 	 * Wait for any CPU that started checking whether it can use its event
2661 	 * buffer only after the previous synchronize_rcu() call and therefore
2662 	 * still read a valid pointer from trace_buffered_event. Such a CPU must
2663 	 * not see the cleared trace_buffered_event_cnt, or it could wrongly
2664 	 * decide to use the pointed-to buffer, which is now freed.
2665 	 */
2666 	synchronize_rcu();
2667 
2668 	/* For each CPU, relinquish the buffer */
2669 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2670 			 true);
2671 }
2672 
2673 static struct trace_buffer *temp_buffer;
2674 
2675 struct ring_buffer_event *
2676 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2677 			  struct trace_event_file *trace_file,
2678 			  int type, unsigned long len,
2679 			  unsigned int trace_ctx)
2680 {
2681 	struct ring_buffer_event *entry;
2682 	struct trace_array *tr = trace_file->tr;
2683 	int val;
2684 
2685 	*current_rb = tr->array_buffer.buffer;
2686 
2687 	if (!tr->no_filter_buffering_ref &&
2688 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2689 		preempt_disable_notrace();
2690 		/*
2691 		 * Filtering is on, so try to use the per cpu buffer first.
2692 		 * This buffer will simulate a ring_buffer_event,
2693 		 * where the type_len is zero and the array[0] will
2694 		 * hold the full length.
2695 		 * (see include/linux/ring_buffer.h for details on
2696 		 *  how the ring_buffer_event is structured).
2697 		 *
2698 		 * Using a temp buffer during filtering and copying it
2699 		 * on a matched filter is quicker than writing directly
2700 		 * into the ring buffer and then discarding it when
2701 		 * it doesn't match. That is because the discard
2702 		 * requires several atomic operations to get right.
2703 		 * Copying on a match and doing nothing on a failed match
2704 		 * is still quicker than skipping the copy on a match but
2705 		 * having to discard out of the ring buffer on a failed match.
2706 		 */
2707 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2708 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2709 
2710 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2711 
2712 			/*
2713 			 * Preemption is disabled, but interrupts and NMIs
2714 			 * can still come in now. If that happens after
2715 			 * the above increment, then it will have to go
2716 			 * back to the old method of allocating the event
2717 			 * on the ring buffer, and if the filter fails, it
2718 			 * will have to call ring_buffer_discard_commit()
2719 			 * to remove it.
2720 			 *
2721 			 * Need to also check the unlikely case that the
2722 			 * length is bigger than the temp buffer size.
2723 			 * If that happens, then the reserve is pretty much
2724 			 * guaranteed to fail, as the ring buffer currently
2725 			 * only allows events less than a page. But that may
2726 			 * change in the future, so let the ring buffer reserve
2727 			 * handle the failure in that case.
2728 			 */
2729 			if (val == 1 && likely(len <= max_len)) {
2730 				trace_event_setup(entry, type, trace_ctx);
2731 				entry->array[0] = len;
2732 				/* Return with preemption disabled */
2733 				return entry;
2734 			}
2735 			this_cpu_dec(trace_buffered_event_cnt);
2736 		}
2737 		/* __trace_buffer_lock_reserve() disables preemption */
2738 		preempt_enable_notrace();
2739 	}
2740 
2741 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2742 					    trace_ctx);
2743 	/*
2744 	 * If tracing is off, but we have triggers enabled,
2745 	 * we still need to look at the event data. Use the temp_buffer
2746 	 * to store the trace event for the trigger to use. It's recursion
2747 	 * safe and will not be recorded anywhere.
2748 	 */
2749 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2750 		*current_rb = temp_buffer;
2751 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2752 						    trace_ctx);
2753 	}
2754 	return entry;
2755 }
2756 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
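/*
 * Editor's summary (illustrative): trace_event_buffer_lock_reserve() above
 * picks one of three destinations for an event:
 *
 *   1. the per-CPU trace_buffered_event page, when filtering is active and
 *      the page is free (copied into the real ring buffer only on a match);
 *   2. the instance's ring buffer, via __trace_buffer_lock_reserve();
 *   3. the global temp_buffer, when the ring buffer refused the reserve but
 *      the file still has conditional triggers that must see the event data.
 */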
2757 
2758 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2759 static DEFINE_MUTEX(tracepoint_printk_mutex);
2760 
2761 static void output_printk(struct trace_event_buffer *fbuffer)
2762 {
2763 	struct trace_event_call *event_call;
2764 	struct trace_event_file *file;
2765 	struct trace_event *event;
2766 	unsigned long flags;
2767 	struct trace_iterator *iter = tracepoint_print_iter;
2768 
2769 	/* We should never get here if iter is NULL */
2770 	if (WARN_ON_ONCE(!iter))
2771 		return;
2772 
2773 	event_call = fbuffer->trace_file->event_call;
2774 	if (!event_call || !event_call->event.funcs ||
2775 	    !event_call->event.funcs->trace)
2776 		return;
2777 
2778 	file = fbuffer->trace_file;
2779 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2780 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2781 	     !filter_match_preds(file->filter, fbuffer->entry)))
2782 		return;
2783 
2784 	event = &fbuffer->trace_file->event_call->event;
2785 
2786 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2787 	trace_seq_init(&iter->seq);
2788 	iter->ent = fbuffer->entry;
2789 	event_call->event.funcs->trace(iter, 0, event);
2790 	trace_seq_putc(&iter->seq, 0);
2791 	printk("%s", iter->seq.buffer);
2792 
2793 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2794 }
2795 
2796 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2797 			     void *buffer, size_t *lenp,
2798 			     loff_t *ppos)
2799 {
2800 	int save_tracepoint_printk;
2801 	int ret;
2802 
2803 	guard(mutex)(&tracepoint_printk_mutex);
2804 	save_tracepoint_printk = tracepoint_printk;
2805 
2806 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2807 
2808 	/*
2809 	 * This will force exiting early, as tracepoint_printk
2810 	 * is always zero when tracepoint_print_iter is not allocated.
2811 	 */
2812 	if (!tracepoint_print_iter)
2813 		tracepoint_printk = 0;
2814 
2815 	if (save_tracepoint_printk == tracepoint_printk)
2816 		return ret;
2817 
2818 	if (tracepoint_printk)
2819 		static_key_enable(&tracepoint_printk_key.key);
2820 	else
2821 		static_key_disable(&tracepoint_printk_key.key);
2822 
2823 	return ret;
2824 }
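/*
 * Illustrative note (not part of the original file): this sysctl is the
 * runtime side of the "tp_printk" boot option. Once the iterator has been
 * allocated at boot, the feature can be toggled with, e.g.:
 *
 *	sysctl kernel.tracepoint_printk=1
 *
 * which flips the static key so trace_event_buffer_commit() also routes
 * events through output_printk() above.
 */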
2825 
2826 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2827 {
2828 	enum event_trigger_type tt = ETT_NONE;
2829 	struct trace_event_file *file = fbuffer->trace_file;
2830 
2831 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2832 			fbuffer->entry, &tt))
2833 		goto discard;
2834 
2835 	if (static_key_false(&tracepoint_printk_key.key))
2836 		output_printk(fbuffer);
2837 
2838 	if (static_branch_unlikely(&trace_event_exports_enabled))
2839 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2840 
2841 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2842 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2843 
2844 discard:
2845 	if (tt)
2846 		event_triggers_post_call(file, tt);
2847 
2848 }
2849 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2850 
2851 /*
2852  * Skip 3:
2853  *
2854  *   trace_buffer_unlock_commit_regs()
2855  *   trace_event_buffer_commit()
2856  *   trace_event_raw_event_xxx()
2857  */
2858 # define STACK_SKIP 3
2859 
2860 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2861 				     struct trace_buffer *buffer,
2862 				     struct ring_buffer_event *event,
2863 				     unsigned int trace_ctx,
2864 				     struct pt_regs *regs)
2865 {
2866 	__buffer_unlock_commit(buffer, event);
2867 
2868 	/*
2869 	 * If regs is not set, then skip the necessary functions.
2870 	 * Note, we can still get here via blktrace, wakeup tracer
2871 	 * and mmiotrace, but that's ok if they lose a function or
2872 	 * two. They are not that meaningful.
2873 	 */
2874 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2875 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2876 }
2877 
2878 /*
2879  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2880  */
2881 void
2882 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2883 				   struct ring_buffer_event *event)
2884 {
2885 	__buffer_unlock_commit(buffer, event);
2886 }
2887 
2888 void
2889 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2890 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2891 {
2892 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2893 	struct ring_buffer_event *event;
2894 	struct ftrace_entry *entry;
2895 	int size = sizeof(*entry);
2896 
2897 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2898 
2899 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2900 					    trace_ctx);
2901 	if (!event)
2902 		return;
2903 	entry	= ring_buffer_event_data(event);
2904 	entry->ip			= ip;
2905 	entry->parent_ip		= parent_ip;
2906 
2907 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2908 	if (fregs) {
2909 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2910 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2911 	}
2912 #endif
2913 
2914 	if (static_branch_unlikely(&trace_function_exports_enabled))
2915 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2916 	__buffer_unlock_commit(buffer, event);
2917 }
2918 
2919 #ifdef CONFIG_STACKTRACE
2920 
2921 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2922 #define FTRACE_KSTACK_NESTING	4
2923 
2924 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2925 
2926 struct ftrace_stack {
2927 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2928 };
2929 
2930 
2931 struct ftrace_stacks {
2932 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2933 };
2934 
2935 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2936 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2937 
2938 static void __ftrace_trace_stack(struct trace_array *tr,
2939 				 struct trace_buffer *buffer,
2940 				 unsigned int trace_ctx,
2941 				 int skip, struct pt_regs *regs)
2942 {
2943 	struct ring_buffer_event *event;
2944 	unsigned int size, nr_entries;
2945 	struct ftrace_stack *fstack;
2946 	struct stack_entry *entry;
2947 	int stackidx;
2948 
2949 	/*
2950 	 * Add one, for this function and the call to stack_trace_save().
2951 	 * If regs is set, then these functions will not be in the way.
2952 	 */
2953 #ifndef CONFIG_UNWINDER_ORC
2954 	if (!regs)
2955 		skip++;
2956 #endif
2957 
2958 	preempt_disable_notrace();
2959 
2960 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2961 
2962 	/* This should never happen. If it does, yell once and skip */
2963 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2964 		goto out;
2965 
2966 	/*
2967 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2968 	 * interrupt will either see the value pre increment or post
2969 	 * increment. If the interrupt happens pre increment it will have
2970 	 * restored the counter when it returns.  We just need a barrier to
2971 	 * keep gcc from moving things around.
2972 	 */
2973 	barrier();
2974 
2975 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2976 	size = ARRAY_SIZE(fstack->calls);
2977 
2978 	if (regs) {
2979 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2980 						   size, skip);
2981 	} else {
2982 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2983 	}
2984 
2985 #ifdef CONFIG_DYNAMIC_FTRACE
2986 	/* Mark entries of the stack trace that are trampoline code */
2987 	if (tr->ops && tr->ops->trampoline) {
2988 		unsigned long tramp_start = tr->ops->trampoline;
2989 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2990 		unsigned long *calls = fstack->calls;
2991 
2992 		for (int i = 0; i < nr_entries; i++) {
2993 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2994 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2995 		}
2996 	}
2997 #endif
2998 
2999 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3000 				    struct_size(entry, caller, nr_entries),
3001 				    trace_ctx);
3002 	if (!event)
3003 		goto out;
3004 	entry = ring_buffer_event_data(event);
3005 
3006 	entry->size = nr_entries;
3007 	memcpy(&entry->caller, fstack->calls,
3008 	       flex_array_size(entry, caller, nr_entries));
3009 
3010 	__buffer_unlock_commit(buffer, event);
3011 
3012  out:
3013 	/* Again, don't let gcc optimize things here */
3014 	barrier();
3015 	__this_cpu_dec(ftrace_stack_reserve);
3016 	preempt_enable_notrace();
3017 
3018 }
3019 
3020 static inline void ftrace_trace_stack(struct trace_array *tr,
3021 				      struct trace_buffer *buffer,
3022 				      unsigned int trace_ctx,
3023 				      int skip, struct pt_regs *regs)
3024 {
3025 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3026 		return;
3027 
3028 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3029 }
3030 
3031 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3032 		   int skip)
3033 {
3034 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3035 
3036 	if (rcu_is_watching()) {
3037 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3038 		return;
3039 	}
3040 
3041 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3042 		return;
3043 
3044 	/*
3045 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3046 	 * but if the above rcu_is_watching() failed, then the NMI
3047 	 * triggered someplace critical, and ct_irq_enter() should
3048 	 * not be called from NMI.
3049 	 */
3050 	if (unlikely(in_nmi()))
3051 		return;
3052 
3053 	ct_irq_enter_irqson();
3054 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3055 	ct_irq_exit_irqson();
3056 }
3057 
3058 /**
3059  * trace_dump_stack - record a stack back trace in the trace buffer
3060  * @skip: Number of functions to skip (helper handlers)
3061  */
3062 void trace_dump_stack(int skip)
3063 {
3064 	if (tracing_disabled || tracing_selftest_running)
3065 		return;
3066 
3067 #ifndef CONFIG_UNWINDER_ORC
3068 	/* Skip 1 to skip this function. */
3069 	skip++;
3070 #endif
3071 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3072 				tracing_gen_ctx(), skip, NULL);
3073 }
3074 EXPORT_SYMBOL_GPL(trace_dump_stack);
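/*
 * Illustrative note (not part of the original file): trace_dump_stack() is
 * meant as an ad-hoc debugging aid. Dropping a call such as
 *
 *	trace_dump_stack(0);
 *
 * into a code path under investigation records the kernel stack at that point
 * into the printk trace buffer (normally the top level one), where it shows
 * up as a stack-trace entry alongside the other events.
 */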
3075 
3076 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3077 static DEFINE_PER_CPU(int, user_stack_count);
3078 
3079 static void
3080 ftrace_trace_userstack(struct trace_array *tr,
3081 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3082 {
3083 	struct ring_buffer_event *event;
3084 	struct userstack_entry *entry;
3085 
3086 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3087 		return;
3088 
3089 	/*
3090 	 * NMIs cannot handle page faults, even with fixups.
3091 	 * Saving the user stack can (and often does) fault.
3092 	 */
3093 	if (unlikely(in_nmi()))
3094 		return;
3095 
3096 	/*
3097 	 * Prevent recursion, since the user stack tracing may
3098 	 * trigger other kernel events.
3099 	 */
3100 	preempt_disable();
3101 	if (__this_cpu_read(user_stack_count))
3102 		goto out;
3103 
3104 	__this_cpu_inc(user_stack_count);
3105 
3106 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3107 					    sizeof(*entry), trace_ctx);
3108 	if (!event)
3109 		goto out_drop_count;
3110 	entry	= ring_buffer_event_data(event);
3111 
3112 	entry->tgid		= current->tgid;
3113 	memset(&entry->caller, 0, sizeof(entry->caller));
3114 
3115 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3116 	__buffer_unlock_commit(buffer, event);
3117 
3118  out_drop_count:
3119 	__this_cpu_dec(user_stack_count);
3120  out:
3121 	preempt_enable();
3122 }
3123 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3124 static void ftrace_trace_userstack(struct trace_array *tr,
3125 				   struct trace_buffer *buffer,
3126 				   unsigned int trace_ctx)
3127 {
3128 }
3129 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3130 
3131 #endif /* CONFIG_STACKTRACE */
3132 
3133 static inline void
3134 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3135 			  unsigned long long delta)
3136 {
3137 	entry->bottom_delta_ts = delta & U32_MAX;
3138 	entry->top_delta_ts = (delta >> 32);
3139 }
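/*
 * Editor's note (illustrative): the 64-bit delta is split so it fits the
 * func_repeats_entry layout; the output side reassembles it as
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */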
3140 
3141 void trace_last_func_repeats(struct trace_array *tr,
3142 			     struct trace_func_repeats *last_info,
3143 			     unsigned int trace_ctx)
3144 {
3145 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3146 	struct func_repeats_entry *entry;
3147 	struct ring_buffer_event *event;
3148 	u64 delta;
3149 
3150 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3151 					    sizeof(*entry), trace_ctx);
3152 	if (!event)
3153 		return;
3154 
3155 	delta = ring_buffer_event_time_stamp(buffer, event) -
3156 		last_info->ts_last_call;
3157 
3158 	entry = ring_buffer_event_data(event);
3159 	entry->ip = last_info->ip;
3160 	entry->parent_ip = last_info->parent_ip;
3161 	entry->count = last_info->count;
3162 	func_repeats_set_delta_ts(entry, delta);
3163 
3164 	__buffer_unlock_commit(buffer, event);
3165 }
3166 
3167 /* created for use with alloc_percpu */
3168 struct trace_buffer_struct {
3169 	int nesting;
3170 	char buffer[4][TRACE_BUF_SIZE];
3171 };
3172 
3173 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3174 
3175 /*
3176  * This allows for lockless recording.  If we're nested too deeply, then
3177  * this returns NULL.
3178  */
3179 static char *get_trace_buf(void)
3180 {
3181 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3182 
3183 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3184 		return NULL;
3185 
3186 	buffer->nesting++;
3187 
3188 	/* Interrupts must see nesting incremented before we use the buffer */
3189 	barrier();
3190 	return &buffer->buffer[buffer->nesting - 1][0];
3191 }
3192 
3193 static void put_trace_buf(void)
3194 {
3195 	/* Don't let the decrement of nesting leak before this */
3196 	barrier();
3197 	this_cpu_dec(trace_percpu_buffer->nesting);
3198 }
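/*
 * Editor's note (illustrative): the four per-CPU slots above mirror the four
 * contexts a trace_printk() can nest in on one CPU (task, softirq, hardirq,
 * NMI), which is why get_trace_buf() refuses a fifth level of nesting.
 */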
3199 
3200 static int alloc_percpu_trace_buffer(void)
3201 {
3202 	struct trace_buffer_struct __percpu *buffers;
3203 
3204 	if (trace_percpu_buffer)
3205 		return 0;
3206 
3207 	buffers = alloc_percpu(struct trace_buffer_struct);
3208 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3209 		return -ENOMEM;
3210 
3211 	trace_percpu_buffer = buffers;
3212 	return 0;
3213 }
3214 
3215 static int buffers_allocated;
3216 
3217 void trace_printk_init_buffers(void)
3218 {
3219 	if (buffers_allocated)
3220 		return;
3221 
3222 	if (alloc_percpu_trace_buffer())
3223 		return;
3224 
3225 	/* trace_printk() is for debug use only. Don't use it in production. */
3226 
3227 	pr_warn("\n");
3228 	pr_warn("**********************************************************\n");
3229 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3230 	pr_warn("**                                                      **\n");
3231 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3232 	pr_warn("**                                                      **\n");
3233 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3234 	pr_warn("** unsafe for production use.                           **\n");
3235 	pr_warn("**                                                      **\n");
3236 	pr_warn("** If you see this message and you are not debugging    **\n");
3237 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3238 	pr_warn("**                                                      **\n");
3239 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3240 	pr_warn("**********************************************************\n");
3241 
3242 	/* Expand the buffers to the set size */
3243 	tracing_update_buffers(&global_trace);
3244 
3245 	buffers_allocated = 1;
3246 
3247 	/*
3248 	 * trace_printk_init_buffers() can be called by modules.
3249 	 * If that happens, then we need to start cmdline recording
3250 	 * directly here. If the global_trace.buffer is already
3251 	 * allocated here, then this was called by module code.
3252 	 */
3253 	if (global_trace.array_buffer.buffer)
3254 		tracing_start_cmdline_record();
3255 }
3256 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3257 
3258 void trace_printk_start_comm(void)
3259 {
3260 	/* Start tracing comms if trace printk is set */
3261 	if (!buffers_allocated)
3262 		return;
3263 	tracing_start_cmdline_record();
3264 }
3265 
3266 static void trace_printk_start_stop_comm(int enabled)
3267 {
3268 	if (!buffers_allocated)
3269 		return;
3270 
3271 	if (enabled)
3272 		tracing_start_cmdline_record();
3273 	else
3274 		tracing_stop_cmdline_record();
3275 }
3276 
3277 /**
3278  * trace_vbprintk - write binary msg to tracing buffer
3279  * @ip:    The address of the caller
3280  * @fmt:   The string format to write to the buffer
3281  * @args:  Arguments for @fmt
3282  */
3283 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3284 {
3285 	struct ring_buffer_event *event;
3286 	struct trace_buffer *buffer;
3287 	struct trace_array *tr = READ_ONCE(printk_trace);
3288 	struct bprint_entry *entry;
3289 	unsigned int trace_ctx;
3290 	char *tbuffer;
3291 	int len = 0, size;
3292 
3293 	if (!printk_binsafe(tr))
3294 		return trace_vprintk(ip, fmt, args);
3295 
3296 	if (unlikely(tracing_selftest_running || tracing_disabled))
3297 		return 0;
3298 
3299 	/* Don't pollute graph traces with trace_vprintk internals */
3300 	pause_graph_tracing();
3301 
3302 	trace_ctx = tracing_gen_ctx();
3303 	preempt_disable_notrace();
3304 
3305 	tbuffer = get_trace_buf();
3306 	if (!tbuffer) {
3307 		len = 0;
3308 		goto out_nobuffer;
3309 	}
3310 
3311 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3312 
3313 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3314 		goto out_put;
3315 
3316 	size = sizeof(*entry) + sizeof(u32) * len;
3317 	buffer = tr->array_buffer.buffer;
3318 	ring_buffer_nest_start(buffer);
3319 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3320 					    trace_ctx);
3321 	if (!event)
3322 		goto out;
3323 	entry = ring_buffer_event_data(event);
3324 	entry->ip			= ip;
3325 	entry->fmt			= fmt;
3326 
3327 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3328 	__buffer_unlock_commit(buffer, event);
3329 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3330 
3331 out:
3332 	ring_buffer_nest_end(buffer);
3333 out_put:
3334 	put_trace_buf();
3335 
3336 out_nobuffer:
3337 	preempt_enable_notrace();
3338 	unpause_graph_tracing();
3339 
3340 	return len;
3341 }
3342 EXPORT_SYMBOL_GPL(trace_vbprintk);
3343 
3344 static __printf(3, 0)
3345 int __trace_array_vprintk(struct trace_buffer *buffer,
3346 			  unsigned long ip, const char *fmt, va_list args)
3347 {
3348 	struct ring_buffer_event *event;
3349 	int len = 0, size;
3350 	struct print_entry *entry;
3351 	unsigned int trace_ctx;
3352 	char *tbuffer;
3353 
3354 	if (tracing_disabled)
3355 		return 0;
3356 
3357 	/* Don't pollute graph traces with trace_vprintk internals */
3358 	pause_graph_tracing();
3359 
3360 	trace_ctx = tracing_gen_ctx();
3361 	preempt_disable_notrace();
3362 
3363 
3364 	tbuffer = get_trace_buf();
3365 	if (!tbuffer) {
3366 		len = 0;
3367 		goto out_nobuffer;
3368 	}
3369 
3370 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3371 
3372 	size = sizeof(*entry) + len + 1;
3373 	ring_buffer_nest_start(buffer);
3374 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3375 					    trace_ctx);
3376 	if (!event)
3377 		goto out;
3378 	entry = ring_buffer_event_data(event);
3379 	entry->ip = ip;
3380 
3381 	memcpy(&entry->buf, tbuffer, len + 1);
3382 	__buffer_unlock_commit(buffer, event);
3383 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3384 
3385 out:
3386 	ring_buffer_nest_end(buffer);
3387 	put_trace_buf();
3388 
3389 out_nobuffer:
3390 	preempt_enable_notrace();
3391 	unpause_graph_tracing();
3392 
3393 	return len;
3394 }
3395 
3396 int trace_array_vprintk(struct trace_array *tr,
3397 			unsigned long ip, const char *fmt, va_list args)
3398 {
3399 	if (tracing_selftest_running && tr == &global_trace)
3400 		return 0;
3401 
3402 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3403 }
3404 
3405 /**
3406  * trace_array_printk - Print a message to a specific instance
3407  * @tr: The instance trace_array descriptor
3408  * @ip: The instruction pointer that this is called from.
3409  * @fmt: The format to print (printf format)
3410  *
3411  * If a subsystem sets up its own instance, it may printk strings
3412  * into its tracing instance buffer using this
3413  * function. Note, this function will not write into the top level
3414  * buffer (use trace_printk() for that), as the top level
3415  * buffer should only contain events that can be individually disabled.
3416  * trace_printk() is only used for debugging a kernel, and should never
3417  * be incorporated in normal use.
3418  *
3419  * trace_array_printk() can be used, as it will not add noise to the
3420  * top level tracing buffer.
3421  *
3422  * Note, trace_array_init_printk() must be called on @tr before this
3423  * can be used.
3424  */
3425 int trace_array_printk(struct trace_array *tr,
3426 		       unsigned long ip, const char *fmt, ...)
3427 {
3428 	int ret;
3429 	va_list ap;
3430 
3431 	if (!tr)
3432 		return -ENOENT;
3433 
3434 	/* This is only allowed for created instances */
3435 	if (tr == &global_trace)
3436 		return 0;
3437 
3438 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3439 		return 0;
3440 
3441 	va_start(ap, fmt);
3442 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3443 	va_end(ap);
3444 	return ret;
3445 }
3446 EXPORT_SYMBOL_GPL(trace_array_printk);
3447 
3448 /**
3449  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3450  * @tr: The trace array to initialize the buffers for
3451  *
3452  * As trace_array_printk() only writes into instances, calls to it are OK to
3453  * have in the kernel (unlike trace_printk()). This needs to be called
3454  * before trace_array_printk() can be used on a trace_array.
3455  */
3456 int trace_array_init_printk(struct trace_array *tr)
3457 {
3458 	if (!tr)
3459 		return -ENOENT;
3460 
3461 	/* This is only allowed for created instances */
3462 	if (tr == &global_trace)
3463 		return -EINVAL;
3464 
3465 	return alloc_percpu_trace_buffer();
3466 }
3467 EXPORT_SYMBOL_GPL(trace_array_init_printk);
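/*
 * Illustrative sketch (not part of the original file): a subsystem with its
 * own instance would typically wire the two calls above up like this (error
 * handling trimmed; "my_tr" and "state" are hypothetical, with my_tr assumed
 * to have been created with trace_array_get_by_name() or a similar helper):
 *
 *	ret = trace_array_init_printk(my_tr);
 *	if (!ret)
 *		trace_array_printk(my_tr, _THIS_IP_, "state=%d\n", state);
 *
 * The output lands only in that instance's "trace" file, not in the top level
 * buffer.
 */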
3468 
3469 int trace_array_printk_buf(struct trace_buffer *buffer,
3470 			   unsigned long ip, const char *fmt, ...)
3471 {
3472 	int ret;
3473 	va_list ap;
3474 
3475 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3476 		return 0;
3477 
3478 	va_start(ap, fmt);
3479 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3480 	va_end(ap);
3481 	return ret;
3482 }
3483 
3484 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3485 {
3486 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3487 }
3488 EXPORT_SYMBOL_GPL(trace_vprintk);
3489 
3490 static void trace_iterator_increment(struct trace_iterator *iter)
3491 {
3492 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3493 
3494 	iter->idx++;
3495 	if (buf_iter)
3496 		ring_buffer_iter_advance(buf_iter);
3497 }
3498 
3499 static struct trace_entry *
3500 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3501 		unsigned long *lost_events)
3502 {
3503 	struct ring_buffer_event *event;
3504 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3505 
3506 	if (buf_iter) {
3507 		event = ring_buffer_iter_peek(buf_iter, ts);
3508 		if (lost_events)
3509 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3510 				(unsigned long)-1 : 0;
3511 	} else {
3512 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3513 					 lost_events);
3514 	}
3515 
3516 	if (event) {
3517 		iter->ent_size = ring_buffer_event_length(event);
3518 		return ring_buffer_event_data(event);
3519 	}
3520 	iter->ent_size = 0;
3521 	return NULL;
3522 }
3523 
3524 static struct trace_entry *
3525 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3526 		  unsigned long *missing_events, u64 *ent_ts)
3527 {
3528 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3529 	struct trace_entry *ent, *next = NULL;
3530 	unsigned long lost_events = 0, next_lost = 0;
3531 	int cpu_file = iter->cpu_file;
3532 	u64 next_ts = 0, ts;
3533 	int next_cpu = -1;
3534 	int next_size = 0;
3535 	int cpu;
3536 
3537 	/*
3538 	 * If we are in a per_cpu trace file, don't bother iterating over
3539 	 * all CPUs; peek at that one CPU directly.
3540 	 */
3541 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3542 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3543 			return NULL;
3544 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3545 		if (ent_cpu)
3546 			*ent_cpu = cpu_file;
3547 
3548 		return ent;
3549 	}
3550 
3551 	for_each_tracing_cpu(cpu) {
3552 
3553 		if (ring_buffer_empty_cpu(buffer, cpu))
3554 			continue;
3555 
3556 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3557 
3558 		/*
3559 		 * Pick the entry with the smallest timestamp:
3560 		 */
3561 		if (ent && (!next || ts < next_ts)) {
3562 			next = ent;
3563 			next_cpu = cpu;
3564 			next_ts = ts;
3565 			next_lost = lost_events;
3566 			next_size = iter->ent_size;
3567 		}
3568 	}
3569 
3570 	iter->ent_size = next_size;
3571 
3572 	if (ent_cpu)
3573 		*ent_cpu = next_cpu;
3574 
3575 	if (ent_ts)
3576 		*ent_ts = next_ts;
3577 
3578 	if (missing_events)
3579 		*missing_events = next_lost;
3580 
3581 	return next;
3582 }
3583 
3584 #define STATIC_FMT_BUF_SIZE	128
3585 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3586 
3587 char *trace_iter_expand_format(struct trace_iterator *iter)
3588 {
3589 	char *tmp;
3590 
3591 	/*
3592 	 * iter->tr is NULL when used with tp_printk, which makes
3593 	 * this get called where it is not safe to call krealloc().
3594 	 */
3595 	if (!iter->tr || iter->fmt == static_fmt_buf)
3596 		return NULL;
3597 
3598 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3599 		       GFP_KERNEL);
3600 	if (tmp) {
3601 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3602 		iter->fmt = tmp;
3603 	}
3604 
3605 	return tmp;
3606 }
3607 
3608 /* Returns true if the string is safe to dereference from an event */
3609 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3610 {
3611 	unsigned long addr = (unsigned long)str;
3612 	struct trace_event *trace_event;
3613 	struct trace_event_call *event;
3614 
3615 	/* OK if part of the event data */
3616 	if ((addr >= (unsigned long)iter->ent) &&
3617 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3618 		return true;
3619 
3620 	/* OK if part of the temp seq buffer */
3621 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3622 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3623 		return true;
3624 
3625 	/* Core rodata can not be freed */
3626 	if (is_kernel_rodata(addr))
3627 		return true;
3628 
3629 	if (trace_is_tracepoint_string(str))
3630 		return true;
3631 
3632 	/*
3633 	 * Now this could be a module event, referencing core module
3634 	 * data, which is OK.
3635 	 */
3636 	if (!iter->ent)
3637 		return false;
3638 
3639 	trace_event = ftrace_find_event(iter->ent->type);
3640 	if (!trace_event)
3641 		return false;
3642 
3643 	event = container_of(trace_event, struct trace_event_call, event);
3644 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3645 		return false;
3646 
3647 	/* Would rather have rodata, but this will suffice */
3648 	if (within_module_core(addr, event->module))
3649 		return true;
3650 
3651 	return false;
3652 }
3653 
3654 /**
3655  * ignore_event - Check dereferenced fields while writing to the seq buffer
3656  * @iter: The iterator that holds the seq buffer and the event being printed
3657  *
3658  * At boot up, test_event_printk() will flag any event that dereferences
3659  * a string with "%s" that does not exist in the ring buffer. It may still
3660  * be valid, as the string may point to a static string in the kernel
3661  * rodata that never gets freed. But if the string pointer is pointing
3662  * to something that was allocated, there's a chance that it can be freed
3663  * by the time the user reads the trace. This would cause a bad memory
3664  * access by the kernel and possibly crash the system.
3665  *
3666  * This function will check if the event has any fields flagged as needing
3667  * to be checked at runtime and perform those checks.
3668  *
3669  * If it is found that a field is unsafe, it will write into the @iter->seq
3670  * a message stating what was found to be unsafe.
3671  *
3672  * @return: true if the event is unsafe and should be ignored,
3673  *          false otherwise.
3674  */
3675 bool ignore_event(struct trace_iterator *iter)
3676 {
3677 	struct ftrace_event_field *field;
3678 	struct trace_event *trace_event;
3679 	struct trace_event_call *event;
3680 	struct list_head *head;
3681 	struct trace_seq *seq;
3682 	const void *ptr;
3683 
3684 	trace_event = ftrace_find_event(iter->ent->type);
3685 
3686 	seq = &iter->seq;
3687 
3688 	if (!trace_event) {
3689 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3690 		return true;
3691 	}
3692 
3693 	event = container_of(trace_event, struct trace_event_call, event);
3694 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3695 		return false;
3696 
3697 	head = trace_get_fields(event);
3698 	if (!head) {
3699 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3700 				 trace_event_name(event));
3701 		return true;
3702 	}
3703 
3704 	/* Offsets are from the iter->ent that points to the raw event */
3705 	ptr = iter->ent;
3706 
3707 	list_for_each_entry(field, head, link) {
3708 		const char *str;
3709 		bool good;
3710 
3711 		if (!field->needs_test)
3712 			continue;
3713 
3714 		str = *(const char **)(ptr + field->offset);
3715 
3716 		good = trace_safe_str(iter, str);
3717 
3718 		/*
3719 		 * If you hit this warning, it is likely that the
3720 		 * trace event in question used %s on a string that
3721 		 * was saved at the time of the event, but may not be
3722 		 * around when the trace is read. Use __string(),
3723 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3724 		 * instead. See samples/trace_events/trace-events-sample.h
3725 		 * for reference.
3726 		 */
3727 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3728 			      trace_event_name(event), field->name)) {
3729 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3730 					 trace_event_name(event), field->name);
3731 			return true;
3732 		}
3733 	}
3734 	return false;
3735 }
3736 
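/*
 * For reference, a hedged sketch of the pattern the warning above is
 * steering people toward (the event and field names here are made up;
 * see samples/trace_events/trace-events-sample.h for the real thing).
 * Because __string()/__assign_str() copy the text into the event record
 * itself, the "%s" in TP_printk() never dereferences memory that may be
 * freed before the trace is read:
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * (Note: recent kernels take only the field name in __assign_str();
 * older ones also took the source string as a second argument.)
 */
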
3737 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3738 {
3739 	const char *p, *new_fmt;
3740 	char *q;
3741 
3742 	if (WARN_ON_ONCE(!fmt))
3743 		return fmt;
3744 
3745 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3746 		return fmt;
3747 
3748 	p = fmt;
3749 	new_fmt = q = iter->fmt;
3750 	while (*p) {
3751 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3752 			if (!trace_iter_expand_format(iter))
3753 				return fmt;
3754 
3755 			q += iter->fmt - new_fmt;
3756 			new_fmt = iter->fmt;
3757 		}
3758 
3759 		*q++ = *p++;
3760 
3761 		/* Replace %p with %px */
3762 		if (p[-1] == '%') {
3763 			if (p[0] == '%') {
3764 				*q++ = *p++;
3765 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3766 				*q++ = *p++;
3767 				*q++ = 'x';
3768 			}
3769 		}
3770 	}
3771 	*q = '\0';
3772 
3773 	return new_fmt;
3774 }
3775 
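/*
 * A worked example of the rewrite above (illustrative only): with the
 * "hash-ptr" trace option cleared, a format string such as
 *
 *	"ip=%pS val=%p flags=%d"
 *
 * is copied into iter->fmt as
 *
 *	"ip=%pS val=%px flags=%d"
 *
 * Only a bare %p is touched; %pS is left alone because the following
 * alphanumeric selects a pointer extension, and %% is skipped as a
 * literal percent sign.
 */
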
3776 #define STATIC_TEMP_BUF_SIZE	128
3777 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3778 
3779 /* Find the next real entry, without updating the iterator itself */
3780 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3781 					  int *ent_cpu, u64 *ent_ts)
3782 {
3783 	/* __find_next_entry will reset ent_size */
3784 	int ent_size = iter->ent_size;
3785 	struct trace_entry *entry;
3786 
3787 	/*
3788 	 * If called from ftrace_dump(), then the iter->temp buffer
3789 	 * will be the static_temp_buf and not created from kmalloc.
3790 	 * If the entry size is greater than the buffer, we can
3791 	 * not save it. Just return NULL in that case. This is only
3792 	 * used to add markers when two consecutive events' time
3793 	 * stamps have a large delta. See trace_print_lat_context()
3794 	 */
3795 	if (iter->temp == static_temp_buf &&
3796 	    STATIC_TEMP_BUF_SIZE < ent_size)
3797 		return NULL;
3798 
3799 	/*
3800 	 * The __find_next_entry() may call peek_next_entry(), which may
3801 	 * call ring_buffer_peek() that may make the contents of iter->ent
3802 	 * undefined. Need to copy iter->ent now.
3803 	 */
3804 	if (iter->ent && iter->ent != iter->temp) {
3805 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3806 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3807 			void *temp;
3808 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3809 			if (!temp)
3810 				return NULL;
3811 			kfree(iter->temp);
3812 			iter->temp = temp;
3813 			iter->temp_size = iter->ent_size;
3814 		}
3815 		memcpy(iter->temp, iter->ent, iter->ent_size);
3816 		iter->ent = iter->temp;
3817 	}
3818 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3819 	/* Put back the original ent_size */
3820 	iter->ent_size = ent_size;
3821 
3822 	return entry;
3823 }
3824 
3825 /* Find the next real entry, and increment the iterator to the next entry */
3826 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3827 {
3828 	iter->ent = __find_next_entry(iter, &iter->cpu,
3829 				      &iter->lost_events, &iter->ts);
3830 
3831 	if (iter->ent)
3832 		trace_iterator_increment(iter);
3833 
3834 	return iter->ent ? iter : NULL;
3835 }
3836 
3837 static void trace_consume(struct trace_iterator *iter)
3838 {
3839 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3840 			    &iter->lost_events);
3841 }
3842 
3843 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3844 {
3845 	struct trace_iterator *iter = m->private;
3846 	int i = (int)*pos;
3847 	void *ent;
3848 
3849 	WARN_ON_ONCE(iter->leftover);
3850 
3851 	(*pos)++;
3852 
3853 	/* can't go backwards */
3854 	if (iter->idx > i)
3855 		return NULL;
3856 
3857 	if (iter->idx < 0)
3858 		ent = trace_find_next_entry_inc(iter);
3859 	else
3860 		ent = iter;
3861 
3862 	while (ent && iter->idx < i)
3863 		ent = trace_find_next_entry_inc(iter);
3864 
3865 	iter->pos = *pos;
3866 
3867 	return ent;
3868 }
3869 
3870 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3871 {
3872 	struct ring_buffer_iter *buf_iter;
3873 	unsigned long entries = 0;
3874 	u64 ts;
3875 
3876 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3877 
3878 	buf_iter = trace_buffer_iter(iter, cpu);
3879 	if (!buf_iter)
3880 		return;
3881 
3882 	ring_buffer_iter_reset(buf_iter);
3883 
3884 	/*
3885 	 * With the max latency tracers, it is possible that a reset
3886 	 * never took place on a CPU. This is evident from the timestamp
3887 	 * being before the start of the buffer.
3888 	 */
3889 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3890 		if (ts >= iter->array_buffer->time_start)
3891 			break;
3892 		entries++;
3893 		ring_buffer_iter_advance(buf_iter);
3894 		/* This could be a big loop */
3895 		cond_resched();
3896 	}
3897 
3898 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3899 }
3900 
3901 /*
3902  * The current tracer is copied to avoid taking a global lock
3903  * all around.
3904  */
3905 static void *s_start(struct seq_file *m, loff_t *pos)
3906 {
3907 	struct trace_iterator *iter = m->private;
3908 	struct trace_array *tr = iter->tr;
3909 	int cpu_file = iter->cpu_file;
3910 	void *p = NULL;
3911 	loff_t l = 0;
3912 	int cpu;
3913 
3914 	mutex_lock(&trace_types_lock);
3915 	if (unlikely(tr->current_trace != iter->trace)) {
3916 		/* Close iter->trace before switching to the new current tracer */
3917 		if (iter->trace->close)
3918 			iter->trace->close(iter);
3919 		iter->trace = tr->current_trace;
3920 		/* Reopen the new current tracer */
3921 		if (iter->trace->open)
3922 			iter->trace->open(iter);
3923 	}
3924 	mutex_unlock(&trace_types_lock);
3925 
3926 #ifdef CONFIG_TRACER_MAX_TRACE
3927 	if (iter->snapshot && iter->trace->use_max_tr)
3928 		return ERR_PTR(-EBUSY);
3929 #endif
3930 
3931 	if (*pos != iter->pos) {
3932 		iter->ent = NULL;
3933 		iter->cpu = 0;
3934 		iter->idx = -1;
3935 
3936 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3937 			for_each_tracing_cpu(cpu)
3938 				tracing_iter_reset(iter, cpu);
3939 		} else
3940 			tracing_iter_reset(iter, cpu_file);
3941 
3942 		iter->leftover = 0;
3943 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3944 			;
3945 
3946 	} else {
3947 		/*
3948 		 * If we overflowed the seq_file before, then we want
3949 		 * to just reuse the trace_seq buffer again.
3950 		 */
3951 		if (iter->leftover)
3952 			p = iter;
3953 		else {
3954 			l = *pos - 1;
3955 			p = s_next(m, p, &l);
3956 		}
3957 	}
3958 
3959 	trace_event_read_lock();
3960 	trace_access_lock(cpu_file);
3961 	return p;
3962 }
3963 
3964 static void s_stop(struct seq_file *m, void *p)
3965 {
3966 	struct trace_iterator *iter = m->private;
3967 
3968 #ifdef CONFIG_TRACER_MAX_TRACE
3969 	if (iter->snapshot && iter->trace->use_max_tr)
3970 		return;
3971 #endif
3972 
3973 	trace_access_unlock(iter->cpu_file);
3974 	trace_event_read_unlock();
3975 }
3976 
3977 static void
3978 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3979 		      unsigned long *entries, int cpu)
3980 {
3981 	unsigned long count;
3982 
3983 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3984 	/*
3985 	 * If this buffer has skipped entries, then we hold all
3986 	 * entries for the trace and we need to ignore the
3987 	 * ones before the time stamp.
3988 	 */
3989 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3990 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3991 		/* total is the same as the entries */
3992 		*total = count;
3993 	} else
3994 		*total = count +
3995 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3996 	*entries = count;
3997 }
3998 
3999 static void
4000 get_total_entries(struct array_buffer *buf,
4001 		  unsigned long *total, unsigned long *entries)
4002 {
4003 	unsigned long t, e;
4004 	int cpu;
4005 
4006 	*total = 0;
4007 	*entries = 0;
4008 
4009 	for_each_tracing_cpu(cpu) {
4010 		get_total_entries_cpu(buf, &t, &e, cpu);
4011 		*total += t;
4012 		*entries += e;
4013 	}
4014 }
4015 
4016 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4017 {
4018 	unsigned long total, entries;
4019 
4020 	if (!tr)
4021 		tr = &global_trace;
4022 
4023 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4024 
4025 	return entries;
4026 }
4027 
4028 unsigned long trace_total_entries(struct trace_array *tr)
4029 {
4030 	unsigned long total, entries;
4031 
4032 	if (!tr)
4033 		tr = &global_trace;
4034 
4035 	get_total_entries(&tr->array_buffer, &total, &entries);
4036 
4037 	return entries;
4038 }
4039 
4040 static void print_lat_help_header(struct seq_file *m)
4041 {
4042 	seq_puts(m, "#                    _------=> CPU#            \n"
4043 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4044 		    "#                  | / _----=> need-resched    \n"
4045 		    "#                  || / _---=> hardirq/softirq \n"
4046 		    "#                  ||| / _--=> preempt-depth   \n"
4047 		    "#                  |||| / _-=> migrate-disable \n"
4048 		    "#                  ||||| /     delay           \n"
4049 		    "#  cmd     pid     |||||| time  |   caller     \n"
4050 		    "#     \\   /        ||||||  \\    |    /       \n");
4051 }
4052 
4053 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4054 {
4055 	unsigned long total;
4056 	unsigned long entries;
4057 
4058 	get_total_entries(buf, &total, &entries);
4059 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4060 		   entries, total, num_online_cpus());
4061 	seq_puts(m, "#\n");
4062 }
4063 
4064 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4065 				   unsigned int flags)
4066 {
4067 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4068 
4069 	print_event_info(buf, m);
4070 
4071 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4072 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4073 }
4074 
4075 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4076 				       unsigned int flags)
4077 {
4078 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4079 	static const char space[] = "            ";
4080 	int prec = tgid ? 12 : 2;
4081 
4082 	print_event_info(buf, m);
4083 
4084 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4085 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4086 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4087 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4088 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4089 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4090 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4091 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4092 }
4093 
4094 void
4095 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4096 {
4097 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4098 	struct array_buffer *buf = iter->array_buffer;
4099 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4100 	struct tracer *type = iter->trace;
4101 	unsigned long entries;
4102 	unsigned long total;
4103 	const char *name = type->name;
4104 
4105 	get_total_entries(buf, &total, &entries);
4106 
4107 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4108 		   name, init_utsname()->release);
4109 	seq_puts(m, "# -----------------------------------"
4110 		 "---------------------------------\n");
4111 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4112 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4113 		   nsecs_to_usecs(data->saved_latency),
4114 		   entries,
4115 		   total,
4116 		   buf->cpu,
4117 		   preempt_model_str(),
4118 		   /* These are reserved for later use */
4119 		   0, 0, 0, 0);
4120 #ifdef CONFIG_SMP
4121 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4122 #else
4123 	seq_puts(m, ")\n");
4124 #endif
4125 	seq_puts(m, "#    -----------------\n");
4126 	seq_printf(m, "#    | task: %.16s-%d "
4127 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4128 		   data->comm, data->pid,
4129 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4130 		   data->policy, data->rt_priority);
4131 	seq_puts(m, "#    -----------------\n");
4132 
4133 	if (data->critical_start) {
4134 		seq_puts(m, "#  => started at: ");
4135 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4136 		trace_print_seq(m, &iter->seq);
4137 		seq_puts(m, "\n#  => ended at:   ");
4138 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4139 		trace_print_seq(m, &iter->seq);
4140 		seq_puts(m, "\n#\n");
4141 	}
4142 
4143 	seq_puts(m, "#\n");
4144 }
4145 
4146 static void test_cpu_buff_start(struct trace_iterator *iter)
4147 {
4148 	struct trace_seq *s = &iter->seq;
4149 	struct trace_array *tr = iter->tr;
4150 
4151 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4152 		return;
4153 
4154 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4155 		return;
4156 
4157 	if (cpumask_available(iter->started) &&
4158 	    cpumask_test_cpu(iter->cpu, iter->started))
4159 		return;
4160 
4161 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4162 		return;
4163 
4164 	if (cpumask_available(iter->started))
4165 		cpumask_set_cpu(iter->cpu, iter->started);
4166 
4167 	/* Don't print started cpu buffer for the first entry of the trace */
4168 	if (iter->idx > 1)
4169 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4170 				iter->cpu);
4171 }
4172 
4173 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4174 {
4175 	struct trace_array *tr = iter->tr;
4176 	struct trace_seq *s = &iter->seq;
4177 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4178 	struct trace_entry *entry;
4179 	struct trace_event *event;
4180 
4181 	entry = iter->ent;
4182 
4183 	test_cpu_buff_start(iter);
4184 
4185 	event = ftrace_find_event(entry->type);
4186 
4187 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4188 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4189 			trace_print_lat_context(iter);
4190 		else
4191 			trace_print_context(iter);
4192 	}
4193 
4194 	if (trace_seq_has_overflowed(s))
4195 		return TRACE_TYPE_PARTIAL_LINE;
4196 
4197 	if (event) {
4198 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4199 			return print_event_fields(iter, event);
4200 		/*
4201 		 * For TRACE_EVENT() events, the print_fmt is not
4202 		 * safe to use if the array has delta offsets.
4203 		 * Force printing via the fields.
4204 		 */
4205 		if ((tr->text_delta) &&
4206 		    event->type > __TRACE_LAST_TYPE)
4207 			return print_event_fields(iter, event);
4208 
4209 		return event->funcs->trace(iter, sym_flags, event);
4210 	}
4211 
4212 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4213 
4214 	return trace_handle_return(s);
4215 }
4216 
4217 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4218 {
4219 	struct trace_array *tr = iter->tr;
4220 	struct trace_seq *s = &iter->seq;
4221 	struct trace_entry *entry;
4222 	struct trace_event *event;
4223 
4224 	entry = iter->ent;
4225 
4226 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4227 		trace_seq_printf(s, "%d %d %llu ",
4228 				 entry->pid, iter->cpu, iter->ts);
4229 
4230 	if (trace_seq_has_overflowed(s))
4231 		return TRACE_TYPE_PARTIAL_LINE;
4232 
4233 	event = ftrace_find_event(entry->type);
4234 	if (event)
4235 		return event->funcs->raw(iter, 0, event);
4236 
4237 	trace_seq_printf(s, "%d ?\n", entry->type);
4238 
4239 	return trace_handle_return(s);
4240 }
4241 
4242 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4243 {
4244 	struct trace_array *tr = iter->tr;
4245 	struct trace_seq *s = &iter->seq;
4246 	unsigned char newline = '\n';
4247 	struct trace_entry *entry;
4248 	struct trace_event *event;
4249 
4250 	entry = iter->ent;
4251 
4252 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4253 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4254 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4255 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4256 		if (trace_seq_has_overflowed(s))
4257 			return TRACE_TYPE_PARTIAL_LINE;
4258 	}
4259 
4260 	event = ftrace_find_event(entry->type);
4261 	if (event) {
4262 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4263 		if (ret != TRACE_TYPE_HANDLED)
4264 			return ret;
4265 	}
4266 
4267 	SEQ_PUT_FIELD(s, newline);
4268 
4269 	return trace_handle_return(s);
4270 }
4271 
4272 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4273 {
4274 	struct trace_array *tr = iter->tr;
4275 	struct trace_seq *s = &iter->seq;
4276 	struct trace_entry *entry;
4277 	struct trace_event *event;
4278 
4279 	entry = iter->ent;
4280 
4281 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4282 		SEQ_PUT_FIELD(s, entry->pid);
4283 		SEQ_PUT_FIELD(s, iter->cpu);
4284 		SEQ_PUT_FIELD(s, iter->ts);
4285 		if (trace_seq_has_overflowed(s))
4286 			return TRACE_TYPE_PARTIAL_LINE;
4287 	}
4288 
4289 	event = ftrace_find_event(entry->type);
4290 	return event ? event->funcs->binary(iter, 0, event) :
4291 		TRACE_TYPE_HANDLED;
4292 }
4293 
4294 int trace_empty(struct trace_iterator *iter)
4295 {
4296 	struct ring_buffer_iter *buf_iter;
4297 	int cpu;
4298 
4299 	/* If we are looking at one CPU buffer, only check that one */
4300 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4301 		cpu = iter->cpu_file;
4302 		buf_iter = trace_buffer_iter(iter, cpu);
4303 		if (buf_iter) {
4304 			if (!ring_buffer_iter_empty(buf_iter))
4305 				return 0;
4306 		} else {
4307 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4308 				return 0;
4309 		}
4310 		return 1;
4311 	}
4312 
4313 	for_each_tracing_cpu(cpu) {
4314 		buf_iter = trace_buffer_iter(iter, cpu);
4315 		if (buf_iter) {
4316 			if (!ring_buffer_iter_empty(buf_iter))
4317 				return 0;
4318 		} else {
4319 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4320 				return 0;
4321 		}
4322 	}
4323 
4324 	return 1;
4325 }
4326 
4327 /*  Called with trace_event_read_lock() held. */
4328 enum print_line_t print_trace_line(struct trace_iterator *iter)
4329 {
4330 	struct trace_array *tr = iter->tr;
4331 	unsigned long trace_flags = tr->trace_flags;
4332 	enum print_line_t ret;
4333 
4334 	if (iter->lost_events) {
4335 		if (iter->lost_events == (unsigned long)-1)
4336 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4337 					 iter->cpu);
4338 		else
4339 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4340 					 iter->cpu, iter->lost_events);
4341 		if (trace_seq_has_overflowed(&iter->seq))
4342 			return TRACE_TYPE_PARTIAL_LINE;
4343 	}
4344 
4345 	if (iter->trace && iter->trace->print_line) {
4346 		ret = iter->trace->print_line(iter);
4347 		if (ret != TRACE_TYPE_UNHANDLED)
4348 			return ret;
4349 	}
4350 
4351 	if (iter->ent->type == TRACE_BPUTS &&
4352 			trace_flags & TRACE_ITER_PRINTK &&
4353 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4354 		return trace_print_bputs_msg_only(iter);
4355 
4356 	if (iter->ent->type == TRACE_BPRINT &&
4357 			trace_flags & TRACE_ITER_PRINTK &&
4358 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4359 		return trace_print_bprintk_msg_only(iter);
4360 
4361 	if (iter->ent->type == TRACE_PRINT &&
4362 			trace_flags & TRACE_ITER_PRINTK &&
4363 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4364 		return trace_print_printk_msg_only(iter);
4365 
4366 	if (trace_flags & TRACE_ITER_BIN)
4367 		return print_bin_fmt(iter);
4368 
4369 	if (trace_flags & TRACE_ITER_HEX)
4370 		return print_hex_fmt(iter);
4371 
4372 	if (trace_flags & TRACE_ITER_RAW)
4373 		return print_raw_fmt(iter);
4374 
4375 	return print_trace_fmt(iter);
4376 }
4377 
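/*
 * Usage sketch (illustrative; assumes the usual tracefs mount point): the
 * format branches above are selected from user space with the standard
 * "bin", "hex" and "raw" trace options, e.g.
 *
 *	# echo raw > /sys/kernel/tracing/trace_options     (print_raw_fmt)
 *	# echo noraw > /sys/kernel/tracing/trace_options   (back to default)
 *	# echo hex > /sys/kernel/tracing/trace_options     (print_hex_fmt)
 *
 * with the default pretty-printed output coming from print_trace_fmt().
 */
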
4378 void trace_latency_header(struct seq_file *m)
4379 {
4380 	struct trace_iterator *iter = m->private;
4381 	struct trace_array *tr = iter->tr;
4382 
4383 	/* print nothing if the buffers are empty */
4384 	if (trace_empty(iter))
4385 		return;
4386 
4387 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4388 		print_trace_header(m, iter);
4389 
4390 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4391 		print_lat_help_header(m);
4392 }
4393 
4394 void trace_default_header(struct seq_file *m)
4395 {
4396 	struct trace_iterator *iter = m->private;
4397 	struct trace_array *tr = iter->tr;
4398 	unsigned long trace_flags = tr->trace_flags;
4399 
4400 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4401 		return;
4402 
4403 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4404 		/* print nothing if the buffers are empty */
4405 		if (trace_empty(iter))
4406 			return;
4407 		print_trace_header(m, iter);
4408 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4409 			print_lat_help_header(m);
4410 	} else {
4411 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4412 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4413 				print_func_help_header_irq(iter->array_buffer,
4414 							   m, trace_flags);
4415 			else
4416 				print_func_help_header(iter->array_buffer, m,
4417 						       trace_flags);
4418 		}
4419 	}
4420 }
4421 
4422 static void test_ftrace_alive(struct seq_file *m)
4423 {
4424 	if (!ftrace_is_dead())
4425 		return;
4426 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4427 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4428 }
4429 
4430 #ifdef CONFIG_TRACER_MAX_TRACE
4431 static void show_snapshot_main_help(struct seq_file *m)
4432 {
4433 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4434 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4435 		    "#                      Takes a snapshot of the main buffer.\n"
4436 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4437 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4438 		    "#                       is not a '0' or '1')\n");
4439 }
4440 
4441 static void show_snapshot_percpu_help(struct seq_file *m)
4442 {
4443 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4444 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4445 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4446 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4447 #else
4448 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4449 		    "#                     Must use main snapshot file to allocate.\n");
4450 #endif
4451 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4452 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4453 		    "#                       is not a '0' or '1')\n");
4454 }
4455 
4456 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4457 {
4458 	if (iter->tr->allocated_snapshot)
4459 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4460 	else
4461 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4462 
4463 	seq_puts(m, "# Snapshot commands:\n");
4464 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4465 		show_snapshot_main_help(m);
4466 	else
4467 		show_snapshot_percpu_help(m);
4468 }
4469 #else
4470 /* Should never be called */
4471 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4472 #endif
4473 
4474 static int s_show(struct seq_file *m, void *v)
4475 {
4476 	struct trace_iterator *iter = v;
4477 	int ret;
4478 
4479 	if (iter->ent == NULL) {
4480 		if (iter->tr) {
4481 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4482 			seq_puts(m, "#\n");
4483 			test_ftrace_alive(m);
4484 		}
4485 		if (iter->snapshot && trace_empty(iter))
4486 			print_snapshot_help(m, iter);
4487 		else if (iter->trace && iter->trace->print_header)
4488 			iter->trace->print_header(m);
4489 		else
4490 			trace_default_header(m);
4491 
4492 	} else if (iter->leftover) {
4493 		/*
4494 		 * If we filled the seq_file buffer earlier, we
4495 		 * want to just show it now.
4496 		 */
4497 		ret = trace_print_seq(m, &iter->seq);
4498 
4499 		/* ret should this time be zero, but you never know */
4500 		iter->leftover = ret;
4501 
4502 	} else {
4503 		ret = print_trace_line(iter);
4504 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4505 			iter->seq.full = 0;
4506 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4507 		}
4508 		ret = trace_print_seq(m, &iter->seq);
4509 		/*
4510 		 * If we overflow the seq_file buffer, then it will
4511 		 * ask us for this data again at start up.
4512 		 * Use that instead.
4513 		 *  ret is 0 if seq_file write succeeded.
4514 		 *        -1 otherwise.
4515 		 */
4516 		iter->leftover = ret;
4517 	}
4518 
4519 	return 0;
4520 }
4521 
4522 /*
4523  * Should be used after trace_array_get(), trace_types_lock
4524  * ensures that i_cdev was already initialized.
4525  */
4526 static inline int tracing_get_cpu(struct inode *inode)
4527 {
4528 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4529 		return (long)inode->i_cdev - 1;
4530 	return RING_BUFFER_ALL_CPUS;
4531 }
4532 
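/*
 * A hedged note on the encoding above: trace_create_cpu_file() (defined
 * later in this file) stashes "cpu + 1" in i_cdev when it creates the
 * per_cpu/cpuN/ files, so for example per_cpu/cpu2/trace yields 2 here,
 * while the top-level files leave i_cdev NULL and get RING_BUFFER_ALL_CPUS.
 */
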
4533 static const struct seq_operations tracer_seq_ops = {
4534 	.start		= s_start,
4535 	.next		= s_next,
4536 	.stop		= s_stop,
4537 	.show		= s_show,
4538 };
4539 
4540 /*
4541  * Note, as iter itself can be allocated and freed in different
4542  * ways, this function is only used to free its content, and not
4543  * the iterator itself. The only requirement for all the allocations
4544  * is that they must zero all fields (kzalloc), as freeing works with
4545  * either allocated content or NULL.
4546  */
4547 static void free_trace_iter_content(struct trace_iterator *iter)
4548 {
4549 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4550 	if (iter->fmt != static_fmt_buf)
4551 		kfree(iter->fmt);
4552 
4553 	kfree(iter->temp);
4554 	kfree(iter->buffer_iter);
4555 	mutex_destroy(&iter->mutex);
4556 	free_cpumask_var(iter->started);
4557 }
4558 
4559 static struct trace_iterator *
4560 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4561 {
4562 	struct trace_array *tr = inode->i_private;
4563 	struct trace_iterator *iter;
4564 	int cpu;
4565 
4566 	if (tracing_disabled)
4567 		return ERR_PTR(-ENODEV);
4568 
4569 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4570 	if (!iter)
4571 		return ERR_PTR(-ENOMEM);
4572 
4573 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4574 				    GFP_KERNEL);
4575 	if (!iter->buffer_iter)
4576 		goto release;
4577 
4578 	/*
4579 	 * trace_find_next_entry() may need to save off iter->ent.
4580 	 * It will place it into the iter->temp buffer. As most
4581 	 * events are less than 128 bytes, allocate a buffer of that size.
4582 	 * If one is greater, then trace_find_next_entry() will
4583 	 * allocate a new buffer to adjust for the bigger iter->ent.
4584 	 * It's not critical if it fails to get allocated here.
4585 	 */
4586 	iter->temp = kmalloc(128, GFP_KERNEL);
4587 	if (iter->temp)
4588 		iter->temp_size = 128;
4589 
4590 	/*
4591 	 * trace_event_printf() may need to modify given format
4592 	 * string to replace %p with %px so that it shows real address
4593 	 * instead of a hash value. However, that is only needed for event
4594 	 * tracing; other tracers may not need it. Defer the allocation
4595 	 * until it is needed.
4596 	 */
4597 	iter->fmt = NULL;
4598 	iter->fmt_size = 0;
4599 
4600 	mutex_lock(&trace_types_lock);
4601 	iter->trace = tr->current_trace;
4602 
4603 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4604 		goto fail;
4605 
4606 	iter->tr = tr;
4607 
4608 #ifdef CONFIG_TRACER_MAX_TRACE
4609 	/* Currently only the top directory has a snapshot */
4610 	if (tr->current_trace->print_max || snapshot)
4611 		iter->array_buffer = &tr->max_buffer;
4612 	else
4613 #endif
4614 		iter->array_buffer = &tr->array_buffer;
4615 	iter->snapshot = snapshot;
4616 	iter->pos = -1;
4617 	iter->cpu_file = tracing_get_cpu(inode);
4618 	mutex_init(&iter->mutex);
4619 
4620 	/* Notify the tracer early; before we stop tracing. */
4621 	if (iter->trace->open)
4622 		iter->trace->open(iter);
4623 
4624 	/* Annotate start of buffers if we had overruns */
4625 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4626 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4627 
4628 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4629 	if (trace_clocks[tr->clock_id].in_ns)
4630 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4631 
4632 	/*
4633 	 * If pause-on-trace is enabled, then stop the trace while
4634 	 * dumping, unless this is the "snapshot" file
4635 	 */
4636 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4637 		tracing_stop_tr(tr);
4638 
4639 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4640 		for_each_tracing_cpu(cpu) {
4641 			iter->buffer_iter[cpu] =
4642 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4643 							 cpu, GFP_KERNEL);
4644 		}
4645 		ring_buffer_read_prepare_sync();
4646 		for_each_tracing_cpu(cpu) {
4647 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4648 			tracing_iter_reset(iter, cpu);
4649 		}
4650 	} else {
4651 		cpu = iter->cpu_file;
4652 		iter->buffer_iter[cpu] =
4653 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4654 						 cpu, GFP_KERNEL);
4655 		ring_buffer_read_prepare_sync();
4656 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4657 		tracing_iter_reset(iter, cpu);
4658 	}
4659 
4660 	mutex_unlock(&trace_types_lock);
4661 
4662 	return iter;
4663 
4664  fail:
4665 	mutex_unlock(&trace_types_lock);
4666 	free_trace_iter_content(iter);
4667 release:
4668 	seq_release_private(inode, file);
4669 	return ERR_PTR(-ENOMEM);
4670 }
4671 
4672 int tracing_open_generic(struct inode *inode, struct file *filp)
4673 {
4674 	int ret;
4675 
4676 	ret = tracing_check_open_get_tr(NULL);
4677 	if (ret)
4678 		return ret;
4679 
4680 	filp->private_data = inode->i_private;
4681 	return 0;
4682 }
4683 
4684 bool tracing_is_disabled(void)
4685 {
4686 	return (tracing_disabled) ? true : false;
4687 }
4688 
4689 /*
4690  * Open and update trace_array ref count.
4691  * Must have the current trace_array passed to it.
4692  */
4693 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4694 {
4695 	struct trace_array *tr = inode->i_private;
4696 	int ret;
4697 
4698 	ret = tracing_check_open_get_tr(tr);
4699 	if (ret)
4700 		return ret;
4701 
4702 	filp->private_data = inode->i_private;
4703 
4704 	return 0;
4705 }
4706 
4707 /*
4708  * The private pointer of the inode is the trace_event_file.
4709  * Update the tr ref count associated to it.
4710  */
4711 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4712 {
4713 	struct trace_event_file *file = inode->i_private;
4714 	int ret;
4715 
4716 	ret = tracing_check_open_get_tr(file->tr);
4717 	if (ret)
4718 		return ret;
4719 
4720 	mutex_lock(&event_mutex);
4721 
4722 	/* Fail if the file is marked for removal */
4723 	if (file->flags & EVENT_FILE_FL_FREED) {
4724 		trace_array_put(file->tr);
4725 		ret = -ENODEV;
4726 	} else {
4727 		event_file_get(file);
4728 	}
4729 
4730 	mutex_unlock(&event_mutex);
4731 	if (ret)
4732 		return ret;
4733 
4734 	filp->private_data = inode->i_private;
4735 
4736 	return 0;
4737 }
4738 
4739 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4740 {
4741 	struct trace_event_file *file = inode->i_private;
4742 
4743 	trace_array_put(file->tr);
4744 	event_file_put(file);
4745 
4746 	return 0;
4747 }
4748 
4749 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4750 {
4751 	tracing_release_file_tr(inode, filp);
4752 	return single_release(inode, filp);
4753 }
4754 
4755 static int tracing_mark_open(struct inode *inode, struct file *filp)
4756 {
4757 	stream_open(inode, filp);
4758 	return tracing_open_generic_tr(inode, filp);
4759 }
4760 
4761 static int tracing_release(struct inode *inode, struct file *file)
4762 {
4763 	struct trace_array *tr = inode->i_private;
4764 	struct seq_file *m = file->private_data;
4765 	struct trace_iterator *iter;
4766 	int cpu;
4767 
4768 	if (!(file->f_mode & FMODE_READ)) {
4769 		trace_array_put(tr);
4770 		return 0;
4771 	}
4772 
4773 	/* Writes do not use seq_file */
4774 	iter = m->private;
4775 	mutex_lock(&trace_types_lock);
4776 
4777 	for_each_tracing_cpu(cpu) {
4778 		if (iter->buffer_iter[cpu])
4779 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4780 	}
4781 
4782 	if (iter->trace && iter->trace->close)
4783 		iter->trace->close(iter);
4784 
4785 	if (!iter->snapshot && tr->stop_count)
4786 		/* reenable tracing if it was previously enabled */
4787 		tracing_start_tr(tr);
4788 
4789 	__trace_array_put(tr);
4790 
4791 	mutex_unlock(&trace_types_lock);
4792 
4793 	free_trace_iter_content(iter);
4794 	seq_release_private(inode, file);
4795 
4796 	return 0;
4797 }
4798 
4799 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4800 {
4801 	struct trace_array *tr = inode->i_private;
4802 
4803 	trace_array_put(tr);
4804 	return 0;
4805 }
4806 
4807 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4808 {
4809 	struct trace_array *tr = inode->i_private;
4810 
4811 	trace_array_put(tr);
4812 
4813 	return single_release(inode, file);
4814 }
4815 
4816 static int tracing_open(struct inode *inode, struct file *file)
4817 {
4818 	struct trace_array *tr = inode->i_private;
4819 	struct trace_iterator *iter;
4820 	int ret;
4821 
4822 	ret = tracing_check_open_get_tr(tr);
4823 	if (ret)
4824 		return ret;
4825 
4826 	/* If this file was open for write, then erase contents */
4827 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4828 		int cpu = tracing_get_cpu(inode);
4829 		struct array_buffer *trace_buf = &tr->array_buffer;
4830 
4831 #ifdef CONFIG_TRACER_MAX_TRACE
4832 		if (tr->current_trace->print_max)
4833 			trace_buf = &tr->max_buffer;
4834 #endif
4835 
4836 		if (cpu == RING_BUFFER_ALL_CPUS)
4837 			tracing_reset_online_cpus(trace_buf);
4838 		else
4839 			tracing_reset_cpu(trace_buf, cpu);
4840 	}
4841 
4842 	if (file->f_mode & FMODE_READ) {
4843 		iter = __tracing_open(inode, file, false);
4844 		if (IS_ERR(iter))
4845 			ret = PTR_ERR(iter);
4846 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4847 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4848 	}
4849 
4850 	if (ret < 0)
4851 		trace_array_put(tr);
4852 
4853 	return ret;
4854 }
4855 
4856 /*
4857  * Some tracers are not suitable for instance buffers.
4858  * A tracer is always available for the global array (toplevel)
4859  * or if it explicitly states that it is.
4860  */
4861 static bool
4862 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4863 {
4864 #ifdef CONFIG_TRACER_SNAPSHOT
4865 	/* arrays with mapped buffer range do not have snapshots */
4866 	if (tr->range_addr_start && t->use_max_tr)
4867 		return false;
4868 #endif
4869 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4870 }
4871 
4872 /* Find the next tracer that this trace array may use */
4873 static struct tracer *
4874 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4875 {
4876 	while (t && !trace_ok_for_array(t, tr))
4877 		t = t->next;
4878 
4879 	return t;
4880 }
4881 
4882 static void *
4883 t_next(struct seq_file *m, void *v, loff_t *pos)
4884 {
4885 	struct trace_array *tr = m->private;
4886 	struct tracer *t = v;
4887 
4888 	(*pos)++;
4889 
4890 	if (t)
4891 		t = get_tracer_for_array(tr, t->next);
4892 
4893 	return t;
4894 }
4895 
4896 static void *t_start(struct seq_file *m, loff_t *pos)
4897 {
4898 	struct trace_array *tr = m->private;
4899 	struct tracer *t;
4900 	loff_t l = 0;
4901 
4902 	mutex_lock(&trace_types_lock);
4903 
4904 	t = get_tracer_for_array(tr, trace_types);
4905 	for (; t && l < *pos; t = t_next(m, t, &l))
4906 			;
4907 
4908 	return t;
4909 }
4910 
4911 static void t_stop(struct seq_file *m, void *p)
4912 {
4913 	mutex_unlock(&trace_types_lock);
4914 }
4915 
4916 static int t_show(struct seq_file *m, void *v)
4917 {
4918 	struct tracer *t = v;
4919 
4920 	if (!t)
4921 		return 0;
4922 
4923 	seq_puts(m, t->name);
4924 	if (t->next)
4925 		seq_putc(m, ' ');
4926 	else
4927 		seq_putc(m, '\n');
4928 
4929 	return 0;
4930 }
4931 
4932 static const struct seq_operations show_traces_seq_ops = {
4933 	.start		= t_start,
4934 	.next		= t_next,
4935 	.stop		= t_stop,
4936 	.show		= t_show,
4937 };
4938 
4939 static int show_traces_open(struct inode *inode, struct file *file)
4940 {
4941 	struct trace_array *tr = inode->i_private;
4942 	struct seq_file *m;
4943 	int ret;
4944 
4945 	ret = tracing_check_open_get_tr(tr);
4946 	if (ret)
4947 		return ret;
4948 
4949 	ret = seq_open(file, &show_traces_seq_ops);
4950 	if (ret) {
4951 		trace_array_put(tr);
4952 		return ret;
4953 	}
4954 
4955 	m = file->private_data;
4956 	m->private = tr;
4957 
4958 	return 0;
4959 }
4960 
4961 static int tracing_seq_release(struct inode *inode, struct file *file)
4962 {
4963 	struct trace_array *tr = inode->i_private;
4964 
4965 	trace_array_put(tr);
4966 	return seq_release(inode, file);
4967 }
4968 
4969 static ssize_t
4970 tracing_write_stub(struct file *filp, const char __user *ubuf,
4971 		   size_t count, loff_t *ppos)
4972 {
4973 	return count;
4974 }
4975 
4976 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4977 {
4978 	int ret;
4979 
4980 	if (file->f_mode & FMODE_READ)
4981 		ret = seq_lseek(file, offset, whence);
4982 	else
4983 		file->f_pos = ret = 0;
4984 
4985 	return ret;
4986 }
4987 
4988 static const struct file_operations tracing_fops = {
4989 	.open		= tracing_open,
4990 	.read		= seq_read,
4991 	.read_iter	= seq_read_iter,
4992 	.splice_read	= copy_splice_read,
4993 	.write		= tracing_write_stub,
4994 	.llseek		= tracing_lseek,
4995 	.release	= tracing_release,
4996 };
4997 
4998 static const struct file_operations show_traces_fops = {
4999 	.open		= show_traces_open,
5000 	.read		= seq_read,
5001 	.llseek		= seq_lseek,
5002 	.release	= tracing_seq_release,
5003 };
5004 
5005 static ssize_t
5006 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5007 		     size_t count, loff_t *ppos)
5008 {
5009 	struct trace_array *tr = file_inode(filp)->i_private;
5010 	char *mask_str;
5011 	int len;
5012 
5013 	len = snprintf(NULL, 0, "%*pb\n",
5014 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5015 	mask_str = kmalloc(len, GFP_KERNEL);
5016 	if (!mask_str)
5017 		return -ENOMEM;
5018 
5019 	len = snprintf(mask_str, len, "%*pb\n",
5020 		       cpumask_pr_args(tr->tracing_cpumask));
5021 	if (len >= count) {
5022 		count = -EINVAL;
5023 		goto out_err;
5024 	}
5025 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5026 
5027 out_err:
5028 	kfree(mask_str);
5029 
5030 	return count;
5031 }
5032 
5033 int tracing_set_cpumask(struct trace_array *tr,
5034 			cpumask_var_t tracing_cpumask_new)
5035 {
5036 	int cpu;
5037 
5038 	if (!tr)
5039 		return -EINVAL;
5040 
5041 	local_irq_disable();
5042 	arch_spin_lock(&tr->max_lock);
5043 	for_each_tracing_cpu(cpu) {
5044 		/*
5045 		 * Increase/decrease the disabled counter if we are
5046 		 * about to flip a bit in the cpumask:
5047 		 */
5048 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5049 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5050 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5051 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5052 #ifdef CONFIG_TRACER_MAX_TRACE
5053 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5054 #endif
5055 		}
5056 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5057 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5058 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5059 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5060 #ifdef CONFIG_TRACER_MAX_TRACE
5061 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5062 #endif
5063 		}
5064 	}
5065 	arch_spin_unlock(&tr->max_lock);
5066 	local_irq_enable();
5067 
5068 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5069 
5070 	return 0;
5071 }
5072 
5073 static ssize_t
5074 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5075 		      size_t count, loff_t *ppos)
5076 {
5077 	struct trace_array *tr = file_inode(filp)->i_private;
5078 	cpumask_var_t tracing_cpumask_new;
5079 	int err;
5080 
5081 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5082 		return -EINVAL;
5083 
5084 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5085 		return -ENOMEM;
5086 
5087 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5088 	if (err)
5089 		goto err_free;
5090 
5091 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5092 	if (err)
5093 		goto err_free;
5094 
5095 	free_cpumask_var(tracing_cpumask_new);
5096 
5097 	return count;
5098 
5099 err_free:
5100 	free_cpumask_var(tracing_cpumask_new);
5101 
5102 	return err;
5103 }
5104 
5105 static const struct file_operations tracing_cpumask_fops = {
5106 	.open		= tracing_open_generic_tr,
5107 	.read		= tracing_cpumask_read,
5108 	.write		= tracing_cpumask_write,
5109 	.release	= tracing_release_generic_tr,
5110 	.llseek		= generic_file_llseek,
5111 };
5112 
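/*
 * Usage sketch for the tracing_cpumask file (illustrative; the mask is
 * parsed by cpumask_parse_user(), i.e. the usual hex cpumask format, and
 * the output below assumes an 8-CPU system):
 *
 *	# cat /sys/kernel/tracing/tracing_cpumask
 *	ff
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * After the write, only CPUs 0 and 1 are traced; the ring buffers of the
 * CPUs that were cleared stay allocated but have recording disabled via
 * ring_buffer_record_disable_cpu() above.
 */
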
5113 static int tracing_trace_options_show(struct seq_file *m, void *v)
5114 {
5115 	struct tracer_opt *trace_opts;
5116 	struct trace_array *tr = m->private;
5117 	u32 tracer_flags;
5118 	int i;
5119 
5120 	guard(mutex)(&trace_types_lock);
5121 
5122 	tracer_flags = tr->current_trace->flags->val;
5123 	trace_opts = tr->current_trace->flags->opts;
5124 
5125 	for (i = 0; trace_options[i]; i++) {
5126 		if (tr->trace_flags & (1 << i))
5127 			seq_printf(m, "%s\n", trace_options[i]);
5128 		else
5129 			seq_printf(m, "no%s\n", trace_options[i]);
5130 	}
5131 
5132 	for (i = 0; trace_opts[i].name; i++) {
5133 		if (tracer_flags & trace_opts[i].bit)
5134 			seq_printf(m, "%s\n", trace_opts[i].name);
5135 		else
5136 			seq_printf(m, "no%s\n", trace_opts[i].name);
5137 	}
5138 
5139 	return 0;
5140 }
5141 
5142 static int __set_tracer_option(struct trace_array *tr,
5143 			       struct tracer_flags *tracer_flags,
5144 			       struct tracer_opt *opts, int neg)
5145 {
5146 	struct tracer *trace = tracer_flags->trace;
5147 	int ret;
5148 
5149 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5150 	if (ret)
5151 		return ret;
5152 
5153 	if (neg)
5154 		tracer_flags->val &= ~opts->bit;
5155 	else
5156 		tracer_flags->val |= opts->bit;
5157 	return 0;
5158 }
5159 
5160 /* Try to assign a tracer specific option */
5161 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5162 {
5163 	struct tracer *trace = tr->current_trace;
5164 	struct tracer_flags *tracer_flags = trace->flags;
5165 	struct tracer_opt *opts = NULL;
5166 	int i;
5167 
5168 	for (i = 0; tracer_flags->opts[i].name; i++) {
5169 		opts = &tracer_flags->opts[i];
5170 
5171 		if (strcmp(cmp, opts->name) == 0)
5172 			return __set_tracer_option(tr, trace->flags, opts, neg);
5173 	}
5174 
5175 	return -EINVAL;
5176 }
5177 
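/*
 * A hedged example of a tracer-specific option handled above: with
 * "function" as the current_tracer, the function tracer exposes a
 * "func_stack_trace" flag, so
 *
 *	# echo func_stack_trace > /sys/kernel/tracing/trace_options
 *	# echo nofunc_stack_trace > /sys/kernel/tracing/trace_options
 *
 * round-trips through set_tracer_option() and the tracer's set_flag()
 * callback rather than the global trace_options table.
 */
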
5178 /* Some tracers require overwrite to stay enabled */
5179 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5180 {
5181 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5182 		return -1;
5183 
5184 	return 0;
5185 }
5186 
5187 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5188 {
5189 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5190 	    (mask == TRACE_ITER_RECORD_CMD) ||
5191 	    (mask == TRACE_ITER_TRACE_PRINTK))
5192 		lockdep_assert_held(&event_mutex);
5193 
5194 	/* do nothing if flag is already set */
5195 	if (!!(tr->trace_flags & mask) == !!enabled)
5196 		return 0;
5197 
5198 	/* Give the tracer a chance to approve the change */
5199 	if (tr->current_trace->flag_changed)
5200 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5201 			return -EINVAL;
5202 
5203 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5204 		if (enabled) {
5205 			update_printk_trace(tr);
5206 		} else {
5207 			/*
5208 			 * The global_trace cannot clear this.
5209 			 * Its flag only gets cleared if another instance sets it.
5210 			 */
5211 			if (printk_trace == &global_trace)
5212 				return -EINVAL;
5213 			/*
5214 			 * An instance must always have it set.
5215 			 * By default, that's the global_trace instance.
5216 			 */
5217 			if (printk_trace == tr)
5218 				update_printk_trace(&global_trace);
5219 		}
5220 	}
5221 
5222 	if (enabled)
5223 		tr->trace_flags |= mask;
5224 	else
5225 		tr->trace_flags &= ~mask;
5226 
5227 	if (mask == TRACE_ITER_RECORD_CMD)
5228 		trace_event_enable_cmd_record(enabled);
5229 
5230 	if (mask == TRACE_ITER_RECORD_TGID) {
5231 
5232 		if (trace_alloc_tgid_map() < 0) {
5233 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5234 			return -ENOMEM;
5235 		}
5236 
5237 		trace_event_enable_tgid_record(enabled);
5238 	}
5239 
5240 	if (mask == TRACE_ITER_EVENT_FORK)
5241 		trace_event_follow_fork(tr, enabled);
5242 
5243 	if (mask == TRACE_ITER_FUNC_FORK)
5244 		ftrace_pid_follow_fork(tr, enabled);
5245 
5246 	if (mask == TRACE_ITER_OVERWRITE) {
5247 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5248 #ifdef CONFIG_TRACER_MAX_TRACE
5249 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5250 #endif
5251 	}
5252 
5253 	if (mask == TRACE_ITER_PRINTK) {
5254 		trace_printk_start_stop_comm(enabled);
5255 		trace_printk_control(enabled);
5256 	}
5257 
5258 	return 0;
5259 }
5260 
5261 int trace_set_options(struct trace_array *tr, char *option)
5262 {
5263 	char *cmp;
5264 	int neg = 0;
5265 	int ret;
5266 	size_t orig_len = strlen(option);
5267 	int len;
5268 
5269 	cmp = strstrip(option);
5270 
5271 	len = str_has_prefix(cmp, "no");
5272 	if (len)
5273 		neg = 1;
5274 
5275 	cmp += len;
5276 
5277 	mutex_lock(&event_mutex);
5278 	mutex_lock(&trace_types_lock);
5279 
5280 	ret = match_string(trace_options, -1, cmp);
5281 	/* If no option could be set, test the specific tracer options */
5282 	if (ret < 0)
5283 		ret = set_tracer_option(tr, cmp, neg);
5284 	else
5285 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5286 
5287 	mutex_unlock(&trace_types_lock);
5288 	mutex_unlock(&event_mutex);
5289 
5290 	/*
5291 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5292 	 * turn it back into a space.
5293 	 */
5294 	if (orig_len > strlen(option))
5295 		option[strlen(option)] = ' ';
5296 
5297 	return ret;
5298 }
5299 
5300 static void __init apply_trace_boot_options(void)
5301 {
5302 	char *buf = trace_boot_options_buf;
5303 	char *option;
5304 
5305 	while (true) {
5306 		option = strsep(&buf, ",");
5307 
5308 		if (!option)
5309 			break;
5310 
5311 		if (*option)
5312 			trace_set_options(&global_trace, option);
5313 
5314 		/* Put back the comma to allow this to be called again */
5315 		if (buf)
5316 			*(buf - 1) = ',';
5317 	}
5318 }
5319 
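/*
 * A hedged usage example: the buffer walked above holds the same
 * comma-separated list that the "trace_options=" boot parameter accepts
 * (parsed earlier in this file), e.g. booting with
 *
 *	trace_options=sym-offset,noirq-info
 *
 * is equivalent to later doing
 *
 *	# echo sym-offset > /sys/kernel/tracing/trace_options
 *	# echo noirq-info > /sys/kernel/tracing/trace_options
 */
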
5320 static ssize_t
5321 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5322 			size_t cnt, loff_t *ppos)
5323 {
5324 	struct seq_file *m = filp->private_data;
5325 	struct trace_array *tr = m->private;
5326 	char buf[64];
5327 	int ret;
5328 
5329 	if (cnt >= sizeof(buf))
5330 		return -EINVAL;
5331 
5332 	if (copy_from_user(buf, ubuf, cnt))
5333 		return -EFAULT;
5334 
5335 	buf[cnt] = 0;
5336 
5337 	ret = trace_set_options(tr, buf);
5338 	if (ret < 0)
5339 		return ret;
5340 
5341 	*ppos += cnt;
5342 
5343 	return cnt;
5344 }
5345 
5346 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5347 {
5348 	struct trace_array *tr = inode->i_private;
5349 	int ret;
5350 
5351 	ret = tracing_check_open_get_tr(tr);
5352 	if (ret)
5353 		return ret;
5354 
5355 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5356 	if (ret < 0)
5357 		trace_array_put(tr);
5358 
5359 	return ret;
5360 }
5361 
5362 static const struct file_operations tracing_iter_fops = {
5363 	.open		= tracing_trace_options_open,
5364 	.read		= seq_read,
5365 	.llseek		= seq_lseek,
5366 	.release	= tracing_single_release_tr,
5367 	.write		= tracing_trace_options_write,
5368 };
5369 
5370 static const char readme_msg[] =
5371 	"tracing mini-HOWTO:\n\n"
5372 	"By default tracefs removes all OTH file permission bits.\n"
5373 	"When mounting tracefs an optional group id can be specified\n"
5374 	"which adds the group to every directory and file in tracefs:\n\n"
5375 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5376 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5377 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5378 	" Important files:\n"
5379 	"  trace\t\t\t- The static contents of the buffer\n"
5380 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5381 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5382 	"  current_tracer\t- function and latency tracers\n"
5383 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5384 	"  error_log\t- error log for failed commands (that support it)\n"
5385 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5386 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5387 	"  trace_clock\t\t- change the clock used to order events\n"
5388 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5389 	"      global:   Synced across CPUs but slows tracing down.\n"
5390 	"     counter:   Not a clock, but just an increment\n"
5391 	"      uptime:   Jiffy counter from time of boot\n"
5392 	"        perf:   Same clock that perf events use\n"
5393 #ifdef CONFIG_X86_64
5394 	"     x86-tsc:   TSC cycle counter\n"
5395 #endif
5396 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5397 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5398 	"    absolute:   Absolute (standalone) timestamp\n"
5399 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5400 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5401 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5402 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5403 	"\t\t\t  Remove sub-buffer with rmdir\n"
5404 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5405 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5406 	"\t\t\t  option name\n"
5407 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5408 #ifdef CONFIG_DYNAMIC_FTRACE
5409 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5410 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5411 	"\t\t\t  functions\n"
5412 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5413 	"\t     modules: Can select a group via module\n"
5414 	"\t      Format: :mod:<module-name>\n"
5415 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5416 	"\t    triggers: a command to perform when function is hit\n"
5417 	"\t      Format: <function>:<trigger>[:count]\n"
5418 	"\t     trigger: traceon, traceoff\n"
5419 	"\t\t      enable_event:<system>:<event>\n"
5420 	"\t\t      disable_event:<system>:<event>\n"
5421 #ifdef CONFIG_STACKTRACE
5422 	"\t\t      stacktrace\n"
5423 #endif
5424 #ifdef CONFIG_TRACER_SNAPSHOT
5425 	"\t\t      snapshot\n"
5426 #endif
5427 	"\t\t      dump\n"
5428 	"\t\t      cpudump\n"
5429 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5430 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5431 	"\t     The first one will disable tracing every time do_fault is hit\n"
5432 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5433 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5434 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5435 	"\t       the counter will not decrement. It only decrements when the\n"
5436 	"\t       trigger did work\n"
5437 	"\t     To remove trigger without count:\n"
5438 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5439 	"\t     To remove trigger with a count:\n"
5440 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5441 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5442 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5443 	"\t    modules: Can select a group via module command :mod:\n"
5444 	"\t    Does not accept triggers\n"
5445 #endif /* CONFIG_DYNAMIC_FTRACE */
5446 #ifdef CONFIG_FUNCTION_TRACER
5447 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5448 	"\t\t    (function)\n"
5449 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5450 	"\t\t    (function)\n"
5451 #endif
5452 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5453 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5454 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5455 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5456 #endif
5457 #ifdef CONFIG_TRACER_SNAPSHOT
5458 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5459 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5460 	"\t\t\t  information\n"
5461 #endif
5462 #ifdef CONFIG_STACK_TRACER
5463 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5464 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5465 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5466 	"\t\t\t  new trace)\n"
5467 #ifdef CONFIG_DYNAMIC_FTRACE
5468 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5469 	"\t\t\t  traces\n"
5470 #endif
5471 #endif /* CONFIG_STACK_TRACER */
5472 #ifdef CONFIG_DYNAMIC_EVENTS
5473 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5474 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5475 #endif
5476 #ifdef CONFIG_KPROBE_EVENTS
5477 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5478 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5479 #endif
5480 #ifdef CONFIG_UPROBE_EVENTS
5481 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5482 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5483 #endif
5484 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5485     defined(CONFIG_FPROBE_EVENTS)
5486 	"\t  accepts: event-definitions (one definition per line)\n"
5487 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5488 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5489 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5490 #endif
5491 #ifdef CONFIG_FPROBE_EVENTS
5492 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5493 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5494 #endif
5495 #ifdef CONFIG_HIST_TRIGGERS
5496 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5497 #endif
5498 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5499 	"\t           -:[<group>/][<event>]\n"
5500 #ifdef CONFIG_KPROBE_EVENTS
5501 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5502   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5503 #endif
5504 #ifdef CONFIG_UPROBE_EVENTS
5505   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5506 #endif
5507 	"\t     args: <name>=fetcharg[:type]\n"
5508 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5509 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5510 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5511 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5512 	"\t           <argname>[->field[->field|.field...]],\n"
5513 #endif
5514 #else
5515 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5516 #endif
5517 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5518 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5519 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5520 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5521 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5522 #ifdef CONFIG_HIST_TRIGGERS
5523 	"\t    field: <stype> <name>;\n"
5524 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5525 	"\t           [unsigned] char/int/long\n"
5526 #endif
5527 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5528 	"\t            of the <attached-group>/<attached-event>.\n"
5529 #endif
5530 	"  set_event\t\t- Enables events by name written into it\n"
5531 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5532 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5533 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5534 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5535 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5536 	"\t\t\t  events\n"
5537 	"      filter\t\t- If set, only events passing filter are traced\n"
5538 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5539 	"\t\t\t  <event>:\n"
5540 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5541 	"      filter\t\t- If set, only events passing filter are traced\n"
5542 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5543 	"\t    Format: <trigger>[:count][if <filter>]\n"
5544 	"\t   trigger: traceon, traceoff\n"
5545 	"\t            enable_event:<system>:<event>\n"
5546 	"\t            disable_event:<system>:<event>\n"
5547 #ifdef CONFIG_HIST_TRIGGERS
5548 	"\t            enable_hist:<system>:<event>\n"
5549 	"\t            disable_hist:<system>:<event>\n"
5550 #endif
5551 #ifdef CONFIG_STACKTRACE
5552 	"\t\t    stacktrace\n"
5553 #endif
5554 #ifdef CONFIG_TRACER_SNAPSHOT
5555 	"\t\t    snapshot\n"
5556 #endif
5557 #ifdef CONFIG_HIST_TRIGGERS
5558 	"\t\t    hist (see below)\n"
5559 #endif
5560 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5561 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5562 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5563 	"\t                  events/block/block_unplug/trigger\n"
5564 	"\t   The first disables tracing every time block_unplug is hit.\n"
5565 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5566 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5567 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5568 	"\t   Like function triggers, the counter is only decremented if it\n"
5569 	"\t    enabled or disabled tracing.\n"
5570 	"\t   To remove a trigger without a count:\n"
5571 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5572 	"\t   To remove a trigger with a count:\n"
5573 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5574 	"\t   Filters can be ignored when removing a trigger.\n"
5575 #ifdef CONFIG_HIST_TRIGGERS
5576 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5577 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5578 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5579 	"\t            [:values=<field1[,field2,...]>]\n"
5580 	"\t            [:sort=<field1[,field2,...]>]\n"
5581 	"\t            [:size=#entries]\n"
5582 	"\t            [:pause][:continue][:clear]\n"
5583 	"\t            [:name=histname1]\n"
5584 	"\t            [:nohitcount]\n"
5585 	"\t            [:<handler>.<action>]\n"
5586 	"\t            [if <filter>]\n\n"
5587 	"\t    Note, special fields can be used as well:\n"
5588 	"\t            common_timestamp - to record current timestamp\n"
5589 	"\t            common_cpu - to record the CPU the event happened on\n"
5590 	"\n"
5591 	"\t    A hist trigger variable can be:\n"
5592 	"\t        - a reference to a field e.g. x=common_timestamp,\n"
5593 	"\t        - a reference to another variable e.g. y=$x,\n"
5594 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5595 	"\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5596 	"\n"
5597 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5598 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5599 	"\t    variable reference, field or numeric literal.\n"
5600 	"\n"
5601 	"\t    When a matching event is hit, an entry is added to a hash\n"
5602 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5603 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5604 	"\t    correspond to fields in the event's format description.  Keys\n"
5605 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5606 	"\t    Compound keys consisting of up to two fields can be specified\n"
5607 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5608 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5609 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5610 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5611 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5612 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5613 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5614 	"\t    its histogram data will be shared with other triggers of the\n"
5615 	"\t    same name, and trigger hits will update this common data.\n\n"
5616 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5617 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5618 	"\t    triggers attached to an event, there will be a table for each\n"
5619 	"\t    trigger in the output.  The table displayed for a named\n"
5620 	"\t    trigger will be the same as any other instance having the\n"
5621 	"\t    same name.  The default format used to display a given field\n"
5622 	"\t    can be modified by appending any of the following modifiers\n"
5623 	"\t    to the field name, as applicable:\n\n"
5624 	"\t            .hex        display a number as a hex value\n"
5625 	"\t            .sym        display an address as a symbol\n"
5626 	"\t            .sym-offset display an address as a symbol and offset\n"
5627 	"\t            .execname   display a common_pid as a program name\n"
5628 	"\t            .syscall    display a syscall id as a syscall name\n"
5629 	"\t            .log2       display log2 value rather than raw number\n"
5630 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5631 	"\t            .usecs      display a common_timestamp in microseconds\n"
5632 	"\t            .percent    display a number as a percentage value\n"
5633 	"\t            .graph      display a bar-graph of a value\n\n"
5634 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5635 	"\t    trigger or to start a hist trigger but not log any events\n"
5636 	"\t    until told to do so.  'continue' can be used to start or\n"
5637 	"\t    restart a paused hist trigger.\n\n"
5638 	"\t    The 'clear' parameter will clear the contents of a running\n"
5639 	"\t    hist trigger and leave its current paused/active state\n"
5640 	"\t    unchanged.\n\n"
5641 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5642 	"\t    raw hitcount in the histogram.\n\n"
5643 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5644 	"\t    have one event conditionally start and stop another event's\n"
5645 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5646 	"\t    the enable_event and disable_event triggers.\n\n"
5647 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5648 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5649 	"\t        <handler>.<action>\n\n"
5650 	"\t    The available handlers are:\n\n"
5651 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5652 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5653 	"\t        onchange(var)            - invoke action if var changes\n\n"
5654 	"\t    The available actions are:\n\n"
5655 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5656 	"\t        save(field,...)                      - save current event fields\n"
5657 #ifdef CONFIG_TRACER_SNAPSHOT
5658 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5659 #endif
5660 #ifdef CONFIG_SYNTH_EVENTS
5661 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5662 	"\t  Write into this file to define/undefine new synthetic events.\n"
5663 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5664 #endif
5665 #endif
5666 ;
5667 
5668 static ssize_t
5669 tracing_readme_read(struct file *filp, char __user *ubuf,
5670 		       size_t cnt, loff_t *ppos)
5671 {
5672 	return simple_read_from_buffer(ubuf, cnt, ppos,
5673 					readme_msg, strlen(readme_msg));
5674 }
5675 
5676 static const struct file_operations tracing_readme_fops = {
5677 	.open		= tracing_open_generic,
5678 	.read		= tracing_readme_read,
5679 	.llseek		= generic_file_llseek,
5680 };
5681 
5682 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
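/*
 * Helper for the eval_map seq_file iterator. A slot with no eval_string is
 * the tail of the current saved array: follow tail.next to the next array
 * (if there is one) and step past its head item to the first real map.
 */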
5683 static union trace_eval_map_item *
5684 update_eval_map(union trace_eval_map_item *ptr)
5685 {
5686 	if (!ptr->map.eval_string) {
5687 		if (ptr->tail.next) {
5688 			ptr = ptr->tail.next;
5689 			/* Set ptr to the next real item (skip head) */
5690 			ptr++;
5691 		} else
5692 			return NULL;
5693 	}
5694 	return ptr;
5695 }
5696 
5697 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5698 {
5699 	union trace_eval_map_item *ptr = v;
5700 
5701 	/*
5702 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5703 	 * This really should never happen.
5704 	 */
5705 	(*pos)++;
5706 	ptr = update_eval_map(ptr);
5707 	if (WARN_ON_ONCE(!ptr))
5708 		return NULL;
5709 
5710 	ptr++;
5711 	ptr = update_eval_map(ptr);
5712 
5713 	return ptr;
5714 }
5715 
5716 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5717 {
5718 	union trace_eval_map_item *v;
5719 	loff_t l = 0;
5720 
5721 	mutex_lock(&trace_eval_mutex);
5722 
5723 	v = trace_eval_maps;
5724 	if (v)
5725 		v++;
5726 
5727 	while (v && l < *pos) {
5728 		v = eval_map_next(m, v, &l);
5729 	}
5730 
5731 	return v;
5732 }
5733 
5734 static void eval_map_stop(struct seq_file *m, void *v)
5735 {
5736 	mutex_unlock(&trace_eval_mutex);
5737 }
5738 
5739 static int eval_map_show(struct seq_file *m, void *v)
5740 {
5741 	union trace_eval_map_item *ptr = v;
5742 
5743 	seq_printf(m, "%s %ld (%s)\n",
5744 		   ptr->map.eval_string, ptr->map.eval_value,
5745 		   ptr->map.system);
5746 
5747 	return 0;
5748 }
5749 
5750 static const struct seq_operations tracing_eval_map_seq_ops = {
5751 	.start		= eval_map_start,
5752 	.next		= eval_map_next,
5753 	.stop		= eval_map_stop,
5754 	.show		= eval_map_show,
5755 };
5756 
5757 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5758 {
5759 	int ret;
5760 
5761 	ret = tracing_check_open_get_tr(NULL);
5762 	if (ret)
5763 		return ret;
5764 
5765 	return seq_open(filp, &tracing_eval_map_seq_ops);
5766 }
5767 
5768 static const struct file_operations tracing_eval_map_fops = {
5769 	.open		= tracing_eval_map_open,
5770 	.read		= seq_read,
5771 	.llseek		= seq_lseek,
5772 	.release	= seq_release,
5773 };
5774 
5775 static inline union trace_eval_map_item *
5776 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5777 {
5778 	/* Return tail of array given the head */
5779 	return ptr + ptr->head.length + 1;
5780 }
5781 
5782 static void
5783 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5784 			   int len)
5785 {
5786 	struct trace_eval_map **stop;
5787 	struct trace_eval_map **map;
5788 	union trace_eval_map_item *map_array;
5789 	union trace_eval_map_item *ptr;
5790 
5791 	stop = start + len;
5792 
5793 	/*
5794 	 * The trace_eval_maps array contains the maps plus a head and tail
5795 	 * item: the head holds the module and the length of the array, and
5796 	 * the tail holds a pointer to the next array in the list.
5797 	 */
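	/*
	 * For example, a module providing three eval maps is saved as:
	 *
	 *   [ head: mod, length=3 ][ map 0 ][ map 1 ][ map 2 ][ tail ]
	 *
	 * with the tail either zeroed or pointing at the next such array.
	 */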
5798 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5799 	if (!map_array) {
5800 		pr_warn("Unable to allocate trace eval mapping\n");
5801 		return;
5802 	}
5803 
5804 	guard(mutex)(&trace_eval_mutex);
5805 
5806 	if (!trace_eval_maps)
5807 		trace_eval_maps = map_array;
5808 	else {
5809 		ptr = trace_eval_maps;
5810 		for (;;) {
5811 			ptr = trace_eval_jmp_to_tail(ptr);
5812 			if (!ptr->tail.next)
5813 				break;
5814 			ptr = ptr->tail.next;
5815 
5816 		}
5817 		ptr->tail.next = map_array;
5818 	}
5819 	map_array->head.mod = mod;
5820 	map_array->head.length = len;
5821 	map_array++;
5822 
5823 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5824 		map_array->map = **map;
5825 		map_array++;
5826 	}
5827 	memset(map_array, 0, sizeof(*map_array));
5828 }
5829 
5830 static void trace_create_eval_file(struct dentry *d_tracer)
5831 {
5832 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5833 			  NULL, &tracing_eval_map_fops);
5834 }
5835 
5836 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5837 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5838 static inline void trace_insert_eval_map_file(struct module *mod,
5839 			      struct trace_eval_map **start, int len) { }
5840 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5841 
5842 static void trace_insert_eval_map(struct module *mod,
5843 				  struct trace_eval_map **start, int len)
5844 {
5845 	struct trace_eval_map **map;
5846 
5847 	if (len <= 0)
5848 		return;
5849 
5850 	map = start;
5851 
5852 	trace_event_eval_update(map, len);
5853 
5854 	trace_insert_eval_map_file(mod, start, len);
5855 }
5856 
5857 static ssize_t
5858 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5859 		       size_t cnt, loff_t *ppos)
5860 {
5861 	struct trace_array *tr = filp->private_data;
5862 	char buf[MAX_TRACER_SIZE+2];
5863 	int r;
5864 
5865 	mutex_lock(&trace_types_lock);
5866 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5867 	mutex_unlock(&trace_types_lock);
5868 
5869 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5870 }
5871 
5872 int tracer_init(struct tracer *t, struct trace_array *tr)
5873 {
5874 	tracing_reset_online_cpus(&tr->array_buffer);
5875 	return t->init(tr);
5876 }
5877 
5878 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5879 {
5880 	int cpu;
5881 
5882 	for_each_tracing_cpu(cpu)
5883 		per_cpu_ptr(buf->data, cpu)->entries = val;
5884 }
5885 
5886 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5887 {
5888 	if (cpu == RING_BUFFER_ALL_CPUS) {
5889 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5890 	} else {
5891 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5892 	}
5893 }
5894 
5895 #ifdef CONFIG_TRACER_MAX_TRACE
5896 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5897 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5898 					struct array_buffer *size_buf, int cpu_id)
5899 {
5900 	int cpu, ret = 0;
5901 
5902 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5903 		for_each_tracing_cpu(cpu) {
5904 			ret = ring_buffer_resize(trace_buf->buffer,
5905 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5906 			if (ret < 0)
5907 				break;
5908 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5909 				per_cpu_ptr(size_buf->data, cpu)->entries;
5910 		}
5911 	} else {
5912 		ret = ring_buffer_resize(trace_buf->buffer,
5913 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5914 		if (ret == 0)
5915 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5916 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5917 	}
5918 
5919 	return ret;
5920 }
5921 #endif /* CONFIG_TRACER_MAX_TRACE */
5922 
5923 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5924 					unsigned long size, int cpu)
5925 {
5926 	int ret;
5927 
5928 	/*
5929 	 * If kernel or user changes the size of the ring buffer
5930 	 * we use the size that was given, and we can forget about
5931 	 * expanding it later.
5932 	 */
5933 	trace_set_ring_buffer_expanded(tr);
5934 
5935 	/* May be called before buffers are initialized */
5936 	if (!tr->array_buffer.buffer)
5937 		return 0;
5938 
5939 	/* Do not allow tracing while resizing ring buffer */
5940 	tracing_stop_tr(tr);
5941 
5942 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5943 	if (ret < 0)
5944 		goto out_start;
5945 
5946 #ifdef CONFIG_TRACER_MAX_TRACE
5947 	if (!tr->allocated_snapshot)
5948 		goto out;
5949 
5950 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5951 	if (ret < 0) {
5952 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5953 						     &tr->array_buffer, cpu);
5954 		if (r < 0) {
5955 			/*
5956 			 * AARGH! We are left with a max buffer of a
5957 			 * different size!
5958 			 * The max buffer is our "snapshot" buffer.
5959 			 * When a tracer needs a snapshot (one of the
5960 			 * latency tracers), it swaps the max buffer
5961 			 * with the saved snapshot. We succeeded in
5962 			 * updating the size of the main buffer, but
5963 			 * failed to update the size of the max buffer.
5964 			 * But when we tried to reset the main buffer to
5965 			 * the original size, we failed there too. This
5966 			 * is very unlikely to happen, but if it does,
5967 			 * warn and kill all tracing.
5968 			 */
5969 			WARN_ON(1);
5970 			tracing_disabled = 1;
5971 		}
5972 		goto out_start;
5973 	}
5974 
5975 	update_buffer_entries(&tr->max_buffer, cpu);
5976 
5977  out:
5978 #endif /* CONFIG_TRACER_MAX_TRACE */
5979 
5980 	update_buffer_entries(&tr->array_buffer, cpu);
5981  out_start:
5982 	tracing_start_tr(tr);
5983 	return ret;
5984 }
5985 
5986 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5987 				  unsigned long size, int cpu_id)
5988 {
5989 	guard(mutex)(&trace_types_lock);
5990 
5991 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5992 		/* make sure, this cpu is enabled in the mask */
5993 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5994 			return -EINVAL;
5995 	}
5996 
5997 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
5998 }
5999 
6000 struct trace_mod_entry {
6001 	unsigned long	mod_addr;
6002 	char		mod_name[MODULE_NAME_LEN];
6003 };
6004 
6005 struct trace_scratch {
6006 	unsigned long		text_addr;
6007 	unsigned long		nr_entries;
6008 	struct trace_mod_entry	entries[];
6009 };
6010 
6011 static DEFINE_MUTEX(scratch_mutex);
6012 
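/*
 * bsearch() comparator for the sorted module entry table: an address
 * matches an entry when it falls within [ent->mod_addr, ent[1].mod_addr),
 * which is why the comparison peeks at the following entry.
 */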
6013 static int cmp_mod_entry(const void *key, const void *pivot)
6014 {
6015 	unsigned long addr = (unsigned long)key;
6016 	const struct trace_mod_entry *ent = pivot;
6017 
6018 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6019 		return 0;
6020 	else
6021 		return addr - ent->mod_addr;
6022 }
6023 
6024 /**
6025  * trace_adjust_address() - Adjust prev boot address to current address.
6026  * @tr: Persistent ring buffer's trace_array.
6027  * @addr: Address in @tr which is adjusted.
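 *
 * Return: @addr translated from the previous boot's layout to the current
 * one, using the kernel text delta or the matching module's delta; @addr is
 * returned unchanged when there is no last-boot data.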
6028  */
6029 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6030 {
6031 	struct trace_module_delta *module_delta;
6032 	struct trace_scratch *tscratch;
6033 	struct trace_mod_entry *entry;
6034 	int idx = 0, nr_entries;
6035 
6036 	/* If we don't have last boot delta, return the address */
6037 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6038 		return addr;
6039 
6040 	/* tr->module_delta must be protected by rcu. */
6041 	guard(rcu)();
6042 	tscratch = tr->scratch;
6043 	/* If there is no tscratch, module_delta must be NULL. */
6044 	module_delta = READ_ONCE(tr->module_delta);
6045 	if (!module_delta || tscratch->entries[0].mod_addr > addr)
6046 		return addr + tr->text_delta;
6047 
6048 	/* Note that entries must be sorted. */
6049 	nr_entries = tscratch->nr_entries;
6050 	if (nr_entries == 1 ||
6051 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6052 		idx = nr_entries - 1;
6053 	else {
6054 		entry = __inline_bsearch((void *)addr,
6055 				tscratch->entries,
6056 				nr_entries - 1,
6057 				sizeof(tscratch->entries[0]),
6058 				cmp_mod_entry);
6059 		if (entry)
6060 			idx = entry - tscratch->entries;
6061 	}
6062 
6063 	return addr + module_delta->delta[idx];
6064 }
6065 
6066 #ifdef CONFIG_MODULES
6067 static int save_mod(struct module *mod, void *data)
6068 {
6069 	struct trace_array *tr = data;
6070 	struct trace_scratch *tscratch;
6071 	struct trace_mod_entry *entry;
6072 	unsigned int size;
6073 
6074 	tscratch = tr->scratch;
6075 	if (!tscratch)
6076 		return -1;
6077 	size = tr->scratch_size;
6078 
6079 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6080 		return -1;
6081 
6082 	entry = &tscratch->entries[tscratch->nr_entries];
6083 
6084 	tscratch->nr_entries++;
6085 
6086 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6087 	strscpy(entry->mod_name, mod->name);
6088 
6089 	return 0;
6090 }
6091 #else
6092 static int save_mod(struct module *mod, void *data)
6093 {
6094 	return 0;
6095 }
6096 #endif
6097 
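/*
 * If the persistent ring buffer still holds data from a previous boot,
 * switch it over to the current boot: clear the LAST_BOOT flag, rebuild the
 * saved module table, wipe all CPU buffers and drop the old address deltas.
 */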
6098 static void update_last_data(struct trace_array *tr)
6099 {
6100 	struct trace_module_delta *module_delta;
6101 	struct trace_scratch *tscratch;
6102 
6103 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6104 		return;
6105 
6106 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6107 		return;
6108 
6109 	/* Only clear and update the buffer if it has previous boot data. */
6110 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6111 
6112 	/* Reset the module list and reload them */
6113 	if (tr->scratch) {
6114 		struct trace_scratch *tscratch = tr->scratch;
6115 
6116 		memset(tscratch->entries, 0,
6117 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6118 		tscratch->nr_entries = 0;
6119 
6120 		guard(mutex)(&scratch_mutex);
6121 		module_for_each_mod(save_mod, tr);
6122 	}
6123 
6124 	/*
6125 	 * Need to clear all CPU buffers as events from the previous
6126 	 * boot must not be mixed with events from this boot, as that
6127 	 * would cause a confusing trace. Clear all CPU buffers, even
6128 	 * those that may currently be offline.
6129 	 */
6130 	tracing_reset_all_cpus(&tr->array_buffer);
6131 
6132 	/* Using current data now */
6133 	tr->text_delta = 0;
6134 
6135 	if (!tr->scratch)
6136 		return;
6137 
6138 	tscratch = tr->scratch;
6139 	module_delta = READ_ONCE(tr->module_delta);
6140 	WRITE_ONCE(tr->module_delta, NULL);
6141 	kfree_rcu(module_delta, rcu);
6142 
6143 	/* Set the persistent ring buffer meta data to this address */
6144 	tscratch->text_addr = (unsigned long)_text;
6145 }
6146 
6147 /**
6148  * tracing_update_buffers - used by tracing facility to expand ring buffers
6149  * @tr: The tracing instance
6150  *
6151  * To save memory when tracing is never used on a system that has it
6152  * configured in, the ring buffers are set to a minimum size. Once a
6153  * user starts to use the tracing facility, the buffers need to grow
6154  * to their default size.
6155  *
6156  * This function is to be called when a tracer is about to be used.
6157  */
6158 int tracing_update_buffers(struct trace_array *tr)
6159 {
6160 	int ret = 0;
6161 
6162 	mutex_lock(&trace_types_lock);
6163 
6164 	update_last_data(tr);
6165 
6166 	if (!tr->ring_buffer_expanded)
6167 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6168 						RING_BUFFER_ALL_CPUS);
6169 	mutex_unlock(&trace_types_lock);
6170 
6171 	return ret;
6172 }
6173 
6174 struct trace_option_dentry;
6175 
6176 static void
6177 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6178 
6179 /*
6180  * Used to clear out the tracer before deletion of an instance.
6181  * Must have trace_types_lock held.
6182  */
6183 static void tracing_set_nop(struct trace_array *tr)
6184 {
6185 	if (tr->current_trace == &nop_trace)
6186 		return;
6187 
6188 	tr->current_trace->enabled--;
6189 
6190 	if (tr->current_trace->reset)
6191 		tr->current_trace->reset(tr);
6192 
6193 	tr->current_trace = &nop_trace;
6194 }
6195 
6196 static bool tracer_options_updated;
6197 
6198 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6199 {
6200 	/* Only enable if the directory has been created already. */
6201 	if (!tr->dir)
6202 		return;
6203 
6204 	/* Only create trace option files after update_tracer_options finishes */
6205 	if (!tracer_options_updated)
6206 		return;
6207 
6208 	create_trace_option_files(tr, t);
6209 }
6210 
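/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if needed,
 * tear down the current tracer (releasing its snapshot buffer when it is
 * no longer required) and initialize the new one.
 */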
6211 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6212 {
6213 	struct tracer *t;
6214 #ifdef CONFIG_TRACER_MAX_TRACE
6215 	bool had_max_tr;
6216 #endif
6217 	int ret;
6218 
6219 	guard(mutex)(&trace_types_lock);
6220 
6221 	update_last_data(tr);
6222 
6223 	if (!tr->ring_buffer_expanded) {
6224 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6225 						RING_BUFFER_ALL_CPUS);
6226 		if (ret < 0)
6227 			return ret;
6228 		ret = 0;
6229 	}
6230 
6231 	for (t = trace_types; t; t = t->next) {
6232 		if (strcmp(t->name, buf) == 0)
6233 			break;
6234 	}
6235 	if (!t)
6236 		return -EINVAL;
6237 
6238 	if (t == tr->current_trace)
6239 		return 0;
6240 
6241 #ifdef CONFIG_TRACER_SNAPSHOT
6242 	if (t->use_max_tr) {
6243 		local_irq_disable();
6244 		arch_spin_lock(&tr->max_lock);
6245 		ret = tr->cond_snapshot ? -EBUSY : 0;
6246 		arch_spin_unlock(&tr->max_lock);
6247 		local_irq_enable();
6248 		if (ret)
6249 			return ret;
6250 	}
6251 #endif
6252 	/* Some tracers won't work on kernel command line */
6253 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6254 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6255 			t->name);
6256 		return -EINVAL;
6257 	}
6258 
6259 	/* Some tracers are only allowed for the top level buffer */
6260 	if (!trace_ok_for_array(t, tr))
6261 		return -EINVAL;
6262 
6263 	/* If trace pipe files are being read, we can't change the tracer */
6264 	if (tr->trace_ref)
6265 		return -EBUSY;
6266 
6267 	trace_branch_disable();
6268 
6269 	tr->current_trace->enabled--;
6270 
6271 	if (tr->current_trace->reset)
6272 		tr->current_trace->reset(tr);
6273 
6274 #ifdef CONFIG_TRACER_MAX_TRACE
6275 	had_max_tr = tr->current_trace->use_max_tr;
6276 
6277 	/* Current trace needs to be nop_trace before synchronize_rcu */
6278 	tr->current_trace = &nop_trace;
6279 
6280 	if (had_max_tr && !t->use_max_tr) {
6281 		/*
6282 		 * We need to make sure that the update_max_tr sees that
6283 		 * current_trace changed to nop_trace to keep it from
6284 		 * swapping the buffers after we resize it.
6285 		 * update_max_tr() is called with interrupts disabled,
6286 		 * so a synchronize_rcu() is sufficient.
6287 		 */
6288 		synchronize_rcu();
6289 		free_snapshot(tr);
6290 		tracing_disarm_snapshot(tr);
6291 	}
6292 
6293 	if (!had_max_tr && t->use_max_tr) {
6294 		ret = tracing_arm_snapshot_locked(tr);
6295 		if (ret)
6296 			return ret;
6297 	}
6298 #else
6299 	tr->current_trace = &nop_trace;
6300 #endif
6301 
6302 	if (t->init) {
6303 		ret = tracer_init(t, tr);
6304 		if (ret) {
6305 #ifdef CONFIG_TRACER_MAX_TRACE
6306 			if (t->use_max_tr)
6307 				tracing_disarm_snapshot(tr);
6308 #endif
6309 			return ret;
6310 		}
6311 	}
6312 
6313 	tr->current_trace = t;
6314 	tr->current_trace->enabled++;
6315 	trace_branch_enable(tr);
6316 
6317 	return 0;
6318 }
6319 
6320 static ssize_t
6321 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6322 			size_t cnt, loff_t *ppos)
6323 {
6324 	struct trace_array *tr = filp->private_data;
6325 	char buf[MAX_TRACER_SIZE+1];
6326 	char *name;
6327 	size_t ret;
6328 	int err;
6329 
6330 	ret = cnt;
6331 
6332 	if (cnt > MAX_TRACER_SIZE)
6333 		cnt = MAX_TRACER_SIZE;
6334 
6335 	if (copy_from_user(buf, ubuf, cnt))
6336 		return -EFAULT;
6337 
6338 	buf[cnt] = 0;
6339 
6340 	name = strim(buf);
6341 
6342 	err = tracing_set_tracer(tr, name);
6343 	if (err)
6344 		return err;
6345 
6346 	*ppos += ret;
6347 
6348 	return ret;
6349 }
6350 
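/*
 * The tracing_nsecs helpers keep values in nanoseconds internally but expose
 * them to user space in microseconds: reads convert via nsecs_to_usecs() and
 * writes multiply the parsed value by 1000.
 */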
6351 static ssize_t
6352 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6353 		   size_t cnt, loff_t *ppos)
6354 {
6355 	char buf[64];
6356 	int r;
6357 
6358 	r = snprintf(buf, sizeof(buf), "%ld\n",
6359 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6360 	if (r > sizeof(buf))
6361 		r = sizeof(buf);
6362 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6363 }
6364 
6365 static ssize_t
6366 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6367 		    size_t cnt, loff_t *ppos)
6368 {
6369 	unsigned long val;
6370 	int ret;
6371 
6372 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6373 	if (ret)
6374 		return ret;
6375 
6376 	*ptr = val * 1000;
6377 
6378 	return cnt;
6379 }
6380 
6381 static ssize_t
6382 tracing_thresh_read(struct file *filp, char __user *ubuf,
6383 		    size_t cnt, loff_t *ppos)
6384 {
6385 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6386 }
6387 
6388 static ssize_t
6389 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6390 		     size_t cnt, loff_t *ppos)
6391 {
6392 	struct trace_array *tr = filp->private_data;
6393 	int ret;
6394 
6395 	guard(mutex)(&trace_types_lock);
6396 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6397 	if (ret < 0)
6398 		return ret;
6399 
6400 	if (tr->current_trace->update_thresh) {
6401 		ret = tr->current_trace->update_thresh(tr);
6402 		if (ret < 0)
6403 			return ret;
6404 	}
6405 
6406 	return cnt;
6407 }
6408 
6409 #ifdef CONFIG_TRACER_MAX_TRACE
6410 
6411 static ssize_t
6412 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6413 		     size_t cnt, loff_t *ppos)
6414 {
6415 	struct trace_array *tr = filp->private_data;
6416 
6417 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6418 }
6419 
6420 static ssize_t
6421 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6422 		      size_t cnt, loff_t *ppos)
6423 {
6424 	struct trace_array *tr = filp->private_data;
6425 
6426 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6427 }
6428 
6429 #endif
6430 
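/*
 * Only one trace_pipe reader is allowed per CPU, and the "all CPUs" pipe is
 * exclusive with any per-CPU reader: tr->pipe_cpumask tracks which CPUs
 * currently have an open pipe.
 */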
6431 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6432 {
6433 	if (cpu == RING_BUFFER_ALL_CPUS) {
6434 		if (cpumask_empty(tr->pipe_cpumask)) {
6435 			cpumask_setall(tr->pipe_cpumask);
6436 			return 0;
6437 		}
6438 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6439 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6440 		return 0;
6441 	}
6442 	return -EBUSY;
6443 }
6444 
6445 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6446 {
6447 	if (cpu == RING_BUFFER_ALL_CPUS) {
6448 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6449 		cpumask_clear(tr->pipe_cpumask);
6450 	} else {
6451 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6452 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6453 	}
6454 }
6455 
6456 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6457 {
6458 	struct trace_array *tr = inode->i_private;
6459 	struct trace_iterator *iter;
6460 	int cpu;
6461 	int ret;
6462 
6463 	ret = tracing_check_open_get_tr(tr);
6464 	if (ret)
6465 		return ret;
6466 
6467 	mutex_lock(&trace_types_lock);
6468 	cpu = tracing_get_cpu(inode);
6469 	ret = open_pipe_on_cpu(tr, cpu);
6470 	if (ret)
6471 		goto fail_pipe_on_cpu;
6472 
6473 	/* create a buffer to store the information to pass to userspace */
6474 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6475 	if (!iter) {
6476 		ret = -ENOMEM;
6477 		goto fail_alloc_iter;
6478 	}
6479 
6480 	trace_seq_init(&iter->seq);
6481 	iter->trace = tr->current_trace;
6482 
6483 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6484 		ret = -ENOMEM;
6485 		goto fail;
6486 	}
6487 
6488 	/* trace pipe does not show start of buffer */
6489 	cpumask_setall(iter->started);
6490 
6491 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6492 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6493 
6494 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6495 	if (trace_clocks[tr->clock_id].in_ns)
6496 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6497 
6498 	iter->tr = tr;
6499 	iter->array_buffer = &tr->array_buffer;
6500 	iter->cpu_file = cpu;
6501 	mutex_init(&iter->mutex);
6502 	filp->private_data = iter;
6503 
6504 	if (iter->trace->pipe_open)
6505 		iter->trace->pipe_open(iter);
6506 
6507 	nonseekable_open(inode, filp);
6508 
6509 	tr->trace_ref++;
6510 
6511 	mutex_unlock(&trace_types_lock);
6512 	return ret;
6513 
6514 fail:
6515 	kfree(iter);
6516 fail_alloc_iter:
6517 	close_pipe_on_cpu(tr, cpu);
6518 fail_pipe_on_cpu:
6519 	__trace_array_put(tr);
6520 	mutex_unlock(&trace_types_lock);
6521 	return ret;
6522 }
6523 
6524 static int tracing_release_pipe(struct inode *inode, struct file *file)
6525 {
6526 	struct trace_iterator *iter = file->private_data;
6527 	struct trace_array *tr = inode->i_private;
6528 
6529 	mutex_lock(&trace_types_lock);
6530 
6531 	tr->trace_ref--;
6532 
6533 	if (iter->trace->pipe_close)
6534 		iter->trace->pipe_close(iter);
6535 	close_pipe_on_cpu(tr, iter->cpu_file);
6536 	mutex_unlock(&trace_types_lock);
6537 
6538 	free_trace_iter_content(iter);
6539 	kfree(iter);
6540 
6541 	trace_array_put(tr);
6542 
6543 	return 0;
6544 }
6545 
6546 static __poll_t
6547 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6548 {
6549 	struct trace_array *tr = iter->tr;
6550 
6551 	/* Iterators are static, they should be filled or empty */
6552 	if (trace_buffer_iter(iter, iter->cpu_file))
6553 		return EPOLLIN | EPOLLRDNORM;
6554 
6555 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6556 		/*
6557 		 * Always select as readable when in blocking mode
6558 		 */
6559 		return EPOLLIN | EPOLLRDNORM;
6560 	else
6561 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6562 					     filp, poll_table, iter->tr->buffer_percent);
6563 }
6564 
6565 static __poll_t
6566 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6567 {
6568 	struct trace_iterator *iter = filp->private_data;
6569 
6570 	return trace_poll(iter, filp, poll_table);
6571 }
6572 
6573 /* Must be called with iter->mutex held. */
6574 static int tracing_wait_pipe(struct file *filp)
6575 {
6576 	struct trace_iterator *iter = filp->private_data;
6577 	int ret;
6578 
6579 	while (trace_empty(iter)) {
6580 
6581 		if ((filp->f_flags & O_NONBLOCK)) {
6582 			return -EAGAIN;
6583 		}
6584 
6585 		/*
6586 		 * We block until we have read something and tracing has been
6587 		 * disabled. We still block if tracing is disabled but we have
6588 		 * never read anything. This allows a user to cat this file,
6589 		 * and then enable tracing. But after we have read something,
6590 		 * we give an EOF when tracing is again disabled.
6591 		 *
6592 		 * iter->pos will be 0 if we haven't read anything.
6593 		 */
6594 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6595 			break;
6596 
6597 		mutex_unlock(&iter->mutex);
6598 
6599 		ret = wait_on_pipe(iter, 0);
6600 
6601 		mutex_lock(&iter->mutex);
6602 
6603 		if (ret)
6604 			return ret;
6605 	}
6606 
6607 	return 1;
6608 }
6609 
6610 /*
6611  * Consumer reader.
6612  */
6613 static ssize_t
6614 tracing_read_pipe(struct file *filp, char __user *ubuf,
6615 		  size_t cnt, loff_t *ppos)
6616 {
6617 	struct trace_iterator *iter = filp->private_data;
6618 	ssize_t sret;
6619 
6620 	/*
6621 	 * Avoid more than one consumer on a single file descriptor.
6622 	 * This is just a matter of trace coherency; the ring buffer itself
6623 	 * is protected.
6624 	 */
6625 	guard(mutex)(&iter->mutex);
6626 
6627 	/* return any leftover data */
6628 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6629 	if (sret != -EBUSY)
6630 		return sret;
6631 
6632 	trace_seq_init(&iter->seq);
6633 
6634 	if (iter->trace->read) {
6635 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6636 		if (sret)
6637 			return sret;
6638 	}
6639 
6640 waitagain:
6641 	sret = tracing_wait_pipe(filp);
6642 	if (sret <= 0)
6643 		return sret;
6644 
6645 	/* stop when tracing is finished */
6646 	if (trace_empty(iter))
6647 		return 0;
6648 
6649 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6650 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6651 
6652 	/* reset all but tr, trace, and overruns */
6653 	trace_iterator_reset(iter);
6654 	cpumask_clear(iter->started);
6655 	trace_seq_init(&iter->seq);
6656 
6657 	trace_event_read_lock();
6658 	trace_access_lock(iter->cpu_file);
6659 	while (trace_find_next_entry_inc(iter) != NULL) {
6660 		enum print_line_t ret;
6661 		int save_len = iter->seq.seq.len;
6662 
6663 		ret = print_trace_line(iter);
6664 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6665 			/*
6666 			 * If one print_trace_line() fills the entire trace_seq in
6667 			 * one shot, trace_seq_to_user() will return -EBUSY because
6668 			 * save_len == 0. In this case we need to consume it, otherwise
6669 			 * the loop will peek this event next time, looping forever.
6670 			 */
6671 			if (save_len == 0) {
6672 				iter->seq.full = 0;
6673 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6674 				trace_consume(iter);
6675 				break;
6676 			}
6677 
6678 			/* In other cases, don't print partial lines */
6679 			iter->seq.seq.len = save_len;
6680 			break;
6681 		}
6682 		if (ret != TRACE_TYPE_NO_CONSUME)
6683 			trace_consume(iter);
6684 
6685 		if (trace_seq_used(&iter->seq) >= cnt)
6686 			break;
6687 
6688 		/*
6689 		 * Setting the full flag means we reached the trace_seq buffer
6690 		 * size and we should have left via the partial output condition
6691 		 * above. One of the trace_seq_* functions is not used properly.
6692 		 */
6693 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6694 			  iter->ent->type);
6695 	}
6696 	trace_access_unlock(iter->cpu_file);
6697 	trace_event_read_unlock();
6698 
6699 	/* Now copy what we have to the user */
6700 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6701 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6702 		trace_seq_init(&iter->seq);
6703 
6704 	/*
6705 	 * If there was nothing to send to user, in spite of consuming trace
6706 	 * entries, go back to wait for more entries.
6707 	 */
6708 	if (sret == -EBUSY)
6709 		goto waitagain;
6710 
6711 	return sret;
6712 }
6713 
6714 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6715 				     unsigned int idx)
6716 {
6717 	__free_page(spd->pages[idx]);
6718 }
6719 
6720 static size_t
6721 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6722 {
6723 	size_t count;
6724 	int save_len;
6725 	int ret;
6726 
6727 	/* Seq buffer is page-sized, exactly what we need. */
6728 	for (;;) {
6729 		save_len = iter->seq.seq.len;
6730 		ret = print_trace_line(iter);
6731 
6732 		if (trace_seq_has_overflowed(&iter->seq)) {
6733 			iter->seq.seq.len = save_len;
6734 			break;
6735 		}
6736 
6737 		/*
6738 		 * This should not be hit, because it should only
6739 		 * be set if the iter->seq overflowed. But check it
6740 		 * anyway to be safe.
6741 		 */
6742 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6743 			iter->seq.seq.len = save_len;
6744 			break;
6745 		}
6746 
6747 		count = trace_seq_used(&iter->seq) - save_len;
6748 		if (rem < count) {
6749 			rem = 0;
6750 			iter->seq.seq.len = save_len;
6751 			break;
6752 		}
6753 
6754 		if (ret != TRACE_TYPE_NO_CONSUME)
6755 			trace_consume(iter);
6756 		rem -= count;
6757 		if (!trace_find_next_entry_inc(iter))	{
6758 			rem = 0;
6759 			iter->ent = NULL;
6760 			break;
6761 		}
6762 	}
6763 
6764 	return rem;
6765 }
6766 
6767 static ssize_t tracing_splice_read_pipe(struct file *filp,
6768 					loff_t *ppos,
6769 					struct pipe_inode_info *pipe,
6770 					size_t len,
6771 					unsigned int flags)
6772 {
6773 	struct page *pages_def[PIPE_DEF_BUFFERS];
6774 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6775 	struct trace_iterator *iter = filp->private_data;
6776 	struct splice_pipe_desc spd = {
6777 		.pages		= pages_def,
6778 		.partial	= partial_def,
6779 		.nr_pages	= 0, /* This gets updated below. */
6780 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6781 		.ops		= &default_pipe_buf_ops,
6782 		.spd_release	= tracing_spd_release_pipe,
6783 	};
6784 	ssize_t ret;
6785 	size_t rem;
6786 	unsigned int i;
6787 
6788 	if (splice_grow_spd(pipe, &spd))
6789 		return -ENOMEM;
6790 
6791 	mutex_lock(&iter->mutex);
6792 
6793 	if (iter->trace->splice_read) {
6794 		ret = iter->trace->splice_read(iter, filp,
6795 					       ppos, pipe, len, flags);
6796 		if (ret)
6797 			goto out_err;
6798 	}
6799 
6800 	ret = tracing_wait_pipe(filp);
6801 	if (ret <= 0)
6802 		goto out_err;
6803 
6804 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6805 		ret = -EFAULT;
6806 		goto out_err;
6807 	}
6808 
6809 	trace_event_read_lock();
6810 	trace_access_lock(iter->cpu_file);
6811 
6812 	/* Fill as many pages as possible. */
6813 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6814 		spd.pages[i] = alloc_page(GFP_KERNEL);
6815 		if (!spd.pages[i])
6816 			break;
6817 
6818 		rem = tracing_fill_pipe_page(rem, iter);
6819 
6820 		/* Copy the data into the page, so we can start over. */
6821 		ret = trace_seq_to_buffer(&iter->seq,
6822 					  page_address(spd.pages[i]),
6823 					  trace_seq_used(&iter->seq));
6824 		if (ret < 0) {
6825 			__free_page(spd.pages[i]);
6826 			break;
6827 		}
6828 		spd.partial[i].offset = 0;
6829 		spd.partial[i].len = trace_seq_used(&iter->seq);
6830 
6831 		trace_seq_init(&iter->seq);
6832 	}
6833 
6834 	trace_access_unlock(iter->cpu_file);
6835 	trace_event_read_unlock();
6836 	mutex_unlock(&iter->mutex);
6837 
6838 	spd.nr_pages = i;
6839 
6840 	if (i)
6841 		ret = splice_to_pipe(pipe, &spd);
6842 	else
6843 		ret = 0;
6844 out:
6845 	splice_shrink_spd(&spd);
6846 	return ret;
6847 
6848 out_err:
6849 	mutex_unlock(&iter->mutex);
6850 	goto out;
6851 }
6852 
6853 static ssize_t
6854 tracing_entries_read(struct file *filp, char __user *ubuf,
6855 		     size_t cnt, loff_t *ppos)
6856 {
6857 	struct inode *inode = file_inode(filp);
6858 	struct trace_array *tr = inode->i_private;
6859 	int cpu = tracing_get_cpu(inode);
6860 	char buf[64];
6861 	int r = 0;
6862 	ssize_t ret;
6863 
6864 	mutex_lock(&trace_types_lock);
6865 
6866 	if (cpu == RING_BUFFER_ALL_CPUS) {
6867 		int cpu, buf_size_same;
6868 		unsigned long size;
6869 
6870 		size = 0;
6871 		buf_size_same = 1;
6872 		/* check if all CPU buffer sizes are the same */
6873 		for_each_tracing_cpu(cpu) {
6874 			/* fill in the size from the first enabled CPU */
6875 			if (size == 0)
6876 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6877 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6878 				buf_size_same = 0;
6879 				break;
6880 			}
6881 		}
6882 
6883 		if (buf_size_same) {
6884 			if (!tr->ring_buffer_expanded)
6885 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6886 					    size >> 10,
6887 					    trace_buf_size >> 10);
6888 			else
6889 				r = sprintf(buf, "%lu\n", size >> 10);
6890 		} else
6891 			r = sprintf(buf, "X\n");
6892 	} else
6893 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6894 
6895 	mutex_unlock(&trace_types_lock);
6896 
6897 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6898 	return ret;
6899 }
6900 
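/*
 * Writes are interpreted as a buffer size in KiB, e.g.
 * "echo 1024 > buffer_size_kb" resizes each per-CPU buffer to 1024 KiB.
 */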
6901 static ssize_t
6902 tracing_entries_write(struct file *filp, const char __user *ubuf,
6903 		      size_t cnt, loff_t *ppos)
6904 {
6905 	struct inode *inode = file_inode(filp);
6906 	struct trace_array *tr = inode->i_private;
6907 	unsigned long val;
6908 	int ret;
6909 
6910 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6911 	if (ret)
6912 		return ret;
6913 
6914 	/* must have at least 1 entry */
6915 	if (!val)
6916 		return -EINVAL;
6917 
6918 	/* value is in KB */
6919 	val <<= 10;
6920 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6921 	if (ret < 0)
6922 		return ret;
6923 
6924 	*ppos += cnt;
6925 
6926 	return cnt;
6927 }
6928 
6929 static ssize_t
6930 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6931 				size_t cnt, loff_t *ppos)
6932 {
6933 	struct trace_array *tr = filp->private_data;
6934 	char buf[64];
6935 	int r, cpu;
6936 	unsigned long size = 0, expanded_size = 0;
6937 
6938 	mutex_lock(&trace_types_lock);
6939 	for_each_tracing_cpu(cpu) {
6940 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6941 		if (!tr->ring_buffer_expanded)
6942 			expanded_size += trace_buf_size >> 10;
6943 	}
6944 	if (tr->ring_buffer_expanded)
6945 		r = sprintf(buf, "%lu\n", size);
6946 	else
6947 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6948 	mutex_unlock(&trace_types_lock);
6949 
6950 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6951 }
6952 
6953 #define LAST_BOOT_HEADER ((void *)1)
6954 
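/*
 * seq_file iterator for the last boot info file: position 0 emits the header
 * (the previous boot's kernel text address, or "# Current"), and each later
 * position emits one saved module entry.
 */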
6955 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
6956 {
6957 	struct trace_array *tr = m->private;
6958 	struct trace_scratch *tscratch = tr->scratch;
6959 	unsigned int index = *pos;
6960 
6961 	(*pos)++;
6962 
6963 	if (*pos == 1)
6964 		return LAST_BOOT_HEADER;
6965 
6966 	/* Only show offsets of the last boot data */
6967 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6968 		return NULL;
6969 
6970 	/* *pos 0 is for the header, 1 is for the first module */
6971 	index--;
6972 
6973 	if (index >= tscratch->nr_entries)
6974 		return NULL;
6975 
6976 	return &tscratch->entries[index];
6977 }
6978 
6979 static void *l_start(struct seq_file *m, loff_t *pos)
6980 {
6981 	mutex_lock(&scratch_mutex);
6982 
6983 	return l_next(m, NULL, pos);
6984 }
6985 
6986 static void l_stop(struct seq_file *m, void *p)
6987 {
6988 	mutex_unlock(&scratch_mutex);
6989 }
6990 
6991 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
6992 {
6993 	struct trace_scratch *tscratch = tr->scratch;
6994 
6995 	/*
6996 	 * Do not leak KASLR address. This only shows the KASLR address of
6997 	 * the last boot. When the ring buffer is started, the LAST_BOOT
6998 	 * flag gets cleared, and this should only report "current".
6999 	 * Otherwise it shows the KASLR address from the previous boot which
7000 	 * should not be the same as the current boot.
7001 	 */
7002 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7003 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7004 	else
7005 		seq_puts(m, "# Current\n");
7006 }
7007 
7008 static int l_show(struct seq_file *m, void *v)
7009 {
7010 	struct trace_array *tr = m->private;
7011 	struct trace_mod_entry *entry = v;
7012 
7013 	if (v == LAST_BOOT_HEADER) {
7014 		show_last_boot_header(m, tr);
7015 		return 0;
7016 	}
7017 
7018 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7019 	return 0;
7020 }
7021 
7022 static const struct seq_operations last_boot_seq_ops = {
7023 	.start		= l_start,
7024 	.next		= l_next,
7025 	.stop		= l_stop,
7026 	.show		= l_show,
7027 };
7028 
7029 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7030 {
7031 	struct trace_array *tr = inode->i_private;
7032 	struct seq_file *m;
7033 	int ret;
7034 
7035 	ret = tracing_check_open_get_tr(tr);
7036 	if (ret)
7037 		return ret;
7038 
7039 	ret = seq_open(file, &last_boot_seq_ops);
7040 	if (ret) {
7041 		trace_array_put(tr);
7042 		return ret;
7043 	}
7044 
7045 	m = file->private_data;
7046 	m->private = tr;
7047 
7048 	return 0;
7049 }
7050 
7051 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7052 {
7053 	struct trace_array *tr = inode->i_private;
7054 	int cpu = tracing_get_cpu(inode);
7055 	int ret;
7056 
7057 	ret = tracing_check_open_get_tr(tr);
7058 	if (ret)
7059 		return ret;
7060 
7061 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7062 	if (ret < 0)
7063 		__trace_array_put(tr);
7064 	return ret;
7065 }
7066 
7067 static ssize_t
7068 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7069 			  size_t cnt, loff_t *ppos)
7070 {
7071 	/*
7072 	 * There is no need to read what the user has written; this function
7073 	 * just makes sure that there is no error when "echo" is used.
7074 	 */
7075 
7076 	*ppos += cnt;
7077 
7078 	return cnt;
7079 }
7080 
7081 static int
7082 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7083 {
7084 	struct trace_array *tr = inode->i_private;
7085 
7086 	/* disable tracing? */
7087 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7088 		tracer_tracing_off(tr);
7089 	/* resize the ring buffer to 0 */
7090 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7091 
7092 	trace_array_put(tr);
7093 
7094 	return 0;
7095 }
7096 
7097 #define TRACE_MARKER_MAX_SIZE		4096
7098 
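/*
 * Handle writes to the trace_marker file, e.g. "echo hello > trace_marker".
 * The string is recorded as a TRACE_PRINT event in the ring buffer and is
 * truncated to TRACE_MARKER_MAX_SIZE bytes.
 */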
7099 static ssize_t
7100 tracing_mark_write(struct file *filp, const char __user *ubuf,
7101 					size_t cnt, loff_t *fpos)
7102 {
7103 	struct trace_array *tr = filp->private_data;
7104 	struct ring_buffer_event *event;
7105 	enum event_trigger_type tt = ETT_NONE;
7106 	struct trace_buffer *buffer;
7107 	struct print_entry *entry;
7108 	int meta_size;
7109 	ssize_t written;
7110 	size_t size;
7111 	int len;
7112 
7113 /* Used in tracing_mark_raw_write() as well */
7114 #define FAULTED_STR "<faulted>"
7115 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7116 
7117 	if (tracing_disabled)
7118 		return -EINVAL;
7119 
7120 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7121 		return -EINVAL;
7122 
7123 	if ((ssize_t)cnt < 0)
7124 		return -EINVAL;
7125 
7126 	if (cnt > TRACE_MARKER_MAX_SIZE)
7127 		cnt = TRACE_MARKER_MAX_SIZE;
7128 
7129 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7130  again:
7131 	size = cnt + meta_size;
7132 
7133 	/* If less than "<faulted>", then make sure we can still add that */
7134 	if (cnt < FAULTED_SIZE)
7135 		size += FAULTED_SIZE - cnt;
7136 
7137 	buffer = tr->array_buffer.buffer;
7138 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7139 					    tracing_gen_ctx());
7140 	if (unlikely(!event)) {
7141 		/*
7142 		 * If the size was greater than what was allowed, then
7143 		 * make it smaller and try again.
7144 		 */
7145 		if (size > ring_buffer_max_event_size(buffer)) {
7146 			/* If cnt < FAULTED_SIZE, size should never be bigger than the max */
7147 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7148 				return -EBADF;
7149 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7150 			/* The above should only happen once */
7151 			if (WARN_ON_ONCE(cnt + meta_size == size))
7152 				return -EBADF;
7153 			goto again;
7154 		}
7155 
7156 		/* Ring buffer disabled, return as if not open for write */
7157 		return -EBADF;
7158 	}
7159 
7160 	entry = ring_buffer_event_data(event);
7161 	entry->ip = _THIS_IP_;
7162 
7163 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7164 	if (len) {
7165 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7166 		cnt = FAULTED_SIZE;
7167 		written = -EFAULT;
7168 	} else
7169 		written = cnt;
7170 
7171 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7172 		/* do not add \n before testing triggers, but add \0 */
7173 		entry->buf[cnt] = '\0';
7174 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7175 	}
7176 
7177 	if (entry->buf[cnt - 1] != '\n') {
7178 		entry->buf[cnt] = '\n';
7179 		entry->buf[cnt + 1] = '\0';
7180 	} else
7181 		entry->buf[cnt] = '\0';
7182 
7183 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7184 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7185 	__buffer_unlock_commit(buffer, event);
7186 
7187 	if (tt)
7188 		event_triggers_post_call(tr->trace_marker_file, tt);
7189 
7190 	return written;
7191 }
7192 
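/*
 * Handle writes to the raw trace marker file (trace_marker_raw). The payload
 * is binary: the first sizeof(int) bytes are a tag id and the rest is opaque
 * data, recorded as a TRACE_RAW_DATA event.
 */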
7193 static ssize_t
7194 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7195 					size_t cnt, loff_t *fpos)
7196 {
7197 	struct trace_array *tr = filp->private_data;
7198 	struct ring_buffer_event *event;
7199 	struct trace_buffer *buffer;
7200 	struct raw_data_entry *entry;
7201 	ssize_t written;
7202 	int size;
7203 	int len;
7204 
7205 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7206 
7207 	if (tracing_disabled)
7208 		return -EINVAL;
7209 
7210 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7211 		return -EINVAL;
7212 
7213 	/* The marker must at least have a tag id */
7214 	if (cnt < sizeof(unsigned int))
7215 		return -EINVAL;
7216 
7217 	size = sizeof(*entry) + cnt;
7218 	if (cnt < FAULT_SIZE_ID)
7219 		size += FAULT_SIZE_ID - cnt;
7220 
7221 	buffer = tr->array_buffer.buffer;
7222 
7223 	if (size > ring_buffer_max_event_size(buffer))
7224 		return -EINVAL;
7225 
7226 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7227 					    tracing_gen_ctx());
7228 	if (!event)
7229 		/* Ring buffer disabled, return as if not open for write */
7230 		return -EBADF;
7231 
7232 	entry = ring_buffer_event_data(event);
7233 
7234 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7235 	if (len) {
7236 		entry->id = -1;
7237 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7238 		written = -EFAULT;
7239 	} else
7240 		written = cnt;
7241 
7242 	__buffer_unlock_commit(buffer, event);
7243 
7244 	return written;
7245 }
7246 
7247 static int tracing_clock_show(struct seq_file *m, void *v)
7248 {
7249 	struct trace_array *tr = m->private;
7250 	int i;
7251 
7252 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7253 		seq_printf(m,
7254 			"%s%s%s%s", i ? " " : "",
7255 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7256 			i == tr->clock_id ? "]" : "");
7257 	seq_putc(m, '\n');
7258 
7259 	return 0;
7260 }
7261 
7262 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7263 {
7264 	int i;
7265 
7266 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7267 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7268 			break;
7269 	}
7270 	if (i == ARRAY_SIZE(trace_clocks))
7271 		return -EINVAL;
7272 
7273 	mutex_lock(&trace_types_lock);
7274 
7275 	tr->clock_id = i;
7276 
7277 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7278 
7279 	/*
7280 	 * New clock may not be consistent with the previous clock.
7281 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7282 	 */
7283 	tracing_reset_online_cpus(&tr->array_buffer);
7284 
7285 #ifdef CONFIG_TRACER_MAX_TRACE
7286 	if (tr->max_buffer.buffer)
7287 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7288 	tracing_reset_online_cpus(&tr->max_buffer);
7289 #endif
7290 
7291 	mutex_unlock(&trace_types_lock);
7292 
7293 	return 0;
7294 }
7295 
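/* Handle writes to the trace_clock file, e.g. "echo mono > trace_clock". */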
7296 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7297 				   size_t cnt, loff_t *fpos)
7298 {
7299 	struct seq_file *m = filp->private_data;
7300 	struct trace_array *tr = m->private;
7301 	char buf[64];
7302 	const char *clockstr;
7303 	int ret;
7304 
7305 	if (cnt >= sizeof(buf))
7306 		return -EINVAL;
7307 
7308 	if (copy_from_user(buf, ubuf, cnt))
7309 		return -EFAULT;
7310 
7311 	buf[cnt] = 0;
7312 
7313 	clockstr = strstrip(buf);
7314 
7315 	ret = tracing_set_clock(tr, clockstr);
7316 	if (ret)
7317 		return ret;
7318 
7319 	*fpos += cnt;
7320 
7321 	return cnt;
7322 }
7323 
7324 static int tracing_clock_open(struct inode *inode, struct file *file)
7325 {
7326 	struct trace_array *tr = inode->i_private;
7327 	int ret;
7328 
7329 	ret = tracing_check_open_get_tr(tr);
7330 	if (ret)
7331 		return ret;
7332 
7333 	ret = single_open(file, tracing_clock_show, inode->i_private);
7334 	if (ret < 0)
7335 		trace_array_put(tr);
7336 
7337 	return ret;
7338 }
7339 
7340 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7341 {
7342 	struct trace_array *tr = m->private;
7343 
7344 	mutex_lock(&trace_types_lock);
7345 
7346 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7347 		seq_puts(m, "delta [absolute]\n");
7348 	else
7349 		seq_puts(m, "[delta] absolute\n");
7350 
7351 	mutex_unlock(&trace_types_lock);
7352 
7353 	return 0;
7354 }
7355 
7356 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7357 {
7358 	struct trace_array *tr = inode->i_private;
7359 	int ret;
7360 
7361 	ret = tracing_check_open_get_tr(tr);
7362 	if (ret)
7363 		return ret;
7364 
7365 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7366 	if (ret < 0)
7367 		trace_array_put(tr);
7368 
7369 	return ret;
7370 }
7371 
7372 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7373 {
7374 	if (rbe == this_cpu_read(trace_buffered_event))
7375 		return ring_buffer_time_stamp(buffer);
7376 
7377 	return ring_buffer_event_time_stamp(buffer, rbe);
7378 }
7379 
7380 /*
7381  * Set or disable using the per-CPU trace_buffered_event when possible.
7382  */
7383 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7384 {
7385 	guard(mutex)(&trace_types_lock);
7386 
7387 	if (set && tr->no_filter_buffering_ref++)
7388 		return 0;
7389 
7390 	if (!set) {
7391 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7392 			return -EINVAL;
7393 
7394 		--tr->no_filter_buffering_ref;
7395 	}
7396 
7397 	return 0;
7398 }
7399 
7400 struct ftrace_buffer_info {
7401 	struct trace_iterator	iter;
7402 	void			*spare;
7403 	unsigned int		spare_cpu;
7404 	unsigned int		spare_size;
7405 	unsigned int		read;
7406 };
7407 
7408 #ifdef CONFIG_TRACER_SNAPSHOT
7409 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7410 {
7411 	struct trace_array *tr = inode->i_private;
7412 	struct trace_iterator *iter;
7413 	struct seq_file *m;
7414 	int ret;
7415 
7416 	ret = tracing_check_open_get_tr(tr);
7417 	if (ret)
7418 		return ret;
7419 
7420 	if (file->f_mode & FMODE_READ) {
7421 		iter = __tracing_open(inode, file, true);
7422 		if (IS_ERR(iter))
7423 			ret = PTR_ERR(iter);
7424 	} else {
7425 		/* Writes still need the seq_file to hold the private data */
7426 		ret = -ENOMEM;
7427 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7428 		if (!m)
7429 			goto out;
7430 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7431 		if (!iter) {
7432 			kfree(m);
7433 			goto out;
7434 		}
7435 		ret = 0;
7436 
7437 		iter->tr = tr;
7438 		iter->array_buffer = &tr->max_buffer;
7439 		iter->cpu_file = tracing_get_cpu(inode);
7440 		m->private = iter;
7441 		file->private_data = m;
7442 	}
7443 out:
7444 	if (ret < 0)
7445 		trace_array_put(tr);
7446 
7447 	return ret;
7448 }
7449 
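/*
 * Run via smp_call_function_single() so that the swap of a single
 * CPU's buffer happens on that CPU itself.
 */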
7450 static void tracing_swap_cpu_buffer(void *tr)
7451 {
7452 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7453 }
7454 
7455 static ssize_t
7456 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7457 		       loff_t *ppos)
7458 {
7459 	struct seq_file *m = filp->private_data;
7460 	struct trace_iterator *iter = m->private;
7461 	struct trace_array *tr = iter->tr;
7462 	unsigned long val;
7463 	int ret;
7464 
7465 	ret = tracing_update_buffers(tr);
7466 	if (ret < 0)
7467 		return ret;
7468 
7469 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7470 	if (ret)
7471 		return ret;
7472 
7473 	guard(mutex)(&trace_types_lock);
7474 
7475 	if (tr->current_trace->use_max_tr)
7476 		return -EBUSY;
7477 
7478 	local_irq_disable();
7479 	arch_spin_lock(&tr->max_lock);
7480 	if (tr->cond_snapshot)
7481 		ret = -EBUSY;
7482 	arch_spin_unlock(&tr->max_lock);
7483 	local_irq_enable();
7484 	if (ret)
7485 		return ret;
7486 
7487 	switch (val) {
7488 	case 0:
7489 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7490 			return -EINVAL;
7491 		if (tr->allocated_snapshot)
7492 			free_snapshot(tr);
7493 		break;
7494 	case 1:
7495 /* Only allow per-cpu swap if the ring buffer supports it */
7496 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7497 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7498 			return -EINVAL;
7499 #endif
7500 		if (tr->allocated_snapshot)
7501 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7502 					&tr->array_buffer, iter->cpu_file);
7503 
7504 		ret = tracing_arm_snapshot_locked(tr);
7505 		if (ret)
7506 			return ret;
7507 
7508 		/* Now, we're going to swap */
7509 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7510 			local_irq_disable();
7511 			update_max_tr(tr, current, smp_processor_id(), NULL);
7512 			local_irq_enable();
7513 		} else {
7514 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7515 						 (void *)tr, 1);
7516 		}
7517 		tracing_disarm_snapshot(tr);
7518 		break;
7519 	default:
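		/*
		 * Any other value clears the snapshot buffer contents
		 * (for one CPU or all CPUs) without freeing it.
		 */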
7520 		if (tr->allocated_snapshot) {
7521 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7522 				tracing_reset_online_cpus(&tr->max_buffer);
7523 			else
7524 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7525 		}
7526 		break;
7527 	}
7528 
7529 	if (ret >= 0) {
7530 		*ppos += cnt;
7531 		ret = cnt;
7532 	}
7533 
7534 	return ret;
7535 }
7536 
7537 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7538 {
7539 	struct seq_file *m = file->private_data;
7540 	int ret;
7541 
7542 	ret = tracing_release(inode, file);
7543 
7544 	if (file->f_mode & FMODE_READ)
7545 		return ret;
7546 
7547 	/* If write only, the seq_file is just a stub */
7548 	if (m)
7549 		kfree(m->private);
7550 	kfree(m);
7551 
7552 	return 0;
7553 }
7554 
7555 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7556 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7557 				    size_t count, loff_t *ppos);
7558 static int tracing_buffers_release(struct inode *inode, struct file *file);
7559 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7560 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7561 
7562 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7563 {
7564 	struct ftrace_buffer_info *info;
7565 	int ret;
7566 
7567 	/* The following checks for tracefs lockdown */
7568 	ret = tracing_buffers_open(inode, filp);
7569 	if (ret < 0)
7570 		return ret;
7571 
7572 	info = filp->private_data;
7573 
7574 	if (info->iter.trace->use_max_tr) {
7575 		tracing_buffers_release(inode, filp);
7576 		return -EBUSY;
7577 	}
7578 
7579 	info->iter.snapshot = true;
7580 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7581 
7582 	return ret;
7583 }
7584 
7585 #endif /* CONFIG_TRACER_SNAPSHOT */
7586 
7587 
7588 static const struct file_operations tracing_thresh_fops = {
7589 	.open		= tracing_open_generic,
7590 	.read		= tracing_thresh_read,
7591 	.write		= tracing_thresh_write,
7592 	.llseek		= generic_file_llseek,
7593 };
7594 
7595 #ifdef CONFIG_TRACER_MAX_TRACE
7596 static const struct file_operations tracing_max_lat_fops = {
7597 	.open		= tracing_open_generic_tr,
7598 	.read		= tracing_max_lat_read,
7599 	.write		= tracing_max_lat_write,
7600 	.llseek		= generic_file_llseek,
7601 	.release	= tracing_release_generic_tr,
7602 };
7603 #endif
7604 
7605 static const struct file_operations set_tracer_fops = {
7606 	.open		= tracing_open_generic_tr,
7607 	.read		= tracing_set_trace_read,
7608 	.write		= tracing_set_trace_write,
7609 	.llseek		= generic_file_llseek,
7610 	.release	= tracing_release_generic_tr,
7611 };
7612 
7613 static const struct file_operations tracing_pipe_fops = {
7614 	.open		= tracing_open_pipe,
7615 	.poll		= tracing_poll_pipe,
7616 	.read		= tracing_read_pipe,
7617 	.splice_read	= tracing_splice_read_pipe,
7618 	.release	= tracing_release_pipe,
7619 };
7620 
7621 static const struct file_operations tracing_entries_fops = {
7622 	.open		= tracing_open_generic_tr,
7623 	.read		= tracing_entries_read,
7624 	.write		= tracing_entries_write,
7625 	.llseek		= generic_file_llseek,
7626 	.release	= tracing_release_generic_tr,
7627 };
7628 
7629 static const struct file_operations tracing_buffer_meta_fops = {
7630 	.open		= tracing_buffer_meta_open,
7631 	.read		= seq_read,
7632 	.llseek		= seq_lseek,
7633 	.release	= tracing_seq_release,
7634 };
7635 
7636 static const struct file_operations tracing_total_entries_fops = {
7637 	.open		= tracing_open_generic_tr,
7638 	.read		= tracing_total_entries_read,
7639 	.llseek		= generic_file_llseek,
7640 	.release	= tracing_release_generic_tr,
7641 };
7642 
7643 static const struct file_operations tracing_free_buffer_fops = {
7644 	.open		= tracing_open_generic_tr,
7645 	.write		= tracing_free_buffer_write,
7646 	.release	= tracing_free_buffer_release,
7647 };
7648 
7649 static const struct file_operations tracing_mark_fops = {
7650 	.open		= tracing_mark_open,
7651 	.write		= tracing_mark_write,
7652 	.release	= tracing_release_generic_tr,
7653 };
7654 
7655 static const struct file_operations tracing_mark_raw_fops = {
7656 	.open		= tracing_mark_open,
7657 	.write		= tracing_mark_raw_write,
7658 	.release	= tracing_release_generic_tr,
7659 };
7660 
7661 static const struct file_operations trace_clock_fops = {
7662 	.open		= tracing_clock_open,
7663 	.read		= seq_read,
7664 	.llseek		= seq_lseek,
7665 	.release	= tracing_single_release_tr,
7666 	.write		= tracing_clock_write,
7667 };
7668 
7669 static const struct file_operations trace_time_stamp_mode_fops = {
7670 	.open		= tracing_time_stamp_mode_open,
7671 	.read		= seq_read,
7672 	.llseek		= seq_lseek,
7673 	.release	= tracing_single_release_tr,
7674 };
7675 
7676 static const struct file_operations last_boot_fops = {
7677 	.open		= tracing_last_boot_open,
7678 	.read		= seq_read,
7679 	.llseek		= seq_lseek,
7680 	.release	= tracing_seq_release,
7681 };
7682 
7683 #ifdef CONFIG_TRACER_SNAPSHOT
7684 static const struct file_operations snapshot_fops = {
7685 	.open		= tracing_snapshot_open,
7686 	.read		= seq_read,
7687 	.write		= tracing_snapshot_write,
7688 	.llseek		= tracing_lseek,
7689 	.release	= tracing_snapshot_release,
7690 };
7691 
7692 static const struct file_operations snapshot_raw_fops = {
7693 	.open		= snapshot_raw_open,
7694 	.read		= tracing_buffers_read,
7695 	.release	= tracing_buffers_release,
7696 	.splice_read	= tracing_buffers_splice_read,
7697 };
7698 
7699 #endif /* CONFIG_TRACER_SNAPSHOT */
7700 
7701 /*
7702  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7703  * @filp: The active open file structure
7704  * @ubuf: The userspace provided buffer to read the value from
7705  * @cnt: The maximum number of bytes to read
7706  * @ppos: The current "file" position
7707  *
7708  * This function implements the write interface for a struct trace_min_max_param.
7709  * The filp->private_data must point to a trace_min_max_param structure that
7710  * defines where to write the value, the min and the max acceptable values,
7711  * and a lock to protect the write.
7712  */
7713 static ssize_t
7714 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7715 {
7716 	struct trace_min_max_param *param = filp->private_data;
7717 	u64 val;
7718 	int err;
7719 
7720 	if (!param)
7721 		return -EFAULT;
7722 
7723 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7724 	if (err)
7725 		return err;
7726 
7727 	if (param->lock)
7728 		mutex_lock(param->lock);
7729 
7730 	if (param->min && val < *param->min)
7731 		err = -EINVAL;
7732 
7733 	if (param->max && val > *param->max)
7734 		err = -EINVAL;
7735 
7736 	if (!err)
7737 		*param->val = val;
7738 
7739 	if (param->lock)
7740 		mutex_unlock(param->lock);
7741 
7742 	if (err)
7743 		return err;
7744 
7745 	return cnt;
7746 }
7747 
7748 /*
7749  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7750  * @filp: The active open file structure
7751  * @ubuf: The userspace provided buffer to read value into
7752  * @cnt: The maximum number of bytes to read
7753  * @ppos: The current "file" position
7754  *
7755  * This function implements the read interface for a struct trace_min_max_param.
7756  * The filp->private_data must point to a trace_min_max_param struct with valid
7757  * data.
7758  */
7759 static ssize_t
7760 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7761 {
7762 	struct trace_min_max_param *param = filp->private_data;
7763 	char buf[U64_STR_SIZE];
7764 	int len;
7765 	u64 val;
7766 
7767 	if (!param)
7768 		return -EFAULT;
7769 
7770 	val = *param->val;
7771 
7772 	if (cnt > sizeof(buf))
7773 		cnt = sizeof(buf);
7774 
7775 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7776 
7777 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7778 }
7779 
7780 const struct file_operations trace_min_max_fops = {
7781 	.open		= tracing_open_generic,
7782 	.read		= trace_min_max_read,
7783 	.write		= trace_min_max_write,
7784 };
7785 
7786 #define TRACING_LOG_ERRS_MAX	8
7787 #define TRACING_LOG_LOC_MAX	128
7788 
7789 #define CMD_PREFIX "  Command: "
7790 
7791 struct err_info {
7792 	const char	**errs;	/* ptr to loc-specific array of err strings */
7793 	u8		type;	/* index into errs -> specific err string */
7794 	u16		pos;	/* caret position */
7795 	u64		ts;
7796 };
7797 
7798 struct tracing_log_err {
7799 	struct list_head	list;
7800 	struct err_info		info;
7801 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7802 	char			*cmd;                     /* what caused err */
7803 };
7804 
7805 static DEFINE_MUTEX(tracing_err_log_lock);
7806 
7807 static struct tracing_log_err *alloc_tracing_log_err(int len)
7808 {
7809 	struct tracing_log_err *err;
7810 
7811 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7812 	if (!err)
7813 		return ERR_PTR(-ENOMEM);
7814 
7815 	err->cmd = kzalloc(len, GFP_KERNEL);
7816 	if (!err->cmd) {
7817 		kfree(err);
7818 		return ERR_PTR(-ENOMEM);
7819 	}
7820 
7821 	return err;
7822 }
7823 
7824 static void free_tracing_log_err(struct tracing_log_err *err)
7825 {
7826 	kfree(err->cmd);
7827 	kfree(err);
7828 }
7829 
7830 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7831 						   int len)
7832 {
7833 	struct tracing_log_err *err;
7834 	char *cmd;
7835 
7836 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7837 		err = alloc_tracing_log_err(len);
7838 		if (PTR_ERR(err) != -ENOMEM)
7839 			tr->n_err_log_entries++;
7840 
7841 		return err;
7842 	}
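	/*
	 * The log is full: recycle the oldest entry and give it a freshly
	 * sized command buffer.
	 */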
7843 	cmd = kzalloc(len, GFP_KERNEL);
7844 	if (!cmd)
7845 		return ERR_PTR(-ENOMEM);
7846 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7847 	kfree(err->cmd);
7848 	err->cmd = cmd;
7849 	list_del(&err->list);
7850 
7851 	return err;
7852 }
7853 
7854 /**
7855  * err_pos - find the position of a string within a command for error careting
7856  * @cmd: The tracing command that caused the error
7857  * @str: The string to position the caret at within @cmd
7858  *
7859  * Finds the position of the first occurrence of @str within @cmd.  The
7860  * return value can be passed to tracing_log_err() for caret placement
7861  * within @cmd.
7862  *
7863  * Returns the index within @cmd of the first occurrence of @str or 0
7864  * if @str was not found.
7865  */
7866 unsigned int err_pos(char *cmd, const char *str)
7867 {
7868 	char *found;
7869 
7870 	if (WARN_ON(!strlen(cmd)))
7871 		return 0;
7872 
7873 	found = strstr(cmd, str);
7874 	if (found)
7875 		return found - cmd;
7876 
7877 	return 0;
7878 }
7879 
7880 /**
7881  * tracing_log_err - write an error to the tracing error log
7882  * @tr: The associated trace array for the error (NULL for top level array)
7883  * @loc: A string describing where the error occurred
7884  * @cmd: The tracing command that caused the error
7885  * @errs: The array of loc-specific static error strings
7886  * @type: The index into errs[], which produces the specific static err string
7887  * @pos: The position the caret should be placed in the cmd
7888  *
7889  * Writes an error into tracing/error_log of the form:
7890  *
7891  * <loc>: error: <text>
7892  *   Command: <cmd>
7893  *              ^
7894  *
7895  * tracing/error_log is a small log file containing the last
7896  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7897  * unless there has been a tracing error, and the error log can be
7898  * cleared and have its memory freed by writing the empty string in
7899  * truncation mode to it, i.e. echo > tracing/error_log.
7900  *
7901  * NOTE: the @errs array along with the @type param are used to
7902  * produce a static error string - this string is not copied and saved
7903  * when the error is logged - only a pointer to it is saved.  See
7904  * existing callers for examples of how static strings are typically
7905  * defined for use with tracing_log_err().
7906  */
7907 void tracing_log_err(struct trace_array *tr,
7908 		     const char *loc, const char *cmd,
7909 		     const char **errs, u8 type, u16 pos)
7910 {
7911 	struct tracing_log_err *err;
7912 	int len = 0;
7913 
7914 	if (!tr)
7915 		tr = &global_trace;
7916 
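	/*
	 * Room for a leading newline, CMD_PREFIX, the command, a trailing
	 * newline and the terminating NUL.  The sizeof()s of the string
	 * literals already include their own NULs, so this over-allocates
	 * by a few bytes.
	 */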
7917 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7918 
7919 	guard(mutex)(&tracing_err_log_lock);
7920 
7921 	err = get_tracing_log_err(tr, len);
7922 	if (PTR_ERR(err) == -ENOMEM)
7923 		return;
7924 
7925 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7926 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7927 
7928 	err->info.errs = errs;
7929 	err->info.type = type;
7930 	err->info.pos = pos;
7931 	err->info.ts = local_clock();
7932 
7933 	list_add_tail(&err->list, &tr->err_log);
7934 }
7935 
7936 static void clear_tracing_err_log(struct trace_array *tr)
7937 {
7938 	struct tracing_log_err *err, *next;
7939 
7940 	mutex_lock(&tracing_err_log_lock);
7941 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7942 		list_del(&err->list);
7943 		free_tracing_log_err(err);
7944 	}
7945 
7946 	tr->n_err_log_entries = 0;
7947 	mutex_unlock(&tracing_err_log_lock);
7948 }
7949 
7950 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7951 {
7952 	struct trace_array *tr = m->private;
7953 
7954 	mutex_lock(&tracing_err_log_lock);
7955 
7956 	return seq_list_start(&tr->err_log, *pos);
7957 }
7958 
7959 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7960 {
7961 	struct trace_array *tr = m->private;
7962 
7963 	return seq_list_next(v, &tr->err_log, pos);
7964 }
7965 
7966 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7967 {
7968 	mutex_unlock(&tracing_err_log_lock);
7969 }
7970 
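/*
 * Emit enough spaces to place the '^' caret under the character at
 * @pos within the command, which is printed after CMD_PREFIX.
 */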
7971 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7972 {
7973 	u16 i;
7974 
7975 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7976 		seq_putc(m, ' ');
7977 	for (i = 0; i < pos; i++)
7978 		seq_putc(m, ' ');
7979 	seq_puts(m, "^\n");
7980 }
7981 
7982 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7983 {
7984 	struct tracing_log_err *err = v;
7985 
7986 	if (err) {
7987 		const char *err_text = err->info.errs[err->info.type];
7988 		u64 sec = err->info.ts;
7989 		u32 nsec;
7990 
7991 		nsec = do_div(sec, NSEC_PER_SEC);
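		/* do_div() leaves the quotient in 'sec' and returns the remainder */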
7992 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7993 			   err->loc, err_text);
7994 		seq_printf(m, "%s", err->cmd);
7995 		tracing_err_log_show_pos(m, err->info.pos);
7996 	}
7997 
7998 	return 0;
7999 }
8000 
8001 static const struct seq_operations tracing_err_log_seq_ops = {
8002 	.start  = tracing_err_log_seq_start,
8003 	.next   = tracing_err_log_seq_next,
8004 	.stop   = tracing_err_log_seq_stop,
8005 	.show   = tracing_err_log_seq_show
8006 };
8007 
8008 static int tracing_err_log_open(struct inode *inode, struct file *file)
8009 {
8010 	struct trace_array *tr = inode->i_private;
8011 	int ret = 0;
8012 
8013 	ret = tracing_check_open_get_tr(tr);
8014 	if (ret)
8015 		return ret;
8016 
8017 	/* If this file was opened for write, then erase contents */
8018 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8019 		clear_tracing_err_log(tr);
8020 
8021 	if (file->f_mode & FMODE_READ) {
8022 		ret = seq_open(file, &tracing_err_log_seq_ops);
8023 		if (!ret) {
8024 			struct seq_file *m = file->private_data;
8025 			m->private = tr;
8026 		} else {
8027 			trace_array_put(tr);
8028 		}
8029 	}
8030 	return ret;
8031 }
8032 
8033 static ssize_t tracing_err_log_write(struct file *file,
8034 				     const char __user *buffer,
8035 				     size_t count, loff_t *ppos)
8036 {
8037 	return count;
8038 }
8039 
8040 static int tracing_err_log_release(struct inode *inode, struct file *file)
8041 {
8042 	struct trace_array *tr = inode->i_private;
8043 
8044 	trace_array_put(tr);
8045 
8046 	if (file->f_mode & FMODE_READ)
8047 		seq_release(inode, file);
8048 
8049 	return 0;
8050 }
8051 
8052 static const struct file_operations tracing_err_log_fops = {
8053 	.open           = tracing_err_log_open,
8054 	.write		= tracing_err_log_write,
8055 	.read           = seq_read,
8056 	.llseek         = tracing_lseek,
8057 	.release        = tracing_err_log_release,
8058 };
8059 
8060 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8061 {
8062 	struct trace_array *tr = inode->i_private;
8063 	struct ftrace_buffer_info *info;
8064 	int ret;
8065 
8066 	ret = tracing_check_open_get_tr(tr);
8067 	if (ret)
8068 		return ret;
8069 
8070 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8071 	if (!info) {
8072 		trace_array_put(tr);
8073 		return -ENOMEM;
8074 	}
8075 
8076 	mutex_lock(&trace_types_lock);
8077 
8078 	info->iter.tr		= tr;
8079 	info->iter.cpu_file	= tracing_get_cpu(inode);
8080 	info->iter.trace	= tr->current_trace;
8081 	info->iter.array_buffer = &tr->array_buffer;
8082 	info->spare		= NULL;
8083 	/* Force reading ring buffer for first read */
8084 	info->read		= (unsigned int)-1;
8085 
8086 	filp->private_data = info;
8087 
8088 	tr->trace_ref++;
8089 
8090 	mutex_unlock(&trace_types_lock);
8091 
8092 	ret = nonseekable_open(inode, filp);
8093 	if (ret < 0)
8094 		trace_array_put(tr);
8095 
8096 	return ret;
8097 }
8098 
8099 static __poll_t
8100 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8101 {
8102 	struct ftrace_buffer_info *info = filp->private_data;
8103 	struct trace_iterator *iter = &info->iter;
8104 
8105 	return trace_poll(iter, filp, poll_table);
8106 }
8107 
8108 static ssize_t
8109 tracing_buffers_read(struct file *filp, char __user *ubuf,
8110 		     size_t count, loff_t *ppos)
8111 {
8112 	struct ftrace_buffer_info *info = filp->private_data;
8113 	struct trace_iterator *iter = &info->iter;
8114 	void *trace_data;
8115 	int page_size;
8116 	ssize_t ret = 0;
8117 	ssize_t size;
8118 
8119 	if (!count)
8120 		return 0;
8121 
8122 #ifdef CONFIG_TRACER_MAX_TRACE
8123 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8124 		return -EBUSY;
8125 #endif
8126 
8127 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8128 
8129 	/* Make sure the spare matches the current sub buffer size */
8130 	if (info->spare) {
8131 		if (page_size != info->spare_size) {
8132 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8133 						   info->spare_cpu, info->spare);
8134 			info->spare = NULL;
8135 		}
8136 	}
8137 
8138 	if (!info->spare) {
8139 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8140 							  iter->cpu_file);
8141 		if (IS_ERR(info->spare)) {
8142 			ret = PTR_ERR(info->spare);
8143 			info->spare = NULL;
8144 		} else {
8145 			info->spare_cpu = iter->cpu_file;
8146 			info->spare_size = page_size;
8147 		}
8148 	}
8149 	if (!info->spare)
8150 		return ret;
8151 
8152 	/* Do we have previous read data to read? */
8153 	if (info->read < page_size)
8154 		goto read;
8155 
8156  again:
8157 	trace_access_lock(iter->cpu_file);
8158 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8159 				    info->spare,
8160 				    count,
8161 				    iter->cpu_file, 0);
8162 	trace_access_unlock(iter->cpu_file);
8163 
8164 	if (ret < 0) {
8165 		if (trace_empty(iter) && !iter->closed) {
8166 			if ((filp->f_flags & O_NONBLOCK))
8167 				return -EAGAIN;
8168 
8169 			ret = wait_on_pipe(iter, 0);
8170 			if (ret)
8171 				return ret;
8172 
8173 			goto again;
8174 		}
8175 		return 0;
8176 	}
8177 
8178 	info->read = 0;
8179  read:
8180 	size = page_size - info->read;
8181 	if (size > count)
8182 		size = count;
8183 	trace_data = ring_buffer_read_page_data(info->spare);
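	/*
	 * copy_to_user() returns the number of bytes it could NOT copy:
	 * fail only if nothing was copied, otherwise report the partial read.
	 */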
8184 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8185 	if (ret == size)
8186 		return -EFAULT;
8187 
8188 	size -= ret;
8189 
8190 	*ppos += size;
8191 	info->read += size;
8192 
8193 	return size;
8194 }
8195 
8196 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8197 {
8198 	struct ftrace_buffer_info *info = file->private_data;
8199 	struct trace_iterator *iter = &info->iter;
8200 
8201 	iter->closed = true;
8202 	/* Make sure the waiters see the new wait_index */
8203 	(void)atomic_fetch_inc_release(&iter->wait_index);
8204 
8205 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8206 
8207 	return 0;
8208 }
8209 
8210 static int tracing_buffers_release(struct inode *inode, struct file *file)
8211 {
8212 	struct ftrace_buffer_info *info = file->private_data;
8213 	struct trace_iterator *iter = &info->iter;
8214 
8215 	mutex_lock(&trace_types_lock);
8216 
8217 	iter->tr->trace_ref--;
8218 
8219 	__trace_array_put(iter->tr);
8220 
8221 	if (info->spare)
8222 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8223 					   info->spare_cpu, info->spare);
8224 	kvfree(info);
8225 
8226 	mutex_unlock(&trace_types_lock);
8227 
8228 	return 0;
8229 }
8230 
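/*
 * A reference-counted handle on a page read from the ring buffer and
 * handed out to pipe buffers by splice.  The page is returned to the
 * ring buffer when the last reference is dropped (buffer_ref_release()).
 */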
8231 struct buffer_ref {
8232 	struct trace_buffer	*buffer;
8233 	void			*page;
8234 	int			cpu;
8235 	refcount_t		refcount;
8236 };
8237 
8238 static void buffer_ref_release(struct buffer_ref *ref)
8239 {
8240 	if (!refcount_dec_and_test(&ref->refcount))
8241 		return;
8242 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8243 	kfree(ref);
8244 }
8245 
8246 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8247 				    struct pipe_buffer *buf)
8248 {
8249 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8250 
8251 	buffer_ref_release(ref);
8252 	buf->private = 0;
8253 }
8254 
8255 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8256 				struct pipe_buffer *buf)
8257 {
8258 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8259 
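	/* Refuse new references once the count gets close to overflowing */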
8260 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8261 		return false;
8262 
8263 	refcount_inc(&ref->refcount);
8264 	return true;
8265 }
8266 
8267 /* Pipe buffer operations for a buffer. */
8268 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8269 	.release		= buffer_pipe_buf_release,
8270 	.get			= buffer_pipe_buf_get,
8271 };
8272 
8273 /*
8274  * Callback from splice_to_pipe(); used to release any pages left in
8275  * the spd if we errored out while filling the pipe.
8276  */
8277 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8278 {
8279 	struct buffer_ref *ref =
8280 		(struct buffer_ref *)spd->partial[i].private;
8281 
8282 	buffer_ref_release(ref);
8283 	spd->partial[i].private = 0;
8284 }
8285 
8286 static ssize_t
8287 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8288 			    struct pipe_inode_info *pipe, size_t len,
8289 			    unsigned int flags)
8290 {
8291 	struct ftrace_buffer_info *info = file->private_data;
8292 	struct trace_iterator *iter = &info->iter;
8293 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8294 	struct page *pages_def[PIPE_DEF_BUFFERS];
8295 	struct splice_pipe_desc spd = {
8296 		.pages		= pages_def,
8297 		.partial	= partial_def,
8298 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8299 		.ops		= &buffer_pipe_buf_ops,
8300 		.spd_release	= buffer_spd_release,
8301 	};
8302 	struct buffer_ref *ref;
8303 	bool woken = false;
8304 	int page_size;
8305 	int entries, i;
8306 	ssize_t ret = 0;
8307 
8308 #ifdef CONFIG_TRACER_MAX_TRACE
8309 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8310 		return -EBUSY;
8311 #endif
8312 
8313 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8314 	if (*ppos & (page_size - 1))
8315 		return -EINVAL;
8316 
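	/*
	 * Splice works in whole sub-buffer pages: round len down to a
	 * multiple of the sub-buffer size, but refuse anything smaller
	 * than one sub-buffer.
	 */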
8317 	if (len & (page_size - 1)) {
8318 		if (len < page_size)
8319 			return -EINVAL;
8320 		len &= (~(page_size - 1));
8321 	}
8322 
8323 	if (splice_grow_spd(pipe, &spd))
8324 		return -ENOMEM;
8325 
8326  again:
8327 	trace_access_lock(iter->cpu_file);
8328 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8329 
8330 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8331 		struct page *page;
8332 		int r;
8333 
8334 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8335 		if (!ref) {
8336 			ret = -ENOMEM;
8337 			break;
8338 		}
8339 
8340 		refcount_set(&ref->refcount, 1);
8341 		ref->buffer = iter->array_buffer->buffer;
8342 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8343 		if (IS_ERR(ref->page)) {
8344 			ret = PTR_ERR(ref->page);
8345 			ref->page = NULL;
8346 			kfree(ref);
8347 			break;
8348 		}
8349 		ref->cpu = iter->cpu_file;
8350 
8351 		r = ring_buffer_read_page(ref->buffer, ref->page,
8352 					  len, iter->cpu_file, 1);
8353 		if (r < 0) {
8354 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8355 						   ref->page);
8356 			kfree(ref);
8357 			break;
8358 		}
8359 
8360 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8361 
8362 		spd.pages[i] = page;
8363 		spd.partial[i].len = page_size;
8364 		spd.partial[i].offset = 0;
8365 		spd.partial[i].private = (unsigned long)ref;
8366 		spd.nr_pages++;
8367 		*ppos += page_size;
8368 
8369 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8370 	}
8371 
8372 	trace_access_unlock(iter->cpu_file);
8373 	spd.nr_pages = i;
8374 
8375 	/* did we read anything? */
8376 	if (!spd.nr_pages) {
8377 
8378 		if (ret)
8379 			goto out;
8380 
8381 		if (woken)
8382 			goto out;
8383 
8384 		ret = -EAGAIN;
8385 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8386 			goto out;
8387 
8388 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8389 		if (ret)
8390 			goto out;
8391 
8392 		/* No need to wait after waking up when tracing is off */
8393 		if (!tracer_tracing_is_on(iter->tr))
8394 			goto out;
8395 
8396 		/* Iterate one more time to collect any new data, then exit */
8397 		woken = true;
8398 
8399 		goto again;
8400 	}
8401 
8402 	ret = splice_to_pipe(pipe, &spd);
8403 out:
8404 	splice_shrink_spd(&spd);
8405 
8406 	return ret;
8407 }
8408 
8409 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8410 {
8411 	struct ftrace_buffer_info *info = file->private_data;
8412 	struct trace_iterator *iter = &info->iter;
8413 	int err;
8414 
8415 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8416 		if (!(file->f_flags & O_NONBLOCK)) {
8417 			err = ring_buffer_wait(iter->array_buffer->buffer,
8418 					       iter->cpu_file,
8419 					       iter->tr->buffer_percent,
8420 					       NULL, NULL);
8421 			if (err)
8422 				return err;
8423 		}
8424 
8425 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8426 						  iter->cpu_file);
8427 	} else if (cmd) {
8428 		return -ENOTTY;
8429 	}
8430 
8431 	/*
8432 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8433 	 * waiters
8434 	 */
8435 	mutex_lock(&trace_types_lock);
8436 
8437 	/* Make sure the waiters see the new wait_index */
8438 	(void)atomic_fetch_inc_release(&iter->wait_index);
8439 
8440 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8441 
8442 	mutex_unlock(&trace_types_lock);
8443 	return 0;
8444 }
8445 
8446 #ifdef CONFIG_TRACER_MAX_TRACE
8447 static int get_snapshot_map(struct trace_array *tr)
8448 {
8449 	int err = 0;
8450 
8451 	/*
8452 	 * Called with mmap_lock held. lockdep would be unhappy if we were to
8453 	 * take trace_types_lock here, so use the more specific
8454 	 * snapshot_trigger_lock instead.
8455 	 */
8456 	spin_lock(&tr->snapshot_trigger_lock);
8457 
8458 	if (tr->snapshot || tr->mapped == UINT_MAX)
8459 		err = -EBUSY;
8460 	else
8461 		tr->mapped++;
8462 
8463 	spin_unlock(&tr->snapshot_trigger_lock);
8464 
8465 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8466 	if (tr->mapped == 1)
8467 		synchronize_rcu();
8468 
8469 	return err;
8470 
8471 }
8472 static void put_snapshot_map(struct trace_array *tr)
8473 {
8474 	spin_lock(&tr->snapshot_trigger_lock);
8475 	if (!WARN_ON(!tr->mapped))
8476 		tr->mapped--;
8477 	spin_unlock(&tr->snapshot_trigger_lock);
8478 }
8479 #else
8480 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8481 static inline void put_snapshot_map(struct trace_array *tr) { }
8482 #endif
8483 
8484 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8485 {
8486 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8487 	struct trace_iterator *iter = &info->iter;
8488 
8489 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8490 	put_snapshot_map(iter->tr);
8491 }
8492 
8493 static const struct vm_operations_struct tracing_buffers_vmops = {
8494 	.close		= tracing_buffers_mmap_close,
8495 };
8496 
8497 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8498 {
8499 	struct ftrace_buffer_info *info = filp->private_data;
8500 	struct trace_iterator *iter = &info->iter;
8501 	int ret = 0;
8502 
8503 	/* Currently the boot mapped buffer is not supported for mmap */
8504 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8505 		return -ENODEV;
8506 
8507 	ret = get_snapshot_map(iter->tr);
8508 	if (ret)
8509 		return ret;
8510 
8511 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8512 	if (ret)
8513 		put_snapshot_map(iter->tr);
8514 
8515 	vma->vm_ops = &tracing_buffers_vmops;
8516 
8517 	return ret;
8518 }
8519 
8520 static const struct file_operations tracing_buffers_fops = {
8521 	.open		= tracing_buffers_open,
8522 	.read		= tracing_buffers_read,
8523 	.poll		= tracing_buffers_poll,
8524 	.release	= tracing_buffers_release,
8525 	.flush		= tracing_buffers_flush,
8526 	.splice_read	= tracing_buffers_splice_read,
8527 	.unlocked_ioctl = tracing_buffers_ioctl,
8528 	.mmap		= tracing_buffers_mmap,
8529 };
8530 
8531 static ssize_t
8532 tracing_stats_read(struct file *filp, char __user *ubuf,
8533 		   size_t count, loff_t *ppos)
8534 {
8535 	struct inode *inode = file_inode(filp);
8536 	struct trace_array *tr = inode->i_private;
8537 	struct array_buffer *trace_buf = &tr->array_buffer;
8538 	int cpu = tracing_get_cpu(inode);
8539 	struct trace_seq *s;
8540 	unsigned long cnt;
8541 	unsigned long long t;
8542 	unsigned long usec_rem;
8543 
8544 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8545 	if (!s)
8546 		return -ENOMEM;
8547 
8548 	trace_seq_init(s);
8549 
8550 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8551 	trace_seq_printf(s, "entries: %ld\n", cnt);
8552 
8553 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8554 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8555 
8556 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8557 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8558 
8559 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8560 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8561 
8562 	if (trace_clocks[tr->clock_id].in_ns) {
8563 		/* local or global for trace_clock */
8564 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8565 		usec_rem = do_div(t, USEC_PER_SEC);
8566 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8567 								t, usec_rem);
8568 
8569 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8570 		usec_rem = do_div(t, USEC_PER_SEC);
8571 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8572 	} else {
8573 		/* counter or tsc mode for trace_clock */
8574 		trace_seq_printf(s, "oldest event ts: %llu\n",
8575 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8576 
8577 		trace_seq_printf(s, "now ts: %llu\n",
8578 				ring_buffer_time_stamp(trace_buf->buffer));
8579 	}
8580 
8581 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8582 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8583 
8584 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8585 	trace_seq_printf(s, "read events: %ld\n", cnt);
8586 
8587 	count = simple_read_from_buffer(ubuf, count, ppos,
8588 					s->buffer, trace_seq_used(s));
8589 
8590 	kfree(s);
8591 
8592 	return count;
8593 }
8594 
8595 static const struct file_operations tracing_stats_fops = {
8596 	.open		= tracing_open_generic_tr,
8597 	.read		= tracing_stats_read,
8598 	.llseek		= generic_file_llseek,
8599 	.release	= tracing_release_generic_tr,
8600 };
8601 
8602 #ifdef CONFIG_DYNAMIC_FTRACE
8603 
8604 static ssize_t
8605 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8606 		  size_t cnt, loff_t *ppos)
8607 {
8608 	ssize_t ret;
8609 	char *buf;
8610 	int r;
8611 
8612 	/* 512 should be plenty to hold the amount needed */
8613 #define DYN_INFO_BUF_SIZE	512
8614 
8615 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8616 	if (!buf)
8617 		return -ENOMEM;
8618 
8619 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8620 		      "%ld pages:%ld groups: %ld\n"
8621 		      "ftrace boot update time = %llu (ns)\n"
8622 		      "ftrace module total update time = %llu (ns)\n",
8623 		      ftrace_update_tot_cnt,
8624 		      ftrace_number_of_pages,
8625 		      ftrace_number_of_groups,
8626 		      ftrace_update_time,
8627 		      ftrace_total_mod_time);
8628 
8629 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8630 	kfree(buf);
8631 	return ret;
8632 }
8633 
8634 static const struct file_operations tracing_dyn_info_fops = {
8635 	.open		= tracing_open_generic,
8636 	.read		= tracing_read_dyn_info,
8637 	.llseek		= generic_file_llseek,
8638 };
8639 #endif /* CONFIG_DYNAMIC_FTRACE */
8640 
8641 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8642 static void
8643 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8644 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8645 		void *data)
8646 {
8647 	tracing_snapshot_instance(tr);
8648 }
8649 
8650 static void
8651 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8652 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8653 		      void *data)
8654 {
8655 	struct ftrace_func_mapper *mapper = data;
8656 	long *count = NULL;
8657 
8658 	if (mapper)
8659 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8660 
8661 	if (count) {
8662 
8663 		if (*count <= 0)
8664 			return;
8665 
8666 		(*count)--;
8667 	}
8668 
8669 	tracing_snapshot_instance(tr);
8670 }
8671 
8672 static int
8673 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8674 		      struct ftrace_probe_ops *ops, void *data)
8675 {
8676 	struct ftrace_func_mapper *mapper = data;
8677 	long *count = NULL;
8678 
8679 	seq_printf(m, "%ps:", (void *)ip);
8680 
8681 	seq_puts(m, "snapshot");
8682 
8683 	if (mapper)
8684 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8685 
8686 	if (count)
8687 		seq_printf(m, ":count=%ld\n", *count);
8688 	else
8689 		seq_puts(m, ":unlimited\n");
8690 
8691 	return 0;
8692 }
8693 
8694 static int
8695 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8696 		     unsigned long ip, void *init_data, void **data)
8697 {
8698 	struct ftrace_func_mapper *mapper = *data;
8699 
8700 	if (!mapper) {
8701 		mapper = allocate_ftrace_func_mapper();
8702 		if (!mapper)
8703 			return -ENOMEM;
8704 		*data = mapper;
8705 	}
8706 
8707 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8708 }
8709 
8710 static void
8711 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8712 		     unsigned long ip, void *data)
8713 {
8714 	struct ftrace_func_mapper *mapper = data;
8715 
8716 	if (!ip) {
8717 		if (!mapper)
8718 			return;
8719 		free_ftrace_func_mapper(mapper, NULL);
8720 		return;
8721 	}
8722 
8723 	ftrace_func_mapper_remove_ip(mapper, ip);
8724 }
8725 
8726 static struct ftrace_probe_ops snapshot_probe_ops = {
8727 	.func			= ftrace_snapshot,
8728 	.print			= ftrace_snapshot_print,
8729 };
8730 
8731 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8732 	.func			= ftrace_count_snapshot,
8733 	.print			= ftrace_snapshot_print,
8734 	.init			= ftrace_snapshot_init,
8735 	.free			= ftrace_snapshot_free,
8736 };
8737 
8738 static int
8739 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8740 			       char *glob, char *cmd, char *param, int enable)
8741 {
8742 	struct ftrace_probe_ops *ops;
8743 	void *count = (void *)-1;
8744 	char *number;
8745 	int ret;
8746 
8747 	if (!tr)
8748 		return -ENODEV;
8749 
8750 	/* hash funcs only work with set_ftrace_filter */
8751 	if (!enable)
8752 		return -EINVAL;
8753 
8754 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8755 
8756 	if (glob[0] == '!') {
8757 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8758 		if (!ret)
8759 			tracing_disarm_snapshot(tr);
8760 
8761 		return ret;
8762 	}
8763 
8764 	if (!param)
8765 		goto out_reg;
8766 
8767 	number = strsep(&param, ":");
8768 
8769 	if (!strlen(number))
8770 		goto out_reg;
8771 
8772 	/*
8773 	 * We use the callback data field (which is a pointer)
8774 	 * as our counter.
8775 	 */
8776 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8777 	if (ret)
8778 		return ret;
8779 
8780  out_reg:
8781 	ret = tracing_arm_snapshot(tr);
8782 	if (ret < 0)
8783 		goto out;
8784 
8785 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8786 	if (ret < 0)
8787 		tracing_disarm_snapshot(tr);
8788  out:
8789 	return ret < 0 ? ret : 0;
8790 }
8791 
8792 static struct ftrace_func_command ftrace_snapshot_cmd = {
8793 	.name			= "snapshot",
8794 	.func			= ftrace_trace_snapshot_callback,
8795 };
8796 
8797 static __init int register_snapshot_cmd(void)
8798 {
8799 	return register_ftrace_command(&ftrace_snapshot_cmd);
8800 }
8801 #else
8802 static inline __init int register_snapshot_cmd(void) { return 0; }
8803 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8804 
8805 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8806 {
8807 	if (WARN_ON(!tr->dir))
8808 		return ERR_PTR(-ENODEV);
8809 
8810 	/* Top directory uses NULL as the parent */
8811 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8812 		return NULL;
8813 
8814 	/* All sub buffers have a descriptor */
8815 	return tr->dir;
8816 }
8817 
8818 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8819 {
8820 	struct dentry *d_tracer;
8821 
8822 	if (tr->percpu_dir)
8823 		return tr->percpu_dir;
8824 
8825 	d_tracer = tracing_get_dentry(tr);
8826 	if (IS_ERR(d_tracer))
8827 		return NULL;
8828 
8829 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8830 
8831 	MEM_FAIL(!tr->percpu_dir,
8832 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8833 
8834 	return tr->percpu_dir;
8835 }
8836 
8837 static struct dentry *
8838 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8839 		      void *data, long cpu, const struct file_operations *fops)
8840 {
8841 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8842 
8843 	if (ret) /* See tracing_get_cpu() */
8844 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8845 	return ret;
8846 }
8847 
8848 static void
8849 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8850 {
8851 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8852 	struct dentry *d_cpu;
8853 	char cpu_dir[30]; /* 30 characters should be more than enough */
8854 
8855 	if (!d_percpu)
8856 		return;
8857 
8858 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8859 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8860 	if (!d_cpu) {
8861 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8862 		return;
8863 	}
8864 
8865 	/* per cpu trace_pipe */
8866 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8867 				tr, cpu, &tracing_pipe_fops);
8868 
8869 	/* per cpu trace */
8870 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8871 				tr, cpu, &tracing_fops);
8872 
8873 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8874 				tr, cpu, &tracing_buffers_fops);
8875 
8876 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8877 				tr, cpu, &tracing_stats_fops);
8878 
8879 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8880 				tr, cpu, &tracing_entries_fops);
8881 
8882 	if (tr->range_addr_start)
8883 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8884 				      tr, cpu, &tracing_buffer_meta_fops);
8885 #ifdef CONFIG_TRACER_SNAPSHOT
8886 	if (!tr->range_addr_start) {
8887 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8888 				      tr, cpu, &snapshot_fops);
8889 
8890 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8891 				      tr, cpu, &snapshot_raw_fops);
8892 	}
8893 #endif
8894 }
8895 
8896 #ifdef CONFIG_FTRACE_SELFTEST
8897 /* Let selftest have access to static functions in this file */
8898 #include "trace_selftest.c"
8899 #endif
8900 
8901 static ssize_t
8902 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8903 			loff_t *ppos)
8904 {
8905 	struct trace_option_dentry *topt = filp->private_data;
8906 	char *buf;
8907 
8908 	if (topt->flags->val & topt->opt->bit)
8909 		buf = "1\n";
8910 	else
8911 		buf = "0\n";
8912 
8913 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8914 }
8915 
8916 static ssize_t
8917 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8918 			 loff_t *ppos)
8919 {
8920 	struct trace_option_dentry *topt = filp->private_data;
8921 	unsigned long val;
8922 	int ret;
8923 
8924 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8925 	if (ret)
8926 		return ret;
8927 
8928 	if (val != 0 && val != 1)
8929 		return -EINVAL;
8930 
8931 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8932 		mutex_lock(&trace_types_lock);
8933 		ret = __set_tracer_option(topt->tr, topt->flags,
8934 					  topt->opt, !val);
8935 		mutex_unlock(&trace_types_lock);
8936 		if (ret)
8937 			return ret;
8938 	}
8939 
8940 	*ppos += cnt;
8941 
8942 	return cnt;
8943 }
8944 
8945 static int tracing_open_options(struct inode *inode, struct file *filp)
8946 {
8947 	struct trace_option_dentry *topt = inode->i_private;
8948 	int ret;
8949 
8950 	ret = tracing_check_open_get_tr(topt->tr);
8951 	if (ret)
8952 		return ret;
8953 
8954 	filp->private_data = inode->i_private;
8955 	return 0;
8956 }
8957 
8958 static int tracing_release_options(struct inode *inode, struct file *file)
8959 {
8960 	struct trace_option_dentry *topt = file->private_data;
8961 
8962 	trace_array_put(topt->tr);
8963 	return 0;
8964 }
8965 
8966 static const struct file_operations trace_options_fops = {
8967 	.open = tracing_open_options,
8968 	.read = trace_options_read,
8969 	.write = trace_options_write,
8970 	.llseek	= generic_file_llseek,
8971 	.release = tracing_release_options,
8972 };
8973 
8974 /*
8975  * In order to pass in both the trace_array descriptor as well as the index
8976  * to the flag that the trace option file represents, the trace_array
8977  * has a character array of trace_flags_index[], which holds the index
8978  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8979  * The address of this character array is passed to the flag option file
8980  * read/write callbacks.
8981  *
8982  * In order to extract both the index and the trace_array descriptor,
8983  * get_tr_index() uses the following algorithm.
8984  *
8985  *   idx = *ptr;
8986  *
8987  * Since each element of the array holds its own index (remember
8988  * index[1] == 1), dereferencing the pointer yields the flag's index.
8989  *
8990  * Then, to get the trace_array descriptor, subtracting that index
8991  * from the pointer gets us back to the start of the index array:
8992  *
8993  *   ptr - idx == &index[0]
8994  *
8995  * Then a simple container_of() from that pointer gets us to the
8996  * trace_array descriptor.
8997  */
8998 static void get_tr_index(void *data, struct trace_array **ptr,
8999 			 unsigned int *pindex)
9000 {
9001 	*pindex = *(unsigned char *)data;
9002 
9003 	*ptr = container_of(data - *pindex, struct trace_array,
9004 			    trace_flags_index);
9005 }
9006 
9007 static ssize_t
9008 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9009 			loff_t *ppos)
9010 {
9011 	void *tr_index = filp->private_data;
9012 	struct trace_array *tr;
9013 	unsigned int index;
9014 	char *buf;
9015 
9016 	get_tr_index(tr_index, &tr, &index);
9017 
9018 	if (tr->trace_flags & (1 << index))
9019 		buf = "1\n";
9020 	else
9021 		buf = "0\n";
9022 
9023 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9024 }
9025 
9026 static ssize_t
9027 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9028 			 loff_t *ppos)
9029 {
9030 	void *tr_index = filp->private_data;
9031 	struct trace_array *tr;
9032 	unsigned int index;
9033 	unsigned long val;
9034 	int ret;
9035 
9036 	get_tr_index(tr_index, &tr, &index);
9037 
9038 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9039 	if (ret)
9040 		return ret;
9041 
9042 	if (val != 0 && val != 1)
9043 		return -EINVAL;
9044 
9045 	mutex_lock(&event_mutex);
9046 	mutex_lock(&trace_types_lock);
9047 	ret = set_tracer_flag(tr, 1 << index, val);
9048 	mutex_unlock(&trace_types_lock);
9049 	mutex_unlock(&event_mutex);
9050 
9051 	if (ret < 0)
9052 		return ret;
9053 
9054 	*ppos += cnt;
9055 
9056 	return cnt;
9057 }
9058 
9059 static const struct file_operations trace_options_core_fops = {
9060 	.open = tracing_open_generic,
9061 	.read = trace_options_core_read,
9062 	.write = trace_options_core_write,
9063 	.llseek = generic_file_llseek,
9064 };
9065 
9066 struct dentry *trace_create_file(const char *name,
9067 				 umode_t mode,
9068 				 struct dentry *parent,
9069 				 void *data,
9070 				 const struct file_operations *fops)
9071 {
9072 	struct dentry *ret;
9073 
9074 	ret = tracefs_create_file(name, mode, parent, data, fops);
9075 	if (!ret)
9076 		pr_warn("Could not create tracefs '%s' entry\n", name);
9077 
9078 	return ret;
9079 }
9080 
9081 
9082 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9083 {
9084 	struct dentry *d_tracer;
9085 
9086 	if (tr->options)
9087 		return tr->options;
9088 
9089 	d_tracer = tracing_get_dentry(tr);
9090 	if (IS_ERR(d_tracer))
9091 		return NULL;
9092 
9093 	tr->options = tracefs_create_dir("options", d_tracer);
9094 	if (!tr->options) {
9095 		pr_warn("Could not create tracefs directory 'options'\n");
9096 		return NULL;
9097 	}
9098 
9099 	return tr->options;
9100 }
9101 
9102 static void
9103 create_trace_option_file(struct trace_array *tr,
9104 			 struct trace_option_dentry *topt,
9105 			 struct tracer_flags *flags,
9106 			 struct tracer_opt *opt)
9107 {
9108 	struct dentry *t_options;
9109 
9110 	t_options = trace_options_init_dentry(tr);
9111 	if (!t_options)
9112 		return;
9113 
9114 	topt->flags = flags;
9115 	topt->opt = opt;
9116 	topt->tr = tr;
9117 
9118 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9119 					t_options, topt, &trace_options_fops);
9120 
9121 }
9122 
9123 static void
9124 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9125 {
9126 	struct trace_option_dentry *topts;
9127 	struct trace_options *tr_topts;
9128 	struct tracer_flags *flags;
9129 	struct tracer_opt *opts;
9130 	int cnt;
9131 	int i;
9132 
9133 	if (!tracer)
9134 		return;
9135 
9136 	flags = tracer->flags;
9137 
9138 	if (!flags || !flags->opts)
9139 		return;
9140 
9141 	/*
9142 	 * If this is an instance, only create flags for tracers
9143 	 * the instance may have.
9144 	 */
9145 	if (!trace_ok_for_array(tracer, tr))
9146 		return;
9147 
9148 	for (i = 0; i < tr->nr_topts; i++) {
9149 		/* Make sure there are no duplicate flags. */
9150 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9151 			return;
9152 	}
9153 
9154 	opts = flags->opts;
9155 
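	/* Count the options; the array is terminated by a NULL name */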
9156 	for (cnt = 0; opts[cnt].name; cnt++)
9157 		;
9158 
9159 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9160 	if (!topts)
9161 		return;
9162 
9163 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9164 			    GFP_KERNEL);
9165 	if (!tr_topts) {
9166 		kfree(topts);
9167 		return;
9168 	}
9169 
9170 	tr->topts = tr_topts;
9171 	tr->topts[tr->nr_topts].tracer = tracer;
9172 	tr->topts[tr->nr_topts].topts = topts;
9173 	tr->nr_topts++;
9174 
9175 	for (cnt = 0; opts[cnt].name; cnt++) {
9176 		create_trace_option_file(tr, &topts[cnt], flags,
9177 					 &opts[cnt]);
9178 		MEM_FAIL(topts[cnt].entry == NULL,
9179 			  "Failed to create trace option: %s",
9180 			  opts[cnt].name);
9181 	}
9182 }
9183 
9184 static struct dentry *
9185 create_trace_option_core_file(struct trace_array *tr,
9186 			      const char *option, long index)
9187 {
9188 	struct dentry *t_options;
9189 
9190 	t_options = trace_options_init_dentry(tr);
9191 	if (!t_options)
9192 		return NULL;
9193 
9194 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9195 				 (void *)&tr->trace_flags_index[index],
9196 				 &trace_options_core_fops);
9197 }
9198 
9199 static void create_trace_options_dir(struct trace_array *tr)
9200 {
9201 	struct dentry *t_options;
9202 	bool top_level = tr == &global_trace;
9203 	int i;
9204 
9205 	t_options = trace_options_init_dentry(tr);
9206 	if (!t_options)
9207 		return;
9208 
9209 	for (i = 0; trace_options[i]; i++) {
9210 		if (top_level ||
9211 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9212 			create_trace_option_core_file(tr, trace_options[i], i);
9213 	}
9214 }
9215 
9216 static ssize_t
9217 rb_simple_read(struct file *filp, char __user *ubuf,
9218 	       size_t cnt, loff_t *ppos)
9219 {
9220 	struct trace_array *tr = filp->private_data;
9221 	char buf[64];
9222 	int r;
9223 
9224 	r = tracer_tracing_is_on(tr);
9225 	r = sprintf(buf, "%d\n", r);
9226 
9227 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9228 }
9229 
9230 static ssize_t
9231 rb_simple_write(struct file *filp, const char __user *ubuf,
9232 		size_t cnt, loff_t *ppos)
9233 {
9234 	struct trace_array *tr = filp->private_data;
9235 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9236 	unsigned long val;
9237 	int ret;
9238 
9239 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9240 	if (ret)
9241 		return ret;
9242 
9243 	if (buffer) {
9244 		mutex_lock(&trace_types_lock);
9245 		if (!!val == tracer_tracing_is_on(tr)) {
9246 			val = 0; /* do nothing */
9247 		} else if (val) {
9248 			tracer_tracing_on(tr);
9249 			if (tr->current_trace->start)
9250 				tr->current_trace->start(tr);
9251 		} else {
9252 			tracer_tracing_off(tr);
9253 			if (tr->current_trace->stop)
9254 				tr->current_trace->stop(tr);
9255 			/* Wake up any waiters */
9256 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9257 		}
9258 		mutex_unlock(&trace_types_lock);
9259 	}
9260 
9261 	(*ppos)++;
9262 
9263 	return cnt;
9264 }
9265 
9266 static const struct file_operations rb_simple_fops = {
9267 	.open		= tracing_open_generic_tr,
9268 	.read		= rb_simple_read,
9269 	.write		= rb_simple_write,
9270 	.release	= tracing_release_generic_tr,
9271 	.llseek		= default_llseek,
9272 };
9273 
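/*
 * buffer_percent controls how full (in percent) the ring buffer must be
 * before readers blocked in wait_on_pipe() are woken up.
 */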
9274 static ssize_t
9275 buffer_percent_read(struct file *filp, char __user *ubuf,
9276 		    size_t cnt, loff_t *ppos)
9277 {
9278 	struct trace_array *tr = filp->private_data;
9279 	char buf[64];
9280 	int r;
9281 
9282 	r = tr->buffer_percent;
9283 	r = sprintf(buf, "%d\n", r);
9284 
9285 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9286 }
9287 
9288 static ssize_t
9289 buffer_percent_write(struct file *filp, const char __user *ubuf,
9290 		     size_t cnt, loff_t *ppos)
9291 {
9292 	struct trace_array *tr = filp->private_data;
9293 	unsigned long val;
9294 	int ret;
9295 
9296 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9297 	if (ret)
9298 		return ret;
9299 
9300 	if (val > 100)
9301 		return -EINVAL;
9302 
9303 	tr->buffer_percent = val;
9304 
9305 	(*ppos)++;
9306 
9307 	return cnt;
9308 }
9309 
9310 static const struct file_operations buffer_percent_fops = {
9311 	.open		= tracing_open_generic_tr,
9312 	.read		= buffer_percent_read,
9313 	.write		= buffer_percent_write,
9314 	.release	= tracing_release_generic_tr,
9315 	.llseek		= default_llseek,
9316 };
9317 
9318 static ssize_t
9319 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9320 {
9321 	struct trace_array *tr = filp->private_data;
9322 	size_t size;
9323 	char buf[64];
9324 	int order;
9325 	int r;
9326 
9327 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9328 	size = (PAGE_SIZE << order) / 1024;
9329 
9330 	r = sprintf(buf, "%zd\n", size);
9331 
9332 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9333 }
9334 
9335 static ssize_t
9336 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9337 			 size_t cnt, loff_t *ppos)
9338 {
9339 	struct trace_array *tr = filp->private_data;
9340 	unsigned long val;
9341 	int old_order;
9342 	int order;
9343 	int pages;
9344 	int ret;
9345 
9346 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9347 	if (ret)
9348 		return ret;
9349 
9350 	val *= 1024; /* value passed in is in KB */
9351 
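	/*
	 * Convert the requested size into the smallest power-of-two number
	 * of pages (the sub-buffer "order") that covers it.
	 */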
9352 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9353 	order = fls(pages - 1);
9354 
9355 	/* limit between 1 and 128 system pages */
9356 	if (order < 0 || order > 7)
9357 		return -EINVAL;
9358 
9359 	/* Do not allow tracing while changing the order of the ring buffer */
9360 	tracing_stop_tr(tr);
9361 
9362 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9363 	if (old_order == order)
9364 		goto out;
9365 
9366 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9367 	if (ret)
9368 		goto out;
9369 
9370 #ifdef CONFIG_TRACER_MAX_TRACE
9371 
9372 	if (!tr->allocated_snapshot)
9373 		goto out_max;
9374 
9375 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9376 	if (ret) {
9377 		/* Put back the old order */
9378 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9379 		if (WARN_ON_ONCE(cnt)) {
9380 			/*
9381 			 * AARGH! We are left with different orders!
9382 			 * The max buffer is our "snapshot" buffer.
9383 			 * When a tracer needs a snapshot (one of the
9384 			 * latency tracers), it swaps the max buffer
9385 			 * with the saved snapshot. We succeeded in
9386 			 * updating the order of the main buffer, but failed to
9387 			 * update the order of the max buffer. But when we tried
9388 			 * to reset the main buffer to the original size, we
9389 			 * failed there too. This is very unlikely to
9390 			 * happen, but if it does, warn and kill all
9391 			 * tracing.
9392 			 */
9393 			tracing_disabled = 1;
9394 		}
9395 		goto out;
9396 	}
9397  out_max:
9398 #endif
9399 	(*ppos)++;
9400  out:
9401 	if (ret)
9402 		cnt = ret;
9403 	tracing_start_tr(tr);
9404 	return cnt;
9405 }
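
/*
 * Worked example of the order calculation above, assuming 4K pages:
 * writing "64" to buffer_subbuf_size_kb gives val = 64 * 1024 bytes,
 * pages = DIV_ROUND_UP(65536, 4096) = 16, order = fls(15) = 4, so each
 * sub-buffer becomes 1 << 4 = 16 pages (64K). Sizes are rounded up to the
 * next power-of-two number of pages, and anything above order 7
 * (128 pages) is rejected with -EINVAL.
 */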
9406 
9407 static const struct file_operations buffer_subbuf_size_fops = {
9408 	.open		= tracing_open_generic_tr,
9409 	.read		= buffer_subbuf_size_read,
9410 	.write		= buffer_subbuf_size_write,
9411 	.release	= tracing_release_generic_tr,
9412 	.llseek		= default_llseek,
9413 };
9414 
9415 static struct dentry *trace_instance_dir;
9416 
9417 static void
9418 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9419 
9420 #ifdef CONFIG_MODULES
9421 static int make_mod_delta(struct module *mod, void *data)
9422 {
9423 	struct trace_module_delta *module_delta;
9424 	struct trace_scratch *tscratch;
9425 	struct trace_mod_entry *entry;
9426 	struct trace_array *tr = data;
9427 	int i;
9428 
9429 	tscratch = tr->scratch;
9430 	module_delta = READ_ONCE(tr->module_delta);
9431 	for (i = 0; i < tscratch->nr_entries; i++) {
9432 		entry = &tscratch->entries[i];
9433 		if (strcmp(mod->name, entry->mod_name))
9434 			continue;
9435 		if (mod->state == MODULE_STATE_GOING)
9436 			module_delta->delta[i] = 0;
9437 		else
9438 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9439 						 - entry->mod_addr;
9440 		break;
9441 	}
9442 	return 0;
9443 }
9444 #else
9445 static int make_mod_delta(struct module *mod, void *data)
9446 {
9447 	return 0;
9448 }
9449 #endif
9450 
9451 static int mod_addr_comp(const void *a, const void *b, const void *data)
9452 {
9453 	const struct trace_mod_entry *e1 = a;
9454 	const struct trace_mod_entry *e2 = b;
9455 
9456 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9457 }
9458 
9459 static void setup_trace_scratch(struct trace_array *tr,
9460 				struct trace_scratch *tscratch, unsigned int size)
9461 {
9462 	struct trace_module_delta *module_delta;
9463 	struct trace_mod_entry *entry;
9464 	int i, nr_entries;
9465 
9466 	if (!tscratch)
9467 		return;
9468 
9469 	tr->scratch = tscratch;
9470 	tr->scratch_size = size;
9471 
9472 	if (tscratch->text_addr)
9473 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9474 
9475 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9476 		goto reset;
9477 
9478 	/* Check if each module name is a valid string */
9479 	for (i = 0; i < tscratch->nr_entries; i++) {
9480 		int n;
9481 
9482 		entry = &tscratch->entries[i];
9483 
9484 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9485 			if (entry->mod_name[n] == '\0')
9486 				break;
9487 			if (!isprint(entry->mod_name[n]))
9488 				goto reset;
9489 		}
9490 		if (n == MODULE_NAME_LEN)
9491 			goto reset;
9492 	}
9493 
9494 	/* Sort the entries so that we can find the appropriate module from an address. */
9495 	nr_entries = tscratch->nr_entries;
9496 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9497 	       mod_addr_comp, NULL, NULL);
9498 
9499 	if (IS_ENABLED(CONFIG_MODULES)) {
9500 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9501 		if (!module_delta) {
9502 			pr_info("module_delta allocation failed. Not able to decode module address.\n");
9503 			goto reset;
9504 		}
9505 		init_rcu_head(&module_delta->rcu);
9506 	} else
9507 		module_delta = NULL;
9508 	WRITE_ONCE(tr->module_delta, module_delta);
9509 
9510 	/* Scan modules to make text delta for modules. */
9511 	module_for_each_mod(make_mod_delta, tr);
9512 	return;
9513  reset:
9514 	/* Invalid trace modules */
9515 	memset(tscratch, 0, size);
9516 }
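
/*
 * The scratch area validated above lives in the persistent (memory
 * mapped) ring buffer region: it records the previous boot's _text
 * address and the name/address of each loaded module, so that the deltas
 * computed here can be used to translate addresses found in the "last
 * boot" trace data. Anything that does not look sane is wiped rather
 * than trusted.
 */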
9517 
9518 static int
9519 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9520 {
9521 	enum ring_buffer_flags rb_flags;
9522 	struct trace_scratch *tscratch;
9523 	unsigned int scratch_size = 0;
9524 
9525 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9526 
9527 	buf->tr = tr;
9528 
9529 	if (tr->range_addr_start && tr->range_addr_size) {
9530 		/* Add scratch buffer to handle 128 modules */
9531 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9532 						      tr->range_addr_start,
9533 						      tr->range_addr_size,
9534 						      struct_size(tscratch, entries, 128));
9535 
9536 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9537 		setup_trace_scratch(tr, tscratch, scratch_size);
9538 
9539 		/*
9540 		 * This is basically the same as a mapped buffer,
9541 		 * with the same restrictions.
9542 		 */
9543 		tr->mapped++;
9544 	} else {
9545 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9546 	}
9547 	if (!buf->buffer)
9548 		return -ENOMEM;
9549 
9550 	buf->data = alloc_percpu(struct trace_array_cpu);
9551 	if (!buf->data) {
9552 		ring_buffer_free(buf->buffer);
9553 		buf->buffer = NULL;
9554 		return -ENOMEM;
9555 	}
9556 
9557 	/* Allocate the first page for all buffers */
9558 	set_buffer_entries(&tr->array_buffer,
9559 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9560 
9561 	return 0;
9562 }
9563 
9564 static void free_trace_buffer(struct array_buffer *buf)
9565 {
9566 	if (buf->buffer) {
9567 		ring_buffer_free(buf->buffer);
9568 		buf->buffer = NULL;
9569 		free_percpu(buf->data);
9570 		buf->data = NULL;
9571 	}
9572 }
9573 
9574 static int allocate_trace_buffers(struct trace_array *tr, int size)
9575 {
9576 	int ret;
9577 
9578 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9579 	if (ret)
9580 		return ret;
9581 
9582 #ifdef CONFIG_TRACER_MAX_TRACE
9583 	/* Boot mapped (fixed address) buffer trace arrays do not have snapshot buffers */
9584 	if (tr->range_addr_start)
9585 		return 0;
9586 
9587 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9588 				    allocate_snapshot ? size : 1);
9589 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9590 		free_trace_buffer(&tr->array_buffer);
9591 		return -ENOMEM;
9592 	}
9593 	tr->allocated_snapshot = allocate_snapshot;
9594 
9595 	allocate_snapshot = false;
9596 #endif
9597 
9598 	return 0;
9599 }
9600 
9601 static void free_trace_buffers(struct trace_array *tr)
9602 {
9603 	if (!tr)
9604 		return;
9605 
9606 	free_trace_buffer(&tr->array_buffer);
9607 
9608 #ifdef CONFIG_TRACER_MAX_TRACE
9609 	free_trace_buffer(&tr->max_buffer);
9610 #endif
9611 
9612 	if (tr->range_addr_start)
9613 		vunmap((void *)tr->range_addr_start);
9614 }
9615 
9616 static void init_trace_flags_index(struct trace_array *tr)
9617 {
9618 	int i;
9619 
9620 	/* Used by the trace options files */
9621 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9622 		tr->trace_flags_index[i] = i;
9623 }
9624 
9625 static void __update_tracer_options(struct trace_array *tr)
9626 {
9627 	struct tracer *t;
9628 
9629 	for (t = trace_types; t; t = t->next)
9630 		add_tracer_options(tr, t);
9631 }
9632 
9633 static void update_tracer_options(struct trace_array *tr)
9634 {
9635 	mutex_lock(&trace_types_lock);
9636 	tracer_options_updated = true;
9637 	__update_tracer_options(tr);
9638 	mutex_unlock(&trace_types_lock);
9639 }
9640 
9641 /* Must have trace_types_lock held */
9642 struct trace_array *trace_array_find(const char *instance)
9643 {
9644 	struct trace_array *tr, *found = NULL;
9645 
9646 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9647 		if (tr->name && strcmp(tr->name, instance) == 0) {
9648 			found = tr;
9649 			break;
9650 		}
9651 	}
9652 
9653 	return found;
9654 }
9655 
9656 struct trace_array *trace_array_find_get(const char *instance)
9657 {
9658 	struct trace_array *tr;
9659 
9660 	mutex_lock(&trace_types_lock);
9661 	tr = trace_array_find(instance);
9662 	if (tr)
9663 		tr->ref++;
9664 	mutex_unlock(&trace_types_lock);
9665 
9666 	return tr;
9667 }
9668 
9669 static int trace_array_create_dir(struct trace_array *tr)
9670 {
9671 	int ret;
9672 
9673 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9674 	if (!tr->dir)
9675 		return -EINVAL;
9676 
9677 	ret = event_trace_add_tracer(tr->dir, tr);
9678 	if (ret) {
9679 		tracefs_remove(tr->dir);
9680 		return ret;
9681 	}
9682 
9683 	init_tracer_tracefs(tr, tr->dir);
9684 	__update_tracer_options(tr);
9685 
9686 	return ret;
9687 }
9688 
9689 static struct trace_array *
9690 trace_array_create_systems(const char *name, const char *systems,
9691 			   unsigned long range_addr_start,
9692 			   unsigned long range_addr_size)
9693 {
9694 	struct trace_array *tr;
9695 	int ret;
9696 
9697 	ret = -ENOMEM;
9698 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9699 	if (!tr)
9700 		return ERR_PTR(ret);
9701 
9702 	tr->name = kstrdup(name, GFP_KERNEL);
9703 	if (!tr->name)
9704 		goto out_free_tr;
9705 
9706 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9707 		goto out_free_tr;
9708 
9709 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9710 		goto out_free_tr;
9711 
9712 	if (systems) {
9713 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9714 		if (!tr->system_names)
9715 			goto out_free_tr;
9716 	}
9717 
9718 	/* Only for boot up memory mapped ring buffers */
9719 	tr->range_addr_start = range_addr_start;
9720 	tr->range_addr_size = range_addr_size;
9721 
9722 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9723 
9724 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9725 
9726 	raw_spin_lock_init(&tr->start_lock);
9727 
9728 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9729 #ifdef CONFIG_TRACER_MAX_TRACE
9730 	spin_lock_init(&tr->snapshot_trigger_lock);
9731 #endif
9732 	tr->current_trace = &nop_trace;
9733 
9734 	INIT_LIST_HEAD(&tr->systems);
9735 	INIT_LIST_HEAD(&tr->events);
9736 	INIT_LIST_HEAD(&tr->hist_vars);
9737 	INIT_LIST_HEAD(&tr->err_log);
9738 
9739 #ifdef CONFIG_MODULES
9740 	INIT_LIST_HEAD(&tr->mod_events);
9741 #endif
9742 
9743 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9744 		goto out_free_tr;
9745 
9746 	/* The ring buffer is expanded by default */
9747 	trace_set_ring_buffer_expanded(tr);
9748 
9749 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9750 		goto out_free_tr;
9751 
9752 	ftrace_init_trace_array(tr);
9753 
9754 	init_trace_flags_index(tr);
9755 
9756 	if (trace_instance_dir) {
9757 		ret = trace_array_create_dir(tr);
9758 		if (ret)
9759 			goto out_free_tr;
9760 	} else
9761 		__trace_early_add_events(tr);
9762 
9763 	list_add(&tr->list, &ftrace_trace_arrays);
9764 
9765 	tr->ref++;
9766 
9767 	return tr;
9768 
9769  out_free_tr:
9770 	ftrace_free_ftrace_ops(tr);
9771 	free_trace_buffers(tr);
9772 	free_cpumask_var(tr->pipe_cpumask);
9773 	free_cpumask_var(tr->tracing_cpumask);
9774 	kfree_const(tr->system_names);
9775 	kfree(tr->range_name);
9776 	kfree(tr->name);
9777 	kfree(tr);
9778 
9779 	return ERR_PTR(ret);
9780 }
9781 
9782 static struct trace_array *trace_array_create(const char *name)
9783 {
9784 	return trace_array_create_systems(name, NULL, 0, 0);
9785 }
9786 
9787 static int instance_mkdir(const char *name)
9788 {
9789 	struct trace_array *tr;
9790 	int ret;
9791 
9792 	guard(mutex)(&event_mutex);
9793 	guard(mutex)(&trace_types_lock);
9794 
9795 	ret = -EEXIST;
9796 	if (trace_array_find(name))
9797 		return -EEXIST;
9798 
9799 	tr = trace_array_create(name);
9800 
9801 	ret = PTR_ERR_OR_ZERO(tr);
9802 
9803 	return ret;
9804 }
9805 
9806 static u64 map_pages(u64 start, u64 size)
9807 {
9808 	struct page **pages;
9809 	phys_addr_t page_start;
9810 	unsigned int page_count;
9811 	unsigned int i;
9812 	void *vaddr;
9813 
9814 	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9815 
9816 	page_start = start;
9817 	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9818 	if (!pages)
9819 		return 0;
9820 
9821 	for (i = 0; i < page_count; i++) {
9822 		phys_addr_t addr = page_start + i * PAGE_SIZE;
9823 		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9824 	}
9825 	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9826 	kfree(pages);
9827 
9828 	return (u64)(unsigned long)vaddr;
9829 }
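
/*
 * map_pages() turns a physical range (e.g. one named via reserve_mem= or
 * given directly on the kernel command line) into a kernel virtual
 * mapping with vmap(), returning 0 on failure. enable_instances() below
 * uses it to place a persistent boot instance ring buffer on that memory.
 */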
9830 
9831 /**
9832  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9833  * @name: The name of the trace array to be looked up/created.
9834  * @systems: A list of systems to create event directories for (NULL for all)
9835  *
9836  * Returns a pointer to the trace array with the given name, or
9837  * NULL if it cannot be created.
9838  *
9839  * NOTE: This function increments the reference counter associated with the
9840  * trace array returned. This makes sure it cannot be freed while in use.
9841  * Use trace_array_put() once the trace array is no longer needed.
9842  * If the trace_array is to be freed, trace_array_destroy() needs to
9843  * be called after the trace_array_put(), or simply let user space delete
9844  * it from the tracefs instances directory. But until the
9845  * trace_array_put() is called, user space can not delete it.
9846  *
9847  */
9848 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9849 {
9850 	struct trace_array *tr;
9851 
9852 	guard(mutex)(&event_mutex);
9853 	guard(mutex)(&trace_types_lock);
9854 
9855 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9856 		if (tr->name && strcmp(tr->name, name) == 0) {
9857 			tr->ref++;
9858 			return tr;
9859 		}
9860 	}
9861 
9862 	tr = trace_array_create_systems(name, systems, 0, 0);
9863 
9864 	if (IS_ERR(tr))
9865 		tr = NULL;
9866 	else
9867 		tr->ref++;
9868 
9869 	return tr;
9870 }
9871 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
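
/*
 * A minimal in-kernel usage sketch (the instance name "my-instance" is
 * only an example, not something defined by this file):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my-instance", NULL);
 *	if (!tr)
 *		return -ENODEV;
 *	... use tr (enable events, write into it, etc.) ...
 *	trace_array_put(tr);
 *
 * and, only if the instance should also be removed, follow the
 * trace_array_put() with trace_array_destroy(tr), as described in the
 * comment above.
 */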
9872 
9873 static int __remove_instance(struct trace_array *tr)
9874 {
9875 	int i;
9876 
9877 	/* Reference counter for a newly created trace array = 1. */
9878 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9879 		return -EBUSY;
9880 
9881 	list_del(&tr->list);
9882 
9883 	/* Disable all the flags that were enabled coming in */
9884 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9885 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9886 			set_tracer_flag(tr, 1 << i, 0);
9887 	}
9888 
9889 	if (printk_trace == tr)
9890 		update_printk_trace(&global_trace);
9891 
9892 	tracing_set_nop(tr);
9893 	clear_ftrace_function_probes(tr);
9894 	event_trace_del_tracer(tr);
9895 	ftrace_clear_pids(tr);
9896 	ftrace_destroy_function_files(tr);
9897 	tracefs_remove(tr->dir);
9898 	free_percpu(tr->last_func_repeats);
9899 	free_trace_buffers(tr);
9900 	clear_tracing_err_log(tr);
9901 
9902 	if (tr->range_name) {
9903 		reserve_mem_release_by_name(tr->range_name);
9904 		kfree(tr->range_name);
9905 	}
9906 
9907 	for (i = 0; i < tr->nr_topts; i++) {
9908 		kfree(tr->topts[i].topts);
9909 	}
9910 	kfree(tr->topts);
9911 
9912 	free_cpumask_var(tr->pipe_cpumask);
9913 	free_cpumask_var(tr->tracing_cpumask);
9914 	kfree_const(tr->system_names);
9915 	kfree(tr->name);
9916 	kfree(tr);
9917 
9918 	return 0;
9919 }
9920 
9921 int trace_array_destroy(struct trace_array *this_tr)
9922 {
9923 	struct trace_array *tr;
9924 
9925 	if (!this_tr)
9926 		return -EINVAL;
9927 
9928 	guard(mutex)(&event_mutex);
9929 	guard(mutex)(&trace_types_lock);
9930 
9931 
9932 	/* Make sure the trace array exists before destroying it. */
9933 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9934 		if (tr == this_tr)
9935 			return __remove_instance(tr);
9936 	}
9937 
9938 	return -ENODEV;
9939 }
9940 EXPORT_SYMBOL_GPL(trace_array_destroy);
9941 
9942 static int instance_rmdir(const char *name)
9943 {
9944 	struct trace_array *tr;
9945 
9946 	guard(mutex)(&event_mutex);
9947 	guard(mutex)(&trace_types_lock);
9948 
9949 	tr = trace_array_find(name);
9950 	if (!tr)
9951 		return -ENODEV;
9952 
9953 	return __remove_instance(tr);
9954 }
9955 
9956 static __init void create_trace_instances(struct dentry *d_tracer)
9957 {
9958 	struct trace_array *tr;
9959 
9960 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9961 							 instance_mkdir,
9962 							 instance_rmdir);
9963 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9964 		return;
9965 
9966 	guard(mutex)(&event_mutex);
9967 	guard(mutex)(&trace_types_lock);
9968 
9969 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9970 		if (!tr->name)
9971 			continue;
9972 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9973 			     "Failed to create instance directory\n"))
9974 			return;
9975 	}
9976 }
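
/*
 * The "instances" directory created above is driven entirely from user
 * space; a usage sketch, assuming the default tracefs mount point:
 *
 *   # mkdir /sys/kernel/tracing/instances/foo    # calls instance_mkdir("foo")
 *   # rmdir /sys/kernel/tracing/instances/foo    # calls instance_rmdir("foo")
 *
 * The rmdir fails with -EBUSY while something still holds a reference to
 * the instance (see __remove_instance() above).
 */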
9977 
9978 static void
9979 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9980 {
9981 	int cpu;
9982 
9983 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9984 			tr, &show_traces_fops);
9985 
9986 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9987 			tr, &set_tracer_fops);
9988 
9989 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9990 			  tr, &tracing_cpumask_fops);
9991 
9992 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9993 			  tr, &tracing_iter_fops);
9994 
9995 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9996 			  tr, &tracing_fops);
9997 
9998 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9999 			  tr, &tracing_pipe_fops);
10000 
10001 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10002 			  tr, &tracing_entries_fops);
10003 
10004 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10005 			  tr, &tracing_total_entries_fops);
10006 
10007 	trace_create_file("free_buffer", 0200, d_tracer,
10008 			  tr, &tracing_free_buffer_fops);
10009 
10010 	trace_create_file("trace_marker", 0220, d_tracer,
10011 			  tr, &tracing_mark_fops);
10012 
10013 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10014 
10015 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10016 			  tr, &tracing_mark_raw_fops);
10017 
10018 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10019 			  &trace_clock_fops);
10020 
10021 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10022 			  tr, &rb_simple_fops);
10023 
10024 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10025 			  &trace_time_stamp_mode_fops);
10026 
10027 	tr->buffer_percent = 50;
10028 
10029 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10030 			tr, &buffer_percent_fops);
10031 
10032 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10033 			  tr, &buffer_subbuf_size_fops);
10034 
10035 	create_trace_options_dir(tr);
10036 
10037 #ifdef CONFIG_TRACER_MAX_TRACE
10038 	trace_create_maxlat_file(tr, d_tracer);
10039 #endif
10040 
10041 	if (ftrace_create_function_files(tr, d_tracer))
10042 		MEM_FAIL(1, "Could not allocate function filter files");
10043 
10044 	if (tr->range_addr_start) {
10045 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10046 				  tr, &last_boot_fops);
10047 #ifdef CONFIG_TRACER_SNAPSHOT
10048 	} else {
10049 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10050 				  tr, &snapshot_fops);
10051 #endif
10052 	}
10053 
10054 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10055 			  tr, &tracing_err_log_fops);
10056 
10057 	for_each_tracing_cpu(cpu)
10058 		tracing_init_tracefs_percpu(tr, cpu);
10059 
10060 	ftrace_init_tracefs(tr, d_tracer);
10061 }
10062 
10063 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10064 {
10065 	struct vfsmount *mnt;
10066 	struct file_system_type *type;
10067 
10068 	/*
10069 	 * To maintain backward compatibility for tools that mount
10070 	 * debugfs to get to the tracing facility, tracefs is automatically
10071 	 * mounted to the debugfs/tracing directory.
10072 	 */
10073 	type = get_fs_type("tracefs");
10074 	if (!type)
10075 		return NULL;
10076 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10077 	put_filesystem(type);
10078 	if (IS_ERR(mnt))
10079 		return NULL;
10080 	mntget(mnt);
10081 
10082 	return mnt;
10083 }
10084 
10085 /**
10086  * tracing_init_dentry - initialize top level trace array
10087  *
10088  * This is called when creating files or directories in the tracing
10089  * directory. It is called via fs_initcall() by any of the boot up code
10090  * and expects to return the dentry of the top level tracing directory.
10091  */
10092 int tracing_init_dentry(void)
10093 {
10094 	struct trace_array *tr = &global_trace;
10095 
10096 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10097 		pr_warn("Tracing disabled due to lockdown\n");
10098 		return -EPERM;
10099 	}
10100 
10101 	/* The top level trace array uses NULL as parent */
10102 	if (tr->dir)
10103 		return 0;
10104 
10105 	if (WARN_ON(!tracefs_initialized()))
10106 		return -ENODEV;
10107 
10108 	/*
10109 	 * As there may still be users that expect the tracing
10110 	 * files to exist in debugfs/tracing, we must automount
10111 	 * the tracefs file system there, so older tools still
10112 	 * work with the newer kernel.
10113 	 */
10114 	tr->dir = debugfs_create_automount("tracing", NULL,
10115 					   trace_automount, NULL);
10116 
10117 	return 0;
10118 }
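
/*
 * In other words, new tooling is expected to use the tracefs mount
 * directly, while the automount above keeps the legacy path working:
 *
 *   # mount -t tracefs nodev /sys/kernel/tracing   # typically done at boot
 *   # ls /sys/kernel/debug/tracing                 # legacy path, automounts tracefs
 */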
10119 
10120 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10121 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10122 
10123 static struct workqueue_struct *eval_map_wq __initdata;
10124 static struct work_struct eval_map_work __initdata;
10125 static struct work_struct tracerfs_init_work __initdata;
10126 
10127 static void __init eval_map_work_func(struct work_struct *work)
10128 {
10129 	int len;
10130 
10131 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10132 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10133 }
10134 
10135 static int __init trace_eval_init(void)
10136 {
10137 	INIT_WORK(&eval_map_work, eval_map_work_func);
10138 
10139 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10140 	if (!eval_map_wq) {
10141 		pr_err("Unable to allocate eval_map_wq\n");
10142 		/* Do work here */
10143 		eval_map_work_func(&eval_map_work);
10144 		return -ENOMEM;
10145 	}
10146 
10147 	queue_work(eval_map_wq, &eval_map_work);
10148 	return 0;
10149 }
10150 
10151 subsys_initcall(trace_eval_init);
10152 
10153 static int __init trace_eval_sync(void)
10154 {
10155 	/* Make sure the eval map updates are finished */
10156 	if (eval_map_wq)
10157 		destroy_workqueue(eval_map_wq);
10158 	return 0;
10159 }
10160 
10161 late_initcall_sync(trace_eval_sync);
10162 
10163 
10164 #ifdef CONFIG_MODULES
10165 
10166 bool module_exists(const char *module)
10167 {
10168 	/* All modules have the symbol __this_module */
10169 	static const char this_mod[] = "__this_module";
10170 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
10171 	unsigned long val;
10172 	int n;
10173 
10174 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10175 
10176 	if (n > sizeof(modname) - 1)
10177 		return false;
10178 
10179 	val = module_kallsyms_lookup_name(modname);
10180 	return val != 0;
10181 }
10182 
10183 static void trace_module_add_evals(struct module *mod)
10184 {
10185 	if (!mod->num_trace_evals)
10186 		return;
10187 
10188 	/*
10189 	 * Modules with bad taint do not have events created, do
10190 	 * not bother with enums either.
10191 	 */
10192 	if (trace_module_has_bad_taint(mod))
10193 		return;
10194 
10195 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10196 }
10197 
10198 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10199 static void trace_module_remove_evals(struct module *mod)
10200 {
10201 	union trace_eval_map_item *map;
10202 	union trace_eval_map_item **last = &trace_eval_maps;
10203 
10204 	if (!mod->num_trace_evals)
10205 		return;
10206 
10207 	guard(mutex)(&trace_eval_mutex);
10208 
10209 	map = trace_eval_maps;
10210 
10211 	while (map) {
10212 		if (map->head.mod == mod)
10213 			break;
10214 		map = trace_eval_jmp_to_tail(map);
10215 		last = &map->tail.next;
10216 		map = map->tail.next;
10217 	}
10218 	if (!map)
10219 		return;
10220 
10221 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10222 	kfree(map);
10223 }
10224 #else
10225 static inline void trace_module_remove_evals(struct module *mod) { }
10226 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10227 
10228 static void trace_module_record(struct module *mod, bool add)
10229 {
10230 	struct trace_array *tr;
10231 	unsigned long flags;
10232 
10233 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10234 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10235 		/* Update any persistent trace array that has already been started */
10236 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10237 			guard(mutex)(&scratch_mutex);
10238 			save_mod(mod, tr);
10239 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10240 			/* Update delta if the module was loaded in the previous boot */
10241 			make_mod_delta(mod, tr);
10242 		}
10243 	}
10244 }
10245 
10246 static int trace_module_notify(struct notifier_block *self,
10247 			       unsigned long val, void *data)
10248 {
10249 	struct module *mod = data;
10250 
10251 	switch (val) {
10252 	case MODULE_STATE_COMING:
10253 		trace_module_add_evals(mod);
10254 		trace_module_record(mod, true);
10255 		break;
10256 	case MODULE_STATE_GOING:
10257 		trace_module_remove_evals(mod);
10258 		trace_module_record(mod, false);
10259 		break;
10260 	}
10261 
10262 	return NOTIFY_OK;
10263 }
10264 
10265 static struct notifier_block trace_module_nb = {
10266 	.notifier_call = trace_module_notify,
10267 	.priority = 0,
10268 };
10269 #endif /* CONFIG_MODULES */
10270 
10271 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10272 {
10273 
10274 	event_trace_init();
10275 
10276 	init_tracer_tracefs(&global_trace, NULL);
10277 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10278 
10279 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10280 			&global_trace, &tracing_thresh_fops);
10281 
10282 	trace_create_file("README", TRACE_MODE_READ, NULL,
10283 			NULL, &tracing_readme_fops);
10284 
10285 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10286 			NULL, &tracing_saved_cmdlines_fops);
10287 
10288 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10289 			  NULL, &tracing_saved_cmdlines_size_fops);
10290 
10291 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10292 			NULL, &tracing_saved_tgids_fops);
10293 
10294 	trace_create_eval_file(NULL);
10295 
10296 #ifdef CONFIG_MODULES
10297 	register_module_notifier(&trace_module_nb);
10298 #endif
10299 
10300 #ifdef CONFIG_DYNAMIC_FTRACE
10301 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10302 			NULL, &tracing_dyn_info_fops);
10303 #endif
10304 
10305 	create_trace_instances(NULL);
10306 
10307 	update_tracer_options(&global_trace);
10308 }
10309 
10310 static __init int tracer_init_tracefs(void)
10311 {
10312 	int ret;
10313 
10314 	trace_access_lock_init();
10315 
10316 	ret = tracing_init_dentry();
10317 	if (ret)
10318 		return 0;
10319 
10320 	if (eval_map_wq) {
10321 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10322 		queue_work(eval_map_wq, &tracerfs_init_work);
10323 	} else {
10324 		tracer_init_tracefs_work_func(NULL);
10325 	}
10326 
10327 	rv_init_interface();
10328 
10329 	return 0;
10330 }
10331 
10332 fs_initcall(tracer_init_tracefs);
10333 
10334 static int trace_die_panic_handler(struct notifier_block *self,
10335 				unsigned long ev, void *unused);
10336 
10337 static struct notifier_block trace_panic_notifier = {
10338 	.notifier_call = trace_die_panic_handler,
10339 	.priority = INT_MAX - 1,
10340 };
10341 
10342 static struct notifier_block trace_die_notifier = {
10343 	.notifier_call = trace_die_panic_handler,
10344 	.priority = INT_MAX - 1,
10345 };
10346 
10347 /*
10348  * The idea is to execute the following die/panic callback early, in order
10349  * to avoid showing irrelevant information in the trace (like other panic
10350  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10351  * warnings get disabled (to prevent potential log flooding).
10352  */
10353 static int trace_die_panic_handler(struct notifier_block *self,
10354 				unsigned long ev, void *unused)
10355 {
10356 	if (!ftrace_dump_on_oops_enabled())
10357 		return NOTIFY_DONE;
10358 
10359 	/* The die notifier requires DIE_OOPS to trigger */
10360 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10361 		return NOTIFY_DONE;
10362 
10363 	ftrace_dump(DUMP_PARAM);
10364 
10365 	return NOTIFY_DONE;
10366 }
10367 
10368 /*
10369  * printk is limited to a max of 1024, and we really don't need it that big.
10370  * Nothing should be printing 1000 characters anyway.
10371  */
10372 #define TRACE_MAX_PRINT		1000
10373 
10374 /*
10375  * Define here KERN_TRACE so that we have one place to modify
10376  * it if we decide to change what log level the ftrace dump
10377  * should be at.
10378  */
10379 #define KERN_TRACE		KERN_EMERG
10380 
10381 void
10382 trace_printk_seq(struct trace_seq *s)
10383 {
10384 	/* Probably should print a warning here. */
10385 	if (s->seq.len >= TRACE_MAX_PRINT)
10386 		s->seq.len = TRACE_MAX_PRINT;
10387 
10388 	/*
10389 	 * More paranoid code. Although the buffer size is set to
10390 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10391 	 * an extra layer of protection.
10392 	 */
10393 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10394 		s->seq.len = s->seq.size - 1;
10395 
10396 	/* should already be NUL terminated, but we are paranoid. */
10397 	s->buffer[s->seq.len] = 0;
10398 
10399 	printk(KERN_TRACE "%s", s->buffer);
10400 
10401 	trace_seq_init(s);
10402 }
10403 
10404 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10405 {
10406 	iter->tr = tr;
10407 	iter->trace = iter->tr->current_trace;
10408 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10409 	iter->array_buffer = &tr->array_buffer;
10410 
10411 	if (iter->trace && iter->trace->open)
10412 		iter->trace->open(iter);
10413 
10414 	/* Annotate start of buffers if we had overruns */
10415 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10416 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10417 
10418 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10419 	if (trace_clocks[iter->tr->clock_id].in_ns)
10420 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10421 
10422 	/* Can not use kmalloc for iter.temp and iter.fmt */
10423 	iter->temp = static_temp_buf;
10424 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10425 	iter->fmt = static_fmt_buf;
10426 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10427 }
10428 
10429 void trace_init_global_iter(struct trace_iterator *iter)
10430 {
10431 	trace_init_iter(iter, &global_trace);
10432 }
10433 
10434 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10435 {
10436 	/* use static because iter can be a bit big for the stack */
10437 	static struct trace_iterator iter;
10438 	unsigned int old_userobj;
10439 	unsigned long flags;
10440 	int cnt = 0, cpu;
10441 
10442 	/*
10443 	 * Always turn off tracing when we dump.
10444 	 * We don't need to show trace output of what happens
10445 	 * between multiple crashes.
10446 	 *
10447 	 * If the user does a sysrq-z, then they can re-enable
10448 	 * tracing with echo 1 > tracing_on.
10449 	 */
10450 	tracer_tracing_off(tr);
10451 
10452 	local_irq_save(flags);
10453 
10454 	/* Simulate the iterator */
10455 	trace_init_iter(&iter, tr);
10456 
10457 	for_each_tracing_cpu(cpu) {
10458 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10459 	}
10460 
10461 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10462 
10463 	/* don't look at user memory in panic mode */
10464 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10465 
10466 	if (dump_mode == DUMP_ORIG)
10467 		iter.cpu_file = raw_smp_processor_id();
10468 	else
10469 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10470 
10471 	if (tr == &global_trace)
10472 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10473 	else
10474 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10475 
10476 	/* Did function tracer already get disabled? */
10477 	if (ftrace_is_dead()) {
10478 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10479 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10480 	}
10481 
10482 	/*
10483 	 * We need to stop all tracing on all CPUS to read
10484 	 * the next buffer. This is a bit expensive, but is
10485 	 * not done often. We print everything we can read,
10486 	 * and then release the locks again.
10487 	 */
10488 
10489 	while (!trace_empty(&iter)) {
10490 
10491 		if (!cnt)
10492 			printk(KERN_TRACE "---------------------------------\n");
10493 
10494 		cnt++;
10495 
10496 		trace_iterator_reset(&iter);
10497 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10498 
10499 		if (trace_find_next_entry_inc(&iter) != NULL) {
10500 			int ret;
10501 
10502 			ret = print_trace_line(&iter);
10503 			if (ret != TRACE_TYPE_NO_CONSUME)
10504 				trace_consume(&iter);
10505 		}
10506 		touch_nmi_watchdog();
10507 
10508 		trace_printk_seq(&iter.seq);
10509 	}
10510 
10511 	if (!cnt)
10512 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10513 	else
10514 		printk(KERN_TRACE "---------------------------------\n");
10515 
10516 	tr->trace_flags |= old_userobj;
10517 
10518 	for_each_tracing_cpu(cpu) {
10519 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10520 	}
10521 	local_irq_restore(flags);
10522 }
10523 
10524 static void ftrace_dump_by_param(void)
10525 {
10526 	bool first_param = true;
10527 	char dump_param[MAX_TRACER_SIZE];
10528 	char *buf, *token, *inst_name;
10529 	struct trace_array *tr;
10530 
10531 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10532 	buf = dump_param;
10533 
10534 	while ((token = strsep(&buf, ",")) != NULL) {
10535 		if (first_param) {
10536 			first_param = false;
10537 			if (!strcmp("0", token))
10538 				continue;
10539 			else if (!strcmp("1", token)) {
10540 				ftrace_dump_one(&global_trace, DUMP_ALL);
10541 				continue;
10542 			}
10543 			else if (!strcmp("2", token) ||
10544 			  !strcmp("orig_cpu", token)) {
10545 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10546 				continue;
10547 			}
10548 		}
10549 
10550 		inst_name = strsep(&token, "=");
10551 		tr = trace_array_find(inst_name);
10552 		if (!tr) {
10553 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10554 			continue;
10555 		}
10556 
10557 		if (token && (!strcmp("2", token) ||
10558 			  !strcmp("orig_cpu", token)))
10559 			ftrace_dump_one(tr, DUMP_ORIG);
10560 		else
10561 			ftrace_dump_one(tr, DUMP_ALL);
10562 	}
10563 }
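
/*
 * Examples of the ftrace_dump_on_oops format parsed above (a sketch based
 * only on this parser): "1" dumps all CPUs of the global buffer, "2" or
 * "orig_cpu" dumps only the CPU that triggered the oops, and any further
 * comma-separated tokens name instances, optionally with their own mode,
 * e.g. "ftrace_dump_on_oops=1,foo,bar=orig_cpu".
 */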
10564 
10565 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10566 {
10567 	static atomic_t dump_running;
10568 
10569 	/* Only allow one dump user at a time. */
10570 	if (atomic_inc_return(&dump_running) != 1) {
10571 		atomic_dec(&dump_running);
10572 		return;
10573 	}
10574 
10575 	switch (oops_dump_mode) {
10576 	case DUMP_ALL:
10577 		ftrace_dump_one(&global_trace, DUMP_ALL);
10578 		break;
10579 	case DUMP_ORIG:
10580 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10581 		break;
10582 	case DUMP_PARAM:
10583 		ftrace_dump_by_param();
10584 		break;
10585 	case DUMP_NONE:
10586 		break;
10587 	default:
10588 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10589 		ftrace_dump_one(&global_trace, DUMP_ALL);
10590 	}
10591 
10592 	atomic_dec(&dump_running);
10593 }
10594 EXPORT_SYMBOL_GPL(ftrace_dump);
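
/*
 * Both the sysrq-z handler and the die/panic notifiers above funnel into
 * ftrace_dump(). Kernel code chasing a hard-to-hit problem can also call
 * ftrace_dump(DUMP_ALL) directly to spill the ring buffer to the console
 * before the system goes down.
 */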
10595 
10596 #define WRITE_BUFSIZE  4096
10597 
10598 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10599 				size_t count, loff_t *ppos,
10600 				int (*createfn)(const char *))
10601 {
10602 	char *kbuf, *buf, *tmp;
10603 	int ret = 0;
10604 	size_t done = 0;
10605 	size_t size;
10606 
10607 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10608 	if (!kbuf)
10609 		return -ENOMEM;
10610 
10611 	while (done < count) {
10612 		size = count - done;
10613 
10614 		if (size >= WRITE_BUFSIZE)
10615 			size = WRITE_BUFSIZE - 1;
10616 
10617 		if (copy_from_user(kbuf, buffer + done, size)) {
10618 			ret = -EFAULT;
10619 			goto out;
10620 		}
10621 		kbuf[size] = '\0';
10622 		buf = kbuf;
10623 		do {
10624 			tmp = strchr(buf, '\n');
10625 			if (tmp) {
10626 				*tmp = '\0';
10627 				size = tmp - buf + 1;
10628 			} else {
10629 				size = strlen(buf);
10630 				if (done + size < count) {
10631 					if (buf != kbuf)
10632 						break;
10633 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10634 					pr_warn("Line length is too long: Should be less than %d\n",
10635 						WRITE_BUFSIZE - 2);
10636 					ret = -EINVAL;
10637 					goto out;
10638 				}
10639 			}
10640 			done += size;
10641 
10642 			/* Remove comments */
10643 			tmp = strchr(buf, '#');
10644 
10645 			if (tmp)
10646 				*tmp = '\0';
10647 
10648 			ret = createfn(buf);
10649 			if (ret)
10650 				goto out;
10651 			buf += size;
10652 
10653 		} while (done < count);
10654 	}
10655 	ret = done;
10656 
10657 out:
10658 	kfree(kbuf);
10659 
10660 	return ret;
10661 }
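
/*
 * trace_parse_run_command() is the generic write handler behind the
 * dynamic event control files (kprobe_events, uprobe_events,
 * dynamic_events and friends): it copies the user buffer in chunks of up
 * to WRITE_BUFSIZE - 1 bytes, splits the result on newlines, strips '#'
 * comments, and hands each command line to @createfn, stopping at the
 * first error.
 */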
10662 
10663 #ifdef CONFIG_TRACER_MAX_TRACE
10664 __init static bool tr_needs_alloc_snapshot(const char *name)
10665 {
10666 	char *test;
10667 	int len = strlen(name);
10668 	bool ret;
10669 
10670 	if (!boot_snapshot_index)
10671 		return false;
10672 
10673 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10674 	    boot_snapshot_info[len] == '\t')
10675 		return true;
10676 
10677 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10678 	if (!test)
10679 		return false;
10680 
10681 	sprintf(test, "\t%s\t", name);
10682 	ret = strstr(boot_snapshot_info, test) == NULL;
10683 	kfree(test);
10684 	return ret;
10685 }
10686 
10687 __init static void do_allocate_snapshot(const char *name)
10688 {
10689 	if (!tr_needs_alloc_snapshot(name))
10690 		return;
10691 
10692 	/*
10693 	 * When allocate_snapshot is set, the next call to
10694 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10695 	 * will allocate the snapshot buffer. That will also clear
10696 	 * this flag.
10697 	 */
10698 	allocate_snapshot = true;
10699 }
10700 #else
10701 static inline void do_allocate_snapshot(const char *name) { }
10702 #endif
10703 
10704 __init static void enable_instances(void)
10705 {
10706 	struct trace_array *tr;
10707 	char *curr_str;
10708 	char *name;
10709 	char *str;
10710 	char *tok;
10711 
10712 	/* A tab is always appended */
10713 	boot_instance_info[boot_instance_index - 1] = '\0';
10714 	str = boot_instance_info;
10715 
10716 	while ((curr_str = strsep(&str, "\t"))) {
10717 		phys_addr_t start = 0;
10718 		phys_addr_t size = 0;
10719 		unsigned long addr = 0;
10720 		bool traceprintk = false;
10721 		bool traceoff = false;
10722 		char *flag_delim;
10723 		char *addr_delim;
10724 		char *rname __free(kfree) = NULL;
10725 
10726 		tok = strsep(&curr_str, ",");
10727 
10728 		flag_delim = strchr(tok, '^');
10729 		addr_delim = strchr(tok, '@');
10730 
10731 		if (addr_delim)
10732 			*addr_delim++ = '\0';
10733 
10734 		if (flag_delim)
10735 			*flag_delim++ = '\0';
10736 
10737 		name = tok;
10738 
10739 		if (flag_delim) {
10740 			char *flag;
10741 
10742 			while ((flag = strsep(&flag_delim, "^"))) {
10743 				if (strcmp(flag, "traceoff") == 0) {
10744 					traceoff = true;
10745 				} else if ((strcmp(flag, "printk") == 0) ||
10746 					   (strcmp(flag, "traceprintk") == 0) ||
10747 					   (strcmp(flag, "trace_printk") == 0)) {
10748 					traceprintk = true;
10749 				} else {
10750 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10751 						flag, name);
10752 				}
10753 			}
10754 		}
10755 
10756 		tok = addr_delim;
10757 		if (tok && isdigit(*tok)) {
10758 			start = memparse(tok, &tok);
10759 			if (!start) {
10760 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10761 					name);
10762 				continue;
10763 			}
10764 			if (*tok != ':') {
10765 				pr_warn("Tracing: No size specified for instance %s\n", name);
10766 				continue;
10767 			}
10768 			tok++;
10769 			size = memparse(tok, &tok);
10770 			if (!size) {
10771 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10772 					name);
10773 				continue;
10774 			}
10775 		} else if (tok) {
10776 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10777 				start = 0;
10778 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10779 				continue;
10780 			}
10781 			rname = kstrdup(tok, GFP_KERNEL);
10782 		}
10783 
10784 		if (start) {
10785 			addr = map_pages(start, size);
10786 			if (addr) {
10787 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10788 					name, &start, (unsigned long)size);
10789 			} else {
10790 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10791 				continue;
10792 			}
10793 		} else {
10794 			/* Only non-mapped buffers have snapshot buffers */
10795 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10796 				do_allocate_snapshot(name);
10797 		}
10798 
10799 		tr = trace_array_create_systems(name, NULL, addr, size);
10800 		if (IS_ERR(tr)) {
10801 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10802 			continue;
10803 		}
10804 
10805 		if (traceoff)
10806 			tracer_tracing_off(tr);
10807 
10808 		if (traceprintk)
10809 			update_printk_trace(tr);
10810 
10811 		/*
10812 		 * If start is set, then this is a mapped buffer, and
10813 		 * cannot be deleted by user space, so keep the reference
10814 		 * to it.
10815 		 */
10816 		if (start) {
10817 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10818 			tr->range_name = no_free_ptr(rname);
10819 		}
10820 
10821 		while ((tok = strsep(&curr_str, ","))) {
10822 			early_enable_events(tr, tok, true);
10823 		}
10824 	}
10825 }
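
/*
 * Examples of the trace_instance= boot parameter forms handled above (a
 * sketch derived from this parser; the names, address and size below are
 * illustrative only):
 *
 *   trace_instance=foo                        create instance "foo"
 *   trace_instance=foo^traceoff^traceprintk   create "foo" with flags applied
 *   trace_instance=boot@0x1000000:4M          map "boot" to a fixed physical range
 *   trace_instance=boot@myresv,sched:sched_switch
 *                                             back "boot" with reserve_mem=myresv
 *                                             and enable the sched_switch event
 */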
10826 
10827 __init static int tracer_alloc_buffers(void)
10828 {
10829 	int ring_buf_size;
10830 	int ret = -ENOMEM;
10831 
10832 
10833 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10834 		pr_warn("Tracing disabled due to lockdown\n");
10835 		return -EPERM;
10836 	}
10837 
10838 	/*
10839 	 * Make sure we don't accidentally add more trace options
10840 	 * than we have bits for.
10841 	 */
10842 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10843 
10844 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10845 		goto out;
10846 
10847 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10848 		goto out_free_buffer_mask;
10849 
10850 	/* Only allocate trace_printk buffers if a trace_printk exists */
10851 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10852 		/* Must be called before global_trace.buffer is allocated */
10853 		trace_printk_init_buffers();
10854 
10855 	/* To save memory, keep the ring buffer size to its minimum */
10856 	if (global_trace.ring_buffer_expanded)
10857 		ring_buf_size = trace_buf_size;
10858 	else
10859 		ring_buf_size = 1;
10860 
10861 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10862 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10863 
10864 	raw_spin_lock_init(&global_trace.start_lock);
10865 
10866 	/*
10867 	 * The prepare callback allocates some memory for the ring buffer. We
10868 	 * don't free the buffer if the CPU goes down. If we were to free
10869 	 * the buffer, then the user would lose any trace that was in the
10870 	 * buffer. The memory will be removed once the "instance" is removed.
10871 	 */
10872 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10873 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10874 				      NULL);
10875 	if (ret < 0)
10876 		goto out_free_cpumask;
10877 	/* Used for event triggers */
10878 	ret = -ENOMEM;
10879 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10880 	if (!temp_buffer)
10881 		goto out_rm_hp_state;
10882 
10883 	if (trace_create_savedcmd() < 0)
10884 		goto out_free_temp_buffer;
10885 
10886 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10887 		goto out_free_savedcmd;
10888 
10889 	/* TODO: make the number of buffers hot pluggable with CPUS */
10890 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10891 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10892 		goto out_free_pipe_cpumask;
10893 	}
10894 	if (global_trace.buffer_disabled)
10895 		tracing_off();
10896 
10897 	if (trace_boot_clock) {
10898 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10899 		if (ret < 0)
10900 			pr_warn("Trace clock %s not defined, going back to default\n",
10901 				trace_boot_clock);
10902 	}
10903 
10904 	/*
10905 	 * register_tracer() might reference current_trace, so it
10906 	 * needs to be set before we register anything. This is
10907 	 * just a bootstrap of current_trace anyway.
10908 	 */
10909 	global_trace.current_trace = &nop_trace;
10910 
10911 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10912 #ifdef CONFIG_TRACER_MAX_TRACE
10913 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10914 #endif
10915 	ftrace_init_global_array_ops(&global_trace);
10916 
10917 #ifdef CONFIG_MODULES
10918 	INIT_LIST_HEAD(&global_trace.mod_events);
10919 #endif
10920 
10921 	init_trace_flags_index(&global_trace);
10922 
10923 	register_tracer(&nop_trace);
10924 
10925 	/* Function tracing may start here (via kernel command line) */
10926 	init_function_trace();
10927 
10928 	/* All seems OK, enable tracing */
10929 	tracing_disabled = 0;
10930 
10931 	atomic_notifier_chain_register(&panic_notifier_list,
10932 				       &trace_panic_notifier);
10933 
10934 	register_die_notifier(&trace_die_notifier);
10935 
10936 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10937 
10938 	INIT_LIST_HEAD(&global_trace.systems);
10939 	INIT_LIST_HEAD(&global_trace.events);
10940 	INIT_LIST_HEAD(&global_trace.hist_vars);
10941 	INIT_LIST_HEAD(&global_trace.err_log);
10942 	list_add(&global_trace.list, &ftrace_trace_arrays);
10943 
10944 	apply_trace_boot_options();
10945 
10946 	register_snapshot_cmd();
10947 
10948 	return 0;
10949 
10950 out_free_pipe_cpumask:
10951 	free_cpumask_var(global_trace.pipe_cpumask);
10952 out_free_savedcmd:
10953 	trace_free_saved_cmdlines_buffer();
10954 out_free_temp_buffer:
10955 	ring_buffer_free(temp_buffer);
10956 out_rm_hp_state:
10957 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10958 out_free_cpumask:
10959 	free_cpumask_var(global_trace.tracing_cpumask);
10960 out_free_buffer_mask:
10961 	free_cpumask_var(tracing_buffer_mask);
10962 out:
10963 	return ret;
10964 }
10965 
10966 #ifdef CONFIG_FUNCTION_TRACER
10967 /* Used to set module cached ftrace filtering at boot up */
10968 __init struct trace_array *trace_get_global_array(void)
10969 {
10970 	return &global_trace;
10971 }
10972 #endif
10973 
10974 void __init ftrace_boot_snapshot(void)
10975 {
10976 #ifdef CONFIG_TRACER_MAX_TRACE
10977 	struct trace_array *tr;
10978 
10979 	if (!snapshot_at_boot)
10980 		return;
10981 
10982 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10983 		if (!tr->allocated_snapshot)
10984 			continue;
10985 
10986 		tracing_snapshot_instance(tr);
10987 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10988 	}
10989 #endif
10990 }
10991 
10992 void __init early_trace_init(void)
10993 {
10994 	if (tracepoint_printk) {
10995 		tracepoint_print_iter =
10996 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10997 		if (MEM_FAIL(!tracepoint_print_iter,
10998 			     "Failed to allocate trace iterator\n"))
10999 			tracepoint_printk = 0;
11000 		else
11001 			static_key_enable(&tracepoint_printk_key.key);
11002 	}
11003 	tracer_alloc_buffers();
11004 
11005 	init_events();
11006 }
11007 
11008 void __init trace_init(void)
11009 {
11010 	trace_event_init();
11011 
11012 	if (boot_instance_index)
11013 		enable_instances();
11014 }
11015 
11016 __init static void clear_boot_tracer(void)
11017 {
11018 	/*
11019 	 * The default bootup tracer name points into a boot buffer
11020 	 * that is an init section. This function is called in
11021 	 * lateinit. If we did not find the boot tracer, then clear
11022 	 * it out, to prevent later registration from accessing the
11023 	 * buffer that is about to be freed.
11024 	 */
11025 	if (!default_bootup_tracer)
11026 		return;
11027 
11028 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11029 	       default_bootup_tracer);
11030 	default_bootup_tracer = NULL;
11031 }
11032 
11033 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11034 __init static void tracing_set_default_clock(void)
11035 {
11036 	/* sched_clock_stable() is determined in late_initcall */
11037 	if (!trace_boot_clock && !sched_clock_stable()) {
11038 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11039 			pr_warn("Can not set tracing clock due to lockdown\n");
11040 			return;
11041 		}
11042 
11043 		printk(KERN_WARNING
11044 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11045 		       "If you want to keep using the local clock, then add:\n"
11046 		       "  \"trace_clock=local\"\n"
11047 		       "on the kernel command line\n");
11048 		tracing_set_clock(&global_trace, "global");
11049 	}
11050 }
11051 #else
11052 static inline void tracing_set_default_clock(void) { }
11053 #endif
11054 
11055 __init static int late_trace_init(void)
11056 {
11057 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11058 		static_key_disable(&tracepoint_printk_key.key);
11059 		tracepoint_printk = 0;
11060 	}
11061 
11062 	if (traceoff_after_boot)
11063 		tracing_off();
11064 
11065 	tracing_set_default_clock();
11066 	clear_boot_tracer();
11067 	return 0;
11068 }
11069 
11070 late_initcall_sync(late_trace_init);
11071