xref: /linux-6.15/kernel/trace/trace.c (revision 394f3f02)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
6  * Copyright (C) 2008 Ingo Molnar <[email protected]>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <[email protected]>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 
55 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
56 
57 #include "trace.h"
58 #include "trace_output.h"
59 
60 #ifdef CONFIG_FTRACE_STARTUP_TEST
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occurred
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 void __init disable_tracing_selftest(const char *reason)
77 {
78 	if (!tracing_selftest_disabled) {
79 		tracing_selftest_disabled = true;
80 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
81 	}
82 }
83 #else
84 #define tracing_selftest_running	0
85 #define tracing_selftest_disabled	0
86 #endif
87 
88 /* Pipe tracepoints to printk */
89 static struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static bool tracepoint_printk_stop_on_boot __initdata;
92 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
93 
94 /* For tracers that don't implement custom flags */
95 static struct tracer_opt dummy_tracer_opt[] = {
96 	{ }
97 };
98 
99 static int
100 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
101 {
102 	return 0;
103 }
104 
105 /*
106  * To prevent the comm cache from being overwritten when no
107  * tracing is active, only save the comm when a trace event
108  * occurred.
109  */
110 DEFINE_PER_CPU(bool, trace_taskinfo_save);
111 
112 /*
113  * Kill all tracing for good (never come back).
114  * It is initialized to 1 but will turn to zero if the initialization
115  * of the tracer is successful. But that is the only place that sets
116  * this back to zero.
117  */
118 static int tracing_disabled = 1;
119 
120 cpumask_var_t __read_mostly	tracing_buffer_mask;
121 
122 /*
123  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124  *
125  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126  * is set, then ftrace_dump is called. This will output the contents
127  * of the ftrace buffers to the console.  This is very useful for
128  * capturing traces that lead to crashes and outputing it to a
129  * serial console.
130  *
131  * It is default off, but you can enable it with either specifying
132  * "ftrace_dump_on_oops" in the kernel command line, or setting
133  * /proc/sys/kernel/ftrace_dump_on_oops
134  * Set 1 if you want to dump buffers of all CPUs
135  * Set 2 if you want to dump the buffer of the CPU that triggered oops
136  * Set instance name if you want to dump the specific trace instance
137  * Multiple instance dump is also supported, and instances are seperated
138  * by commas.
139  */
140 /* Set to string format zero to disable by default */
141 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145 
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149 	struct module			*mod;
150 	unsigned long			length;
151 };
152 
153 union trace_eval_map_item;
154 
155 struct trace_eval_map_tail {
156 	/*
157 	 * "end" is first and points to NULL as it must be different
158 	 * than "mod" or "eval_string"
159 	 */
160 	union trace_eval_map_item	*next;
161 	const char			*end;	/* points to NULL */
162 };
163 
164 static DEFINE_MUTEX(trace_eval_mutex);
165 
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174 	struct trace_eval_map		map;
175 	struct trace_eval_map_head	head;
176 	struct trace_eval_map_tail	tail;
177 };
178 
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181 
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184 				   struct trace_buffer *buffer,
185 				   unsigned int trace_ctx);
186 
187 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
188 static char *default_bootup_tracer;
189 
190 static bool allocate_snapshot;
191 static bool snapshot_at_boot;
192 
193 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_instance_index;
195 
196 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
197 static int boot_snapshot_index;
198 
199 static int __init set_cmdline_ftrace(char *str)
200 {
201 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
202 	default_bootup_tracer = bootup_tracer_buf;
203 	/* We are using ftrace early, expand it */
204 	trace_set_ring_buffer_expanded(NULL);
205 	return 1;
206 }
207 __setup("ftrace=", set_cmdline_ftrace);
208 
209 int ftrace_dump_on_oops_enabled(void)
210 {
211 	if (!strcmp("0", ftrace_dump_on_oops))
212 		return 0;
213 	else
214 		return 1;
215 }
216 
217 static int __init set_ftrace_dump_on_oops(char *str)
218 {
219 	if (!*str) {
220 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
221 		return 1;
222 	}
223 
224 	if (*str == ',') {
225 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
226 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
227 		return 1;
228 	}
229 
230 	if (*str++ == '=') {
231 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
232 		return 1;
233 	}
234 
235 	return 0;
236 }
237 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
238 
239 static int __init stop_trace_on_warning(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		__disable_trace_on_warning = 1;
243 	return 1;
244 }
245 __setup("traceoff_on_warning", stop_trace_on_warning);
246 
247 static int __init boot_alloc_snapshot(char *str)
248 {
249 	char *slot = boot_snapshot_info + boot_snapshot_index;
250 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
251 	int ret;
252 
253 	if (str[0] == '=') {
254 		str++;
255 		if (strlen(str) >= left)
256 			return -1;
257 
258 		ret = snprintf(slot, left, "%s\t", str);
259 		boot_snapshot_index += ret;
260 	} else {
261 		allocate_snapshot = true;
262 		/* We also need the main ring buffer expanded */
263 		trace_set_ring_buffer_expanded(NULL);
264 	}
265 	return 1;
266 }
267 __setup("alloc_snapshot", boot_alloc_snapshot);
268 
269 
270 static int __init boot_snapshot(char *str)
271 {
272 	snapshot_at_boot = true;
273 	boot_alloc_snapshot(str);
274 	return 1;
275 }
276 __setup("ftrace_boot_snapshot", boot_snapshot);
277 
278 
279 static int __init boot_instance(char *str)
280 {
281 	char *slot = boot_instance_info + boot_instance_index;
282 	int left = sizeof(boot_instance_info) - boot_instance_index;
283 	int ret;
284 
285 	if (strlen(str) >= left)
286 		return -1;
287 
288 	ret = snprintf(slot, left, "%s\t", str);
289 	boot_instance_index += ret;
290 
291 	return 1;
292 }
293 __setup("trace_instance=", boot_instance);
294 
295 
296 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
297 
298 static int __init set_trace_boot_options(char *str)
299 {
300 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
301 	return 1;
302 }
303 __setup("trace_options=", set_trace_boot_options);
304 
305 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
306 static char *trace_boot_clock __initdata;
307 
308 static int __init set_trace_boot_clock(char *str)
309 {
310 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
311 	trace_boot_clock = trace_boot_clock_buf;
312 	return 1;
313 }
314 __setup("trace_clock=", set_trace_boot_clock);
315 
316 static int __init set_tracepoint_printk(char *str)
317 {
318 	/* Ignore the "tp_printk_stop_on_boot" param */
319 	if (*str == '_')
320 		return 0;
321 
322 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
323 		tracepoint_printk = 1;
324 	return 1;
325 }
326 __setup("tp_printk", set_tracepoint_printk);
327 
328 static int __init set_tracepoint_printk_stop(char *str)
329 {
330 	tracepoint_printk_stop_on_boot = true;
331 	return 1;
332 }
333 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
334 
335 unsigned long long ns2usecs(u64 nsec)
336 {
337 	nsec += 500;
338 	do_div(nsec, 1000);
339 	return nsec;
340 }
341 
342 static void
343 trace_process_export(struct trace_export *export,
344 	       struct ring_buffer_event *event, int flag)
345 {
346 	struct trace_entry *entry;
347 	unsigned int size = 0;
348 
349 	if (export->flags & flag) {
350 		entry = ring_buffer_event_data(event);
351 		size = ring_buffer_event_length(event);
352 		export->write(export, entry, size);
353 	}
354 }
355 
356 static DEFINE_MUTEX(ftrace_export_lock);
357 
358 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
359 
360 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
361 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
362 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
363 
364 static inline void ftrace_exports_enable(struct trace_export *export)
365 {
366 	if (export->flags & TRACE_EXPORT_FUNCTION)
367 		static_branch_inc(&trace_function_exports_enabled);
368 
369 	if (export->flags & TRACE_EXPORT_EVENT)
370 		static_branch_inc(&trace_event_exports_enabled);
371 
372 	if (export->flags & TRACE_EXPORT_MARKER)
373 		static_branch_inc(&trace_marker_exports_enabled);
374 }
375 
376 static inline void ftrace_exports_disable(struct trace_export *export)
377 {
378 	if (export->flags & TRACE_EXPORT_FUNCTION)
379 		static_branch_dec(&trace_function_exports_enabled);
380 
381 	if (export->flags & TRACE_EXPORT_EVENT)
382 		static_branch_dec(&trace_event_exports_enabled);
383 
384 	if (export->flags & TRACE_EXPORT_MARKER)
385 		static_branch_dec(&trace_marker_exports_enabled);
386 }
387 
388 static void ftrace_exports(struct ring_buffer_event *event, int flag)
389 {
390 	struct trace_export *export;
391 
392 	preempt_disable_notrace();
393 
394 	export = rcu_dereference_raw_check(ftrace_exports_list);
395 	while (export) {
396 		trace_process_export(export, event, flag);
397 		export = rcu_dereference_raw_check(export->next);
398 	}
399 
400 	preempt_enable_notrace();
401 }
402 
403 static inline void
404 add_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406 	rcu_assign_pointer(export->next, *list);
407 	/*
408 	 * We are entering export into the list but another
409 	 * CPU might be walking that list. We need to make sure
410 	 * the export->next pointer is valid before another CPU sees
411 	 * the export pointer included into the list.
412 	 */
413 	rcu_assign_pointer(*list, export);
414 }
415 
416 static inline int
417 rm_trace_export(struct trace_export **list, struct trace_export *export)
418 {
419 	struct trace_export **p;
420 
421 	for (p = list; *p != NULL; p = &(*p)->next)
422 		if (*p == export)
423 			break;
424 
425 	if (*p != export)
426 		return -1;
427 
428 	rcu_assign_pointer(*p, (*p)->next);
429 
430 	return 0;
431 }
432 
433 static inline void
434 add_ftrace_export(struct trace_export **list, struct trace_export *export)
435 {
436 	ftrace_exports_enable(export);
437 
438 	add_trace_export(list, export);
439 }
440 
441 static inline int
442 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
443 {
444 	int ret;
445 
446 	ret = rm_trace_export(list, export);
447 	ftrace_exports_disable(export);
448 
449 	return ret;
450 }
451 
452 int register_ftrace_export(struct trace_export *export)
453 {
454 	if (WARN_ON_ONCE(!export->write))
455 		return -1;
456 
457 	mutex_lock(&ftrace_export_lock);
458 
459 	add_ftrace_export(&ftrace_exports_list, export);
460 
461 	mutex_unlock(&ftrace_export_lock);
462 
463 	return 0;
464 }
465 EXPORT_SYMBOL_GPL(register_ftrace_export);
466 
467 int unregister_ftrace_export(struct trace_export *export)
468 {
469 	int ret;
470 
471 	mutex_lock(&ftrace_export_lock);
472 
473 	ret = rm_ftrace_export(&ftrace_exports_list, export);
474 
475 	mutex_unlock(&ftrace_export_lock);
476 
477 	return ret;
478 }
479 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
480 
481 /* trace_flags holds trace_options default values */
482 #define TRACE_DEFAULT_FLAGS						\
483 	(FUNCTION_DEFAULT_FLAGS |					\
484 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
485 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
486 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
487 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
488 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
489 
490 /* trace_options that are only supported by global_trace */
491 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
492 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
493 
494 /* trace_flags that are default zero for instances */
495 #define ZEROED_TRACE_FLAGS \
496 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
497 
498 /*
499  * The global_trace is the descriptor that holds the top-level tracing
500  * buffers for the live tracing.
501  */
502 static struct trace_array global_trace = {
503 	.trace_flags = TRACE_DEFAULT_FLAGS,
504 };
505 
506 static struct trace_array *printk_trace = &global_trace;
507 
508 static __always_inline bool printk_binsafe(struct trace_array *tr)
509 {
510 	/*
511 	 * The binary format of traceprintk can cause a crash if used
512 	 * by a buffer from another boot. Force the use of the
513 	 * non binary version of trace_printk if the trace_printk
514 	 * buffer is a boot mapped ring buffer.
515 	 */
516 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
517 }
518 
519 static void update_printk_trace(struct trace_array *tr)
520 {
521 	if (printk_trace == tr)
522 		return;
523 
524 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
525 	printk_trace = tr;
526 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
527 }
528 
529 void trace_set_ring_buffer_expanded(struct trace_array *tr)
530 {
531 	if (!tr)
532 		tr = &global_trace;
533 	tr->ring_buffer_expanded = true;
534 }
535 
536 LIST_HEAD(ftrace_trace_arrays);
537 
538 int trace_array_get(struct trace_array *this_tr)
539 {
540 	struct trace_array *tr;
541 
542 	guard(mutex)(&trace_types_lock);
543 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
544 		if (tr == this_tr) {
545 			tr->ref++;
546 			return 0;
547 		}
548 	}
549 
550 	return -ENODEV;
551 }
552 
553 static void __trace_array_put(struct trace_array *this_tr)
554 {
555 	WARN_ON(!this_tr->ref);
556 	this_tr->ref--;
557 }
558 
559 /**
560  * trace_array_put - Decrement the reference counter for this trace array.
561  * @this_tr : pointer to the trace array
562  *
563  * NOTE: Use this when we no longer need the trace array returned by
564  * trace_array_get_by_name(). This ensures the trace array can be later
565  * destroyed.
566  *
567  */
568 void trace_array_put(struct trace_array *this_tr)
569 {
570 	if (!this_tr)
571 		return;
572 
573 	mutex_lock(&trace_types_lock);
574 	__trace_array_put(this_tr);
575 	mutex_unlock(&trace_types_lock);
576 }
577 EXPORT_SYMBOL_GPL(trace_array_put);
578 
579 int tracing_check_open_get_tr(struct trace_array *tr)
580 {
581 	int ret;
582 
583 	ret = security_locked_down(LOCKDOWN_TRACEFS);
584 	if (ret)
585 		return ret;
586 
587 	if (tracing_disabled)
588 		return -ENODEV;
589 
590 	if (tr && trace_array_get(tr) < 0)
591 		return -ENODEV;
592 
593 	return 0;
594 }
595 
596 /**
597  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
598  * @filtered_pids: The list of pids to check
599  * @search_pid: The PID to find in @filtered_pids
600  *
601  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
602  */
603 bool
604 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
605 {
606 	return trace_pid_list_is_set(filtered_pids, search_pid);
607 }
608 
609 /**
610  * trace_ignore_this_task - should a task be ignored for tracing
611  * @filtered_pids: The list of pids to check
612  * @filtered_no_pids: The list of pids not to be traced
613  * @task: The task that should be ignored if not filtered
614  *
615  * Checks if @task should be traced or not from @filtered_pids.
616  * Returns true if @task should *NOT* be traced.
617  * Returns false if @task should be traced.
618  */
619 bool
620 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
621 		       struct trace_pid_list *filtered_no_pids,
622 		       struct task_struct *task)
623 {
624 	/*
625 	 * If filtered_no_pids is not empty, and the task's pid is listed
626 	 * in filtered_no_pids, then return true.
627 	 * Otherwise, if filtered_pids is empty, that means we can
628 	 * trace all tasks. If it has content, then only trace pids
629 	 * within filtered_pids.
630 	 */
631 
632 	return (filtered_pids &&
633 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
634 		(filtered_no_pids &&
635 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
636 }
637 
638 /**
639  * trace_filter_add_remove_task - Add or remove a task from a pid_list
640  * @pid_list: The list to modify
641  * @self: The current task for fork or NULL for exit
642  * @task: The task to add or remove
643  *
644  * If adding a task, if @self is defined, the task is only added if @self
645  * is also included in @pid_list. This happens on fork and tasks should
646  * only be added when the parent is listed. If @self is NULL, then the
647  * @task pid will be removed from the list, which would happen on exit
648  * of a task.
649  */
650 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
651 				  struct task_struct *self,
652 				  struct task_struct *task)
653 {
654 	if (!pid_list)
655 		return;
656 
657 	/* For forks, we only add if the forking task is listed */
658 	if (self) {
659 		if (!trace_find_filtered_pid(pid_list, self->pid))
660 			return;
661 	}
662 
663 	/* "self" is set for forks, and NULL for exits */
664 	if (self)
665 		trace_pid_list_set(pid_list, task->pid);
666 	else
667 		trace_pid_list_clear(pid_list, task->pid);
668 }
669 
670 /**
671  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
672  * @pid_list: The pid list to show
673  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
674  * @pos: The position of the file
675  *
676  * This is used by the seq_file "next" operation to iterate the pids
677  * listed in a trace_pid_list structure.
678  *
679  * Returns the pid+1 as we want to display pid of zero, but NULL would
680  * stop the iteration.
681  */
682 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
683 {
684 	long pid = (unsigned long)v;
685 	unsigned int next;
686 
687 	(*pos)++;
688 
689 	/* pid already is +1 of the actual previous bit */
690 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
691 		return NULL;
692 
693 	pid = next;
694 
695 	/* Return pid + 1 to allow zero to be represented */
696 	return (void *)(pid + 1);
697 }
698 
699 /**
700  * trace_pid_start - Used for seq_file to start reading pid lists
701  * @pid_list: The pid list to show
702  * @pos: The position of the file
703  *
704  * This is used by seq_file "start" operation to start the iteration
705  * of listing pids.
706  *
707  * Returns the pid+1 as we want to display pid of zero, but NULL would
708  * stop the iteration.
709  */
710 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
711 {
712 	unsigned long pid;
713 	unsigned int first;
714 	loff_t l = 0;
715 
716 	if (trace_pid_list_first(pid_list, &first) < 0)
717 		return NULL;
718 
719 	pid = first;
720 
721 	/* Return pid + 1 so that zero can be the exit value */
722 	for (pid++; pid && l < *pos;
723 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
724 		;
725 	return (void *)pid;
726 }
727 
728 /**
729  * trace_pid_show - show the current pid in seq_file processing
730  * @m: The seq_file structure to write into
731  * @v: A void pointer of the pid (+1) value to display
732  *
733  * Can be directly used by seq_file operations to display the current
734  * pid value.
735  */
736 int trace_pid_show(struct seq_file *m, void *v)
737 {
738 	unsigned long pid = (unsigned long)v - 1;
739 
740 	seq_printf(m, "%lu\n", pid);
741 	return 0;
742 }
743 
744 /* 128 should be much more than enough */
745 #define PID_BUF_SIZE		127
746 
747 int trace_pid_write(struct trace_pid_list *filtered_pids,
748 		    struct trace_pid_list **new_pid_list,
749 		    const char __user *ubuf, size_t cnt)
750 {
751 	struct trace_pid_list *pid_list;
752 	struct trace_parser parser;
753 	unsigned long val;
754 	int nr_pids = 0;
755 	ssize_t read = 0;
756 	ssize_t ret;
757 	loff_t pos;
758 	pid_t pid;
759 
760 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
761 		return -ENOMEM;
762 
763 	/*
764 	 * Always recreate a new array. The write is an all or nothing
765 	 * operation. Always create a new array when adding new pids by
766 	 * the user. If the operation fails, then the current list is
767 	 * not modified.
768 	 */
769 	pid_list = trace_pid_list_alloc();
770 	if (!pid_list) {
771 		trace_parser_put(&parser);
772 		return -ENOMEM;
773 	}
774 
775 	if (filtered_pids) {
776 		/* copy the current bits to the new max */
777 		ret = trace_pid_list_first(filtered_pids, &pid);
778 		while (!ret) {
779 			trace_pid_list_set(pid_list, pid);
780 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
781 			nr_pids++;
782 		}
783 	}
784 
785 	ret = 0;
786 	while (cnt > 0) {
787 
788 		pos = 0;
789 
790 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
791 		if (ret < 0)
792 			break;
793 
794 		read += ret;
795 		ubuf += ret;
796 		cnt -= ret;
797 
798 		if (!trace_parser_loaded(&parser))
799 			break;
800 
801 		ret = -EINVAL;
802 		if (kstrtoul(parser.buffer, 0, &val))
803 			break;
804 
805 		pid = (pid_t)val;
806 
807 		if (trace_pid_list_set(pid_list, pid) < 0) {
808 			ret = -1;
809 			break;
810 		}
811 		nr_pids++;
812 
813 		trace_parser_clear(&parser);
814 		ret = 0;
815 	}
816 	trace_parser_put(&parser);
817 
818 	if (ret < 0) {
819 		trace_pid_list_free(pid_list);
820 		return ret;
821 	}
822 
823 	if (!nr_pids) {
824 		/* Cleared the list of pids */
825 		trace_pid_list_free(pid_list);
826 		pid_list = NULL;
827 	}
828 
829 	*new_pid_list = pid_list;
830 
831 	return read;
832 }
833 
834 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
835 {
836 	u64 ts;
837 
838 	/* Early boot up does not have a buffer yet */
839 	if (!buf->buffer)
840 		return trace_clock_local();
841 
842 	ts = ring_buffer_time_stamp(buf->buffer);
843 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
844 
845 	return ts;
846 }
847 
848 u64 ftrace_now(int cpu)
849 {
850 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
851 }
852 
853 /**
854  * tracing_is_enabled - Show if global_trace has been enabled
855  *
856  * Shows if the global trace has been enabled or not. It uses the
857  * mirror flag "buffer_disabled" to be used in fast paths such as for
858  * the irqsoff tracer. But it may be inaccurate due to races. If you
859  * need to know the accurate state, use tracing_is_on() which is a little
860  * slower, but accurate.
861  */
862 int tracing_is_enabled(void)
863 {
864 	/*
865 	 * For quick access (irqsoff uses this in fast path), just
866 	 * return the mirror variable of the state of the ring buffer.
867 	 * It's a little racy, but we don't really care.
868 	 */
869 	smp_rmb();
870 	return !global_trace.buffer_disabled;
871 }
872 
873 /*
874  * trace_buf_size is the size in bytes that is allocated
875  * for a buffer. Note, the number of bytes is always rounded
876  * to page size.
877  *
878  * This number is purposely set to a low number of 16384.
879  * If the dump on oops happens, it will be much appreciated
880  * to not have to wait for all that output. Anyway this can be
881  * boot time and run time configurable.
882  */
883 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
884 
885 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
886 
887 /* trace_types holds a link list of available tracers. */
888 static struct tracer		*trace_types __read_mostly;
889 
890 /*
891  * trace_types_lock is used to protect the trace_types list.
892  */
893 DEFINE_MUTEX(trace_types_lock);
894 
895 /*
896  * serialize the access of the ring buffer
897  *
898  * ring buffer serializes readers, but it is low level protection.
899  * The validity of the events (which returns by ring_buffer_peek() ..etc)
900  * are not protected by ring buffer.
901  *
902  * The content of events may become garbage if we allow other process consumes
903  * these events concurrently:
904  *   A) the page of the consumed events may become a normal page
905  *      (not reader page) in ring buffer, and this page will be rewritten
906  *      by events producer.
907  *   B) The page of the consumed events may become a page for splice_read,
908  *      and this page will be returned to system.
909  *
910  * These primitives allow multi process access to different cpu ring buffer
911  * concurrently.
912  *
913  * These primitives don't distinguish read-only and read-consume access.
914  * Multi read-only access are also serialized.
915  */
916 
917 #ifdef CONFIG_SMP
918 static DECLARE_RWSEM(all_cpu_access_lock);
919 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
920 
921 static inline void trace_access_lock(int cpu)
922 {
923 	if (cpu == RING_BUFFER_ALL_CPUS) {
924 		/* gain it for accessing the whole ring buffer. */
925 		down_write(&all_cpu_access_lock);
926 	} else {
927 		/* gain it for accessing a cpu ring buffer. */
928 
929 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
930 		down_read(&all_cpu_access_lock);
931 
932 		/* Secondly block other access to this @cpu ring buffer. */
933 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
934 	}
935 }
936 
937 static inline void trace_access_unlock(int cpu)
938 {
939 	if (cpu == RING_BUFFER_ALL_CPUS) {
940 		up_write(&all_cpu_access_lock);
941 	} else {
942 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
943 		up_read(&all_cpu_access_lock);
944 	}
945 }
946 
947 static inline void trace_access_lock_init(void)
948 {
949 	int cpu;
950 
951 	for_each_possible_cpu(cpu)
952 		mutex_init(&per_cpu(cpu_access_lock, cpu));
953 }
954 
955 #else
956 
957 static DEFINE_MUTEX(access_lock);
958 
959 static inline void trace_access_lock(int cpu)
960 {
961 	(void)cpu;
962 	mutex_lock(&access_lock);
963 }
964 
965 static inline void trace_access_unlock(int cpu)
966 {
967 	(void)cpu;
968 	mutex_unlock(&access_lock);
969 }
970 
971 static inline void trace_access_lock_init(void)
972 {
973 }
974 
975 #endif
976 
977 #ifdef CONFIG_STACKTRACE
978 static void __ftrace_trace_stack(struct trace_array *tr,
979 				 struct trace_buffer *buffer,
980 				 unsigned int trace_ctx,
981 				 int skip, struct pt_regs *regs);
982 static inline void ftrace_trace_stack(struct trace_array *tr,
983 				      struct trace_buffer *buffer,
984 				      unsigned int trace_ctx,
985 				      int skip, struct pt_regs *regs);
986 
987 #else
988 static inline void __ftrace_trace_stack(struct trace_array *tr,
989 					struct trace_buffer *buffer,
990 					unsigned int trace_ctx,
991 					int skip, struct pt_regs *regs)
992 {
993 }
994 static inline void ftrace_trace_stack(struct trace_array *tr,
995 				      struct trace_buffer *buffer,
996 				      unsigned long trace_ctx,
997 				      int skip, struct pt_regs *regs)
998 {
999 }
1000 
1001 #endif
1002 
1003 static __always_inline void
1004 trace_event_setup(struct ring_buffer_event *event,
1005 		  int type, unsigned int trace_ctx)
1006 {
1007 	struct trace_entry *ent = ring_buffer_event_data(event);
1008 
1009 	tracing_generic_entry_update(ent, type, trace_ctx);
1010 }
1011 
1012 static __always_inline struct ring_buffer_event *
1013 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1014 			  int type,
1015 			  unsigned long len,
1016 			  unsigned int trace_ctx)
1017 {
1018 	struct ring_buffer_event *event;
1019 
1020 	event = ring_buffer_lock_reserve(buffer, len);
1021 	if (event != NULL)
1022 		trace_event_setup(event, type, trace_ctx);
1023 
1024 	return event;
1025 }
1026 
1027 void tracer_tracing_on(struct trace_array *tr)
1028 {
1029 	if (tr->array_buffer.buffer)
1030 		ring_buffer_record_on(tr->array_buffer.buffer);
1031 	/*
1032 	 * This flag is looked at when buffers haven't been allocated
1033 	 * yet, or by some tracers (like irqsoff), that just want to
1034 	 * know if the ring buffer has been disabled, but it can handle
1035 	 * races of where it gets disabled but we still do a record.
1036 	 * As the check is in the fast path of the tracers, it is more
1037 	 * important to be fast than accurate.
1038 	 */
1039 	tr->buffer_disabled = 0;
1040 	/* Make the flag seen by readers */
1041 	smp_wmb();
1042 }
1043 
1044 /**
1045  * tracing_on - enable tracing buffers
1046  *
1047  * This function enables tracing buffers that may have been
1048  * disabled with tracing_off.
1049  */
1050 void tracing_on(void)
1051 {
1052 	tracer_tracing_on(&global_trace);
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_on);
1055 
1056 
1057 static __always_inline void
1058 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1059 {
1060 	__this_cpu_write(trace_taskinfo_save, true);
1061 
1062 	/* If this is the temp buffer, we need to commit fully */
1063 	if (this_cpu_read(trace_buffered_event) == event) {
1064 		/* Length is in event->array[0] */
1065 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1066 		/* Release the temp buffer */
1067 		this_cpu_dec(trace_buffered_event_cnt);
1068 		/* ring_buffer_unlock_commit() enables preemption */
1069 		preempt_enable_notrace();
1070 	} else
1071 		ring_buffer_unlock_commit(buffer);
1072 }
1073 
1074 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1075 		       const char *str, int size)
1076 {
1077 	struct ring_buffer_event *event;
1078 	struct trace_buffer *buffer;
1079 	struct print_entry *entry;
1080 	unsigned int trace_ctx;
1081 	int alloc;
1082 
1083 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1084 		return 0;
1085 
1086 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1087 		return 0;
1088 
1089 	if (unlikely(tracing_disabled))
1090 		return 0;
1091 
1092 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1093 
1094 	trace_ctx = tracing_gen_ctx();
1095 	buffer = tr->array_buffer.buffer;
1096 	ring_buffer_nest_start(buffer);
1097 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1098 					    trace_ctx);
1099 	if (!event) {
1100 		size = 0;
1101 		goto out;
1102 	}
1103 
1104 	entry = ring_buffer_event_data(event);
1105 	entry->ip = ip;
1106 
1107 	memcpy(&entry->buf, str, size);
1108 
1109 	/* Add a newline if necessary */
1110 	if (entry->buf[size - 1] != '\n') {
1111 		entry->buf[size] = '\n';
1112 		entry->buf[size + 1] = '\0';
1113 	} else
1114 		entry->buf[size] = '\0';
1115 
1116 	__buffer_unlock_commit(buffer, event);
1117 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1118  out:
1119 	ring_buffer_nest_end(buffer);
1120 	return size;
1121 }
1122 EXPORT_SYMBOL_GPL(__trace_array_puts);
1123 
1124 /**
1125  * __trace_puts - write a constant string into the trace buffer.
1126  * @ip:	   The address of the caller
1127  * @str:   The constant string to write
1128  * @size:  The size of the string.
1129  */
1130 int __trace_puts(unsigned long ip, const char *str, int size)
1131 {
1132 	return __trace_array_puts(printk_trace, ip, str, size);
1133 }
1134 EXPORT_SYMBOL_GPL(__trace_puts);
1135 
1136 /**
1137  * __trace_bputs - write the pointer to a constant string into trace buffer
1138  * @ip:	   The address of the caller
1139  * @str:   The constant string to write to the buffer to
1140  */
1141 int __trace_bputs(unsigned long ip, const char *str)
1142 {
1143 	struct trace_array *tr = READ_ONCE(printk_trace);
1144 	struct ring_buffer_event *event;
1145 	struct trace_buffer *buffer;
1146 	struct bputs_entry *entry;
1147 	unsigned int trace_ctx;
1148 	int size = sizeof(struct bputs_entry);
1149 	int ret = 0;
1150 
1151 	if (!printk_binsafe(tr))
1152 		return __trace_puts(ip, str, strlen(str));
1153 
1154 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1155 		return 0;
1156 
1157 	if (unlikely(tracing_selftest_running || tracing_disabled))
1158 		return 0;
1159 
1160 	trace_ctx = tracing_gen_ctx();
1161 	buffer = tr->array_buffer.buffer;
1162 
1163 	ring_buffer_nest_start(buffer);
1164 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1165 					    trace_ctx);
1166 	if (!event)
1167 		goto out;
1168 
1169 	entry = ring_buffer_event_data(event);
1170 	entry->ip			= ip;
1171 	entry->str			= str;
1172 
1173 	__buffer_unlock_commit(buffer, event);
1174 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1175 
1176 	ret = 1;
1177  out:
1178 	ring_buffer_nest_end(buffer);
1179 	return ret;
1180 }
1181 EXPORT_SYMBOL_GPL(__trace_bputs);
1182 
1183 #ifdef CONFIG_TRACER_SNAPSHOT
1184 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1185 					   void *cond_data)
1186 {
1187 	struct tracer *tracer = tr->current_trace;
1188 	unsigned long flags;
1189 
1190 	if (in_nmi()) {
1191 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1192 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1193 		return;
1194 	}
1195 
1196 	if (!tr->allocated_snapshot) {
1197 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1198 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1199 		tracer_tracing_off(tr);
1200 		return;
1201 	}
1202 
1203 	/* Note, snapshot can not be used when the tracer uses it */
1204 	if (tracer->use_max_tr) {
1205 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1206 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1207 		return;
1208 	}
1209 
1210 	if (tr->mapped) {
1211 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1212 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1213 		return;
1214 	}
1215 
1216 	local_irq_save(flags);
1217 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1218 	local_irq_restore(flags);
1219 }
1220 
1221 void tracing_snapshot_instance(struct trace_array *tr)
1222 {
1223 	tracing_snapshot_instance_cond(tr, NULL);
1224 }
1225 
1226 /**
1227  * tracing_snapshot - take a snapshot of the current buffer.
1228  *
1229  * This causes a swap between the snapshot buffer and the current live
1230  * tracing buffer. You can use this to take snapshots of the live
1231  * trace when some condition is triggered, but continue to trace.
1232  *
1233  * Note, make sure to allocate the snapshot with either
1234  * a tracing_snapshot_alloc(), or by doing it manually
1235  * with: echo 1 > /sys/kernel/tracing/snapshot
1236  *
1237  * If the snapshot buffer is not allocated, it will stop tracing.
1238  * Basically making a permanent snapshot.
1239  */
1240 void tracing_snapshot(void)
1241 {
1242 	struct trace_array *tr = &global_trace;
1243 
1244 	tracing_snapshot_instance(tr);
1245 }
1246 EXPORT_SYMBOL_GPL(tracing_snapshot);
1247 
1248 /**
1249  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1250  * @tr:		The tracing instance to snapshot
1251  * @cond_data:	The data to be tested conditionally, and possibly saved
1252  *
1253  * This is the same as tracing_snapshot() except that the snapshot is
1254  * conditional - the snapshot will only happen if the
1255  * cond_snapshot.update() implementation receiving the cond_data
1256  * returns true, which means that the trace array's cond_snapshot
1257  * update() operation used the cond_data to determine whether the
1258  * snapshot should be taken, and if it was, presumably saved it along
1259  * with the snapshot.
1260  */
1261 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1262 {
1263 	tracing_snapshot_instance_cond(tr, cond_data);
1264 }
1265 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1266 
1267 /**
1268  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1269  * @tr:		The tracing instance
1270  *
1271  * When the user enables a conditional snapshot using
1272  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1273  * with the snapshot.  This accessor is used to retrieve it.
1274  *
1275  * Should not be called from cond_snapshot.update(), since it takes
1276  * the tr->max_lock lock, which the code calling
1277  * cond_snapshot.update() has already done.
1278  *
1279  * Returns the cond_data associated with the trace array's snapshot.
1280  */
1281 void *tracing_cond_snapshot_data(struct trace_array *tr)
1282 {
1283 	void *cond_data = NULL;
1284 
1285 	local_irq_disable();
1286 	arch_spin_lock(&tr->max_lock);
1287 
1288 	if (tr->cond_snapshot)
1289 		cond_data = tr->cond_snapshot->cond_data;
1290 
1291 	arch_spin_unlock(&tr->max_lock);
1292 	local_irq_enable();
1293 
1294 	return cond_data;
1295 }
1296 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1297 
1298 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1299 					struct array_buffer *size_buf, int cpu_id);
1300 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1301 
1302 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1303 {
1304 	int order;
1305 	int ret;
1306 
1307 	if (!tr->allocated_snapshot) {
1308 
1309 		/* Make the snapshot buffer have the same order as main buffer */
1310 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1311 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1312 		if (ret < 0)
1313 			return ret;
1314 
1315 		/* allocate spare buffer */
1316 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1317 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1318 		if (ret < 0)
1319 			return ret;
1320 
1321 		tr->allocated_snapshot = true;
1322 	}
1323 
1324 	return 0;
1325 }
1326 
1327 static void free_snapshot(struct trace_array *tr)
1328 {
1329 	/*
1330 	 * We don't free the ring buffer. instead, resize it because
1331 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1332 	 * we want preserve it.
1333 	 */
1334 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1335 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1336 	set_buffer_entries(&tr->max_buffer, 1);
1337 	tracing_reset_online_cpus(&tr->max_buffer);
1338 	tr->allocated_snapshot = false;
1339 }
1340 
1341 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1342 {
1343 	int ret;
1344 
1345 	lockdep_assert_held(&trace_types_lock);
1346 
1347 	spin_lock(&tr->snapshot_trigger_lock);
1348 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1349 		spin_unlock(&tr->snapshot_trigger_lock);
1350 		return -EBUSY;
1351 	}
1352 
1353 	tr->snapshot++;
1354 	spin_unlock(&tr->snapshot_trigger_lock);
1355 
1356 	ret = tracing_alloc_snapshot_instance(tr);
1357 	if (ret) {
1358 		spin_lock(&tr->snapshot_trigger_lock);
1359 		tr->snapshot--;
1360 		spin_unlock(&tr->snapshot_trigger_lock);
1361 	}
1362 
1363 	return ret;
1364 }
1365 
1366 int tracing_arm_snapshot(struct trace_array *tr)
1367 {
1368 	int ret;
1369 
1370 	mutex_lock(&trace_types_lock);
1371 	ret = tracing_arm_snapshot_locked(tr);
1372 	mutex_unlock(&trace_types_lock);
1373 
1374 	return ret;
1375 }
1376 
1377 void tracing_disarm_snapshot(struct trace_array *tr)
1378 {
1379 	spin_lock(&tr->snapshot_trigger_lock);
1380 	if (!WARN_ON(!tr->snapshot))
1381 		tr->snapshot--;
1382 	spin_unlock(&tr->snapshot_trigger_lock);
1383 }
1384 
1385 /**
1386  * tracing_alloc_snapshot - allocate snapshot buffer.
1387  *
1388  * This only allocates the snapshot buffer if it isn't already
1389  * allocated - it doesn't also take a snapshot.
1390  *
1391  * This is meant to be used in cases where the snapshot buffer needs
1392  * to be set up for events that can't sleep but need to be able to
1393  * trigger a snapshot.
1394  */
1395 int tracing_alloc_snapshot(void)
1396 {
1397 	struct trace_array *tr = &global_trace;
1398 	int ret;
1399 
1400 	ret = tracing_alloc_snapshot_instance(tr);
1401 	WARN_ON(ret < 0);
1402 
1403 	return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1406 
1407 /**
1408  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1409  *
1410  * This is similar to tracing_snapshot(), but it will allocate the
1411  * snapshot buffer if it isn't already allocated. Use this only
1412  * where it is safe to sleep, as the allocation may sleep.
1413  *
1414  * This causes a swap between the snapshot buffer and the current live
1415  * tracing buffer. You can use this to take snapshots of the live
1416  * trace when some condition is triggered, but continue to trace.
1417  */
1418 void tracing_snapshot_alloc(void)
1419 {
1420 	int ret;
1421 
1422 	ret = tracing_alloc_snapshot();
1423 	if (ret < 0)
1424 		return;
1425 
1426 	tracing_snapshot();
1427 }
1428 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1429 
1430 /**
1431  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1432  * @tr:		The tracing instance
1433  * @cond_data:	User data to associate with the snapshot
1434  * @update:	Implementation of the cond_snapshot update function
1435  *
1436  * Check whether the conditional snapshot for the given instance has
1437  * already been enabled, or if the current tracer is already using a
1438  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1439  * save the cond_data and update function inside.
1440  *
1441  * Returns 0 if successful, error otherwise.
1442  */
1443 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1444 				 cond_update_fn_t update)
1445 {
1446 	struct cond_snapshot *cond_snapshot __free(kfree) =
1447 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1448 	int ret;
1449 
1450 	if (!cond_snapshot)
1451 		return -ENOMEM;
1452 
1453 	cond_snapshot->cond_data = cond_data;
1454 	cond_snapshot->update = update;
1455 
1456 	guard(mutex)(&trace_types_lock);
1457 
1458 	if (tr->current_trace->use_max_tr)
1459 		return -EBUSY;
1460 
1461 	/*
1462 	 * The cond_snapshot can only change to NULL without the
1463 	 * trace_types_lock. We don't care if we race with it going
1464 	 * to NULL, but we want to make sure that it's not set to
1465 	 * something other than NULL when we get here, which we can
1466 	 * do safely with only holding the trace_types_lock and not
1467 	 * having to take the max_lock.
1468 	 */
1469 	if (tr->cond_snapshot)
1470 		return -EBUSY;
1471 
1472 	ret = tracing_arm_snapshot_locked(tr);
1473 	if (ret)
1474 		return ret;
1475 
1476 	local_irq_disable();
1477 	arch_spin_lock(&tr->max_lock);
1478 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1479 	arch_spin_unlock(&tr->max_lock);
1480 	local_irq_enable();
1481 
1482 	return 0;
1483 }
1484 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1485 
1486 /**
1487  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1488  * @tr:		The tracing instance
1489  *
1490  * Check whether the conditional snapshot for the given instance is
1491  * enabled; if so, free the cond_snapshot associated with it,
1492  * otherwise return -EINVAL.
1493  *
1494  * Returns 0 if successful, error otherwise.
1495  */
1496 int tracing_snapshot_cond_disable(struct trace_array *tr)
1497 {
1498 	int ret = 0;
1499 
1500 	local_irq_disable();
1501 	arch_spin_lock(&tr->max_lock);
1502 
1503 	if (!tr->cond_snapshot)
1504 		ret = -EINVAL;
1505 	else {
1506 		kfree(tr->cond_snapshot);
1507 		tr->cond_snapshot = NULL;
1508 	}
1509 
1510 	arch_spin_unlock(&tr->max_lock);
1511 	local_irq_enable();
1512 
1513 	tracing_disarm_snapshot(tr);
1514 
1515 	return ret;
1516 }
1517 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1518 #else
1519 void tracing_snapshot(void)
1520 {
1521 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1522 }
1523 EXPORT_SYMBOL_GPL(tracing_snapshot);
1524 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1525 {
1526 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1529 int tracing_alloc_snapshot(void)
1530 {
1531 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1532 	return -ENODEV;
1533 }
1534 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1535 void tracing_snapshot_alloc(void)
1536 {
1537 	/* Give warning */
1538 	tracing_snapshot();
1539 }
1540 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1541 void *tracing_cond_snapshot_data(struct trace_array *tr)
1542 {
1543 	return NULL;
1544 }
1545 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1546 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1547 {
1548 	return -ENODEV;
1549 }
1550 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1551 int tracing_snapshot_cond_disable(struct trace_array *tr)
1552 {
1553 	return false;
1554 }
1555 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1556 #define free_snapshot(tr)	do { } while (0)
1557 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1558 #endif /* CONFIG_TRACER_SNAPSHOT */
1559 
1560 void tracer_tracing_off(struct trace_array *tr)
1561 {
1562 	if (tr->array_buffer.buffer)
1563 		ring_buffer_record_off(tr->array_buffer.buffer);
1564 	/*
1565 	 * This flag is looked at when buffers haven't been allocated
1566 	 * yet, or by some tracers (like irqsoff), that just want to
1567 	 * know if the ring buffer has been disabled, but it can handle
1568 	 * races of where it gets disabled but we still do a record.
1569 	 * As the check is in the fast path of the tracers, it is more
1570 	 * important to be fast than accurate.
1571 	 */
1572 	tr->buffer_disabled = 1;
1573 	/* Make the flag seen by readers */
1574 	smp_wmb();
1575 }
1576 
1577 /**
1578  * tracing_off - turn off tracing buffers
1579  *
1580  * This function stops the tracing buffers from recording data.
1581  * It does not disable any overhead the tracers themselves may
1582  * be causing. This function simply causes all recording to
1583  * the ring buffers to fail.
1584  */
1585 void tracing_off(void)
1586 {
1587 	tracer_tracing_off(&global_trace);
1588 }
1589 EXPORT_SYMBOL_GPL(tracing_off);
1590 
1591 void disable_trace_on_warning(void)
1592 {
1593 	if (__disable_trace_on_warning) {
1594 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1595 			"Disabling tracing due to warning\n");
1596 		tracing_off();
1597 	}
1598 }
1599 
1600 /**
1601  * tracer_tracing_is_on - show real state of ring buffer enabled
1602  * @tr : the trace array to know if ring buffer is enabled
1603  *
1604  * Shows real state of the ring buffer if it is enabled or not.
1605  */
1606 bool tracer_tracing_is_on(struct trace_array *tr)
1607 {
1608 	if (tr->array_buffer.buffer)
1609 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1610 	return !tr->buffer_disabled;
1611 }
1612 
1613 /**
1614  * tracing_is_on - show state of ring buffers enabled
1615  */
1616 int tracing_is_on(void)
1617 {
1618 	return tracer_tracing_is_on(&global_trace);
1619 }
1620 EXPORT_SYMBOL_GPL(tracing_is_on);
1621 
1622 static int __init set_buf_size(char *str)
1623 {
1624 	unsigned long buf_size;
1625 
1626 	if (!str)
1627 		return 0;
1628 	buf_size = memparse(str, &str);
1629 	/*
1630 	 * nr_entries can not be zero and the startup
1631 	 * tests require some buffer space. Therefore
1632 	 * ensure we have at least 4096 bytes of buffer.
1633 	 */
1634 	trace_buf_size = max(4096UL, buf_size);
1635 	return 1;
1636 }
1637 __setup("trace_buf_size=", set_buf_size);
1638 
1639 static int __init set_tracing_thresh(char *str)
1640 {
1641 	unsigned long threshold;
1642 	int ret;
1643 
1644 	if (!str)
1645 		return 0;
1646 	ret = kstrtoul(str, 0, &threshold);
1647 	if (ret < 0)
1648 		return 0;
1649 	tracing_thresh = threshold * 1000;
1650 	return 1;
1651 }
1652 __setup("tracing_thresh=", set_tracing_thresh);
1653 
1654 unsigned long nsecs_to_usecs(unsigned long nsecs)
1655 {
1656 	return nsecs / 1000;
1657 }
1658 
1659 /*
1660  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1661  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1662  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1663  * of strings in the order that the evals (enum) were defined.
1664  */
1665 #undef C
1666 #define C(a, b) b
1667 
1668 /* These must match the bit positions in trace_iterator_flags */
1669 static const char *trace_options[] = {
1670 	TRACE_FLAGS
1671 	NULL
1672 };
1673 
1674 static struct {
1675 	u64 (*func)(void);
1676 	const char *name;
1677 	int in_ns;		/* is this clock in nanoseconds? */
1678 } trace_clocks[] = {
1679 	{ trace_clock_local,		"local",	1 },
1680 	{ trace_clock_global,		"global",	1 },
1681 	{ trace_clock_counter,		"counter",	0 },
1682 	{ trace_clock_jiffies,		"uptime",	0 },
1683 	{ trace_clock,			"perf",		1 },
1684 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1685 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1686 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1687 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1688 	ARCH_TRACE_CLOCKS
1689 };
1690 
1691 bool trace_clock_in_ns(struct trace_array *tr)
1692 {
1693 	if (trace_clocks[tr->clock_id].in_ns)
1694 		return true;
1695 
1696 	return false;
1697 }
1698 
1699 /*
1700  * trace_parser_get_init - gets the buffer for trace parser
1701  */
1702 int trace_parser_get_init(struct trace_parser *parser, int size)
1703 {
1704 	memset(parser, 0, sizeof(*parser));
1705 
1706 	parser->buffer = kmalloc(size, GFP_KERNEL);
1707 	if (!parser->buffer)
1708 		return 1;
1709 
1710 	parser->size = size;
1711 	return 0;
1712 }
1713 
1714 /*
1715  * trace_parser_put - frees the buffer for trace parser
1716  */
1717 void trace_parser_put(struct trace_parser *parser)
1718 {
1719 	kfree(parser->buffer);
1720 	parser->buffer = NULL;
1721 }
1722 
1723 /*
1724  * trace_get_user - reads the user input string separated by  space
1725  * (matched by isspace(ch))
1726  *
1727  * For each string found the 'struct trace_parser' is updated,
1728  * and the function returns.
1729  *
1730  * Returns number of bytes read.
1731  *
1732  * See kernel/trace/trace.h for 'struct trace_parser' details.
1733  */
1734 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1735 	size_t cnt, loff_t *ppos)
1736 {
1737 	char ch;
1738 	size_t read = 0;
1739 	ssize_t ret;
1740 
1741 	if (!*ppos)
1742 		trace_parser_clear(parser);
1743 
1744 	ret = get_user(ch, ubuf++);
1745 	if (ret)
1746 		goto out;
1747 
1748 	read++;
1749 	cnt--;
1750 
1751 	/*
1752 	 * The parser is not finished with the last write,
1753 	 * continue reading the user input without skipping spaces.
1754 	 */
1755 	if (!parser->cont) {
1756 		/* skip white space */
1757 		while (cnt && isspace(ch)) {
1758 			ret = get_user(ch, ubuf++);
1759 			if (ret)
1760 				goto out;
1761 			read++;
1762 			cnt--;
1763 		}
1764 
1765 		parser->idx = 0;
1766 
1767 		/* only spaces were written */
1768 		if (isspace(ch) || !ch) {
1769 			*ppos += read;
1770 			ret = read;
1771 			goto out;
1772 		}
1773 	}
1774 
1775 	/* read the non-space input */
1776 	while (cnt && !isspace(ch) && ch) {
1777 		if (parser->idx < parser->size - 1)
1778 			parser->buffer[parser->idx++] = ch;
1779 		else {
1780 			ret = -EINVAL;
1781 			goto out;
1782 		}
1783 		ret = get_user(ch, ubuf++);
1784 		if (ret)
1785 			goto out;
1786 		read++;
1787 		cnt--;
1788 	}
1789 
1790 	/* We either got finished input or we have to wait for another call. */
1791 	if (isspace(ch) || !ch) {
1792 		parser->buffer[parser->idx] = 0;
1793 		parser->cont = false;
1794 	} else if (parser->idx < parser->size - 1) {
1795 		parser->cont = true;
1796 		parser->buffer[parser->idx++] = ch;
1797 		/* Make sure the parsed string always terminates with '\0'. */
1798 		parser->buffer[parser->idx] = 0;
1799 	} else {
1800 		ret = -EINVAL;
1801 		goto out;
1802 	}
1803 
1804 	*ppos += read;
1805 	ret = read;
1806 
1807 out:
1808 	return ret;
1809 }
1810 
1811 /* TODO add a seq_buf_to_buffer() */
1812 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1813 {
1814 	int len;
1815 
1816 	if (trace_seq_used(s) <= s->readpos)
1817 		return -EBUSY;
1818 
1819 	len = trace_seq_used(s) - s->readpos;
1820 	if (cnt > len)
1821 		cnt = len;
1822 	memcpy(buf, s->buffer + s->readpos, cnt);
1823 
1824 	s->readpos += cnt;
1825 	return cnt;
1826 }
1827 
1828 unsigned long __read_mostly	tracing_thresh;
1829 
1830 #ifdef CONFIG_TRACER_MAX_TRACE
1831 static const struct file_operations tracing_max_lat_fops;
1832 
1833 #ifdef LATENCY_FS_NOTIFY
1834 
1835 static struct workqueue_struct *fsnotify_wq;
1836 
1837 static void latency_fsnotify_workfn(struct work_struct *work)
1838 {
1839 	struct trace_array *tr = container_of(work, struct trace_array,
1840 					      fsnotify_work);
1841 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1842 }
1843 
1844 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1845 {
1846 	struct trace_array *tr = container_of(iwork, struct trace_array,
1847 					      fsnotify_irqwork);
1848 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1849 }
1850 
1851 static void trace_create_maxlat_file(struct trace_array *tr,
1852 				     struct dentry *d_tracer)
1853 {
1854 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1855 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1856 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1857 					      TRACE_MODE_WRITE,
1858 					      d_tracer, tr,
1859 					      &tracing_max_lat_fops);
1860 }
1861 
1862 __init static int latency_fsnotify_init(void)
1863 {
1864 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1865 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1866 	if (!fsnotify_wq) {
1867 		pr_err("Unable to allocate tr_max_lat_wq\n");
1868 		return -ENOMEM;
1869 	}
1870 	return 0;
1871 }
1872 
1873 late_initcall_sync(latency_fsnotify_init);
1874 
1875 void latency_fsnotify(struct trace_array *tr)
1876 {
1877 	if (!fsnotify_wq)
1878 		return;
1879 	/*
1880 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1881 	 * possible that we are called from __schedule() or do_idle(), which
1882 	 * could cause a deadlock.
1883 	 */
1884 	irq_work_queue(&tr->fsnotify_irqwork);
1885 }
1886 
1887 #else /* !LATENCY_FS_NOTIFY */
1888 
1889 #define trace_create_maxlat_file(tr, d_tracer)				\
1890 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1891 			  d_tracer, tr, &tracing_max_lat_fops)
1892 
1893 #endif
1894 
1895 /*
1896  * Copy the new maximum trace into the separate maximum-trace
1897  * structure. (this way the maximum trace is permanently saved,
1898  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1899  */
1900 static void
1901 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1902 {
1903 	struct array_buffer *trace_buf = &tr->array_buffer;
1904 	struct array_buffer *max_buf = &tr->max_buffer;
1905 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1906 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1907 
1908 	max_buf->cpu = cpu;
1909 	max_buf->time_start = data->preempt_timestamp;
1910 
1911 	max_data->saved_latency = tr->max_latency;
1912 	max_data->critical_start = data->critical_start;
1913 	max_data->critical_end = data->critical_end;
1914 
1915 	strscpy(max_data->comm, tsk->comm);
1916 	max_data->pid = tsk->pid;
1917 	/*
1918 	 * If tsk == current, then use current_uid(), as that does not use
1919 	 * RCU. The irq tracer can be called out of RCU scope.
1920 	 */
1921 	if (tsk == current)
1922 		max_data->uid = current_uid();
1923 	else
1924 		max_data->uid = task_uid(tsk);
1925 
1926 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1927 	max_data->policy = tsk->policy;
1928 	max_data->rt_priority = tsk->rt_priority;
1929 
1930 	/* record this tasks comm */
1931 	tracing_record_cmdline(tsk);
1932 	latency_fsnotify(tr);
1933 }
1934 
1935 /**
1936  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1937  * @tr: tracer
1938  * @tsk: the task with the latency
1939  * @cpu: The cpu that initiated the trace.
1940  * @cond_data: User data associated with a conditional snapshot
1941  *
1942  * Flip the buffers between the @tr and the max_tr and record information
1943  * about which task was the cause of this latency.
1944  */
1945 void
1946 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1947 	      void *cond_data)
1948 {
1949 	if (tr->stop_count)
1950 		return;
1951 
1952 	WARN_ON_ONCE(!irqs_disabled());
1953 
1954 	if (!tr->allocated_snapshot) {
1955 		/* Only the nop tracer should hit this when disabling */
1956 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1957 		return;
1958 	}
1959 
1960 	arch_spin_lock(&tr->max_lock);
1961 
1962 	/* Inherit the recordable setting from array_buffer */
1963 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1964 		ring_buffer_record_on(tr->max_buffer.buffer);
1965 	else
1966 		ring_buffer_record_off(tr->max_buffer.buffer);
1967 
1968 #ifdef CONFIG_TRACER_SNAPSHOT
1969 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1970 		arch_spin_unlock(&tr->max_lock);
1971 		return;
1972 	}
1973 #endif
1974 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1975 
1976 	__update_max_tr(tr, tsk, cpu);
1977 
1978 	arch_spin_unlock(&tr->max_lock);
1979 
1980 	/* Any waiters on the old snapshot buffer need to wake up */
1981 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1982 }
1983 
1984 /**
1985  * update_max_tr_single - only copy one trace over, and reset the rest
1986  * @tr: tracer
1987  * @tsk: task with the latency
1988  * @cpu: the cpu of the buffer to copy.
1989  *
1990  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1991  */
1992 void
1993 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1994 {
1995 	int ret;
1996 
1997 	if (tr->stop_count)
1998 		return;
1999 
2000 	WARN_ON_ONCE(!irqs_disabled());
2001 	if (!tr->allocated_snapshot) {
2002 		/* Only the nop tracer should hit this when disabling */
2003 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2004 		return;
2005 	}
2006 
2007 	arch_spin_lock(&tr->max_lock);
2008 
2009 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2010 
2011 	if (ret == -EBUSY) {
2012 		/*
2013 		 * We failed to swap the buffer due to a commit taking
2014 		 * place on this CPU. We fail to record, but we reset
2015 		 * the max trace buffer (no one writes directly to it)
2016 		 * and flag that it failed.
2017 		 * Another reason is resize is in progress.
2018 		 */
2019 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2020 			"Failed to swap buffers due to commit or resize in progress\n");
2021 	}
2022 
2023 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2024 
2025 	__update_max_tr(tr, tsk, cpu);
2026 	arch_spin_unlock(&tr->max_lock);
2027 }
2028 
2029 #endif /* CONFIG_TRACER_MAX_TRACE */
2030 
2031 struct pipe_wait {
2032 	struct trace_iterator		*iter;
2033 	int				wait_index;
2034 };
2035 
2036 static bool wait_pipe_cond(void *data)
2037 {
2038 	struct pipe_wait *pwait = data;
2039 	struct trace_iterator *iter = pwait->iter;
2040 
2041 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2042 		return true;
2043 
2044 	return iter->closed;
2045 }
2046 
2047 static int wait_on_pipe(struct trace_iterator *iter, int full)
2048 {
2049 	struct pipe_wait pwait;
2050 	int ret;
2051 
2052 	/* Iterators are static, they should be filled or empty */
2053 	if (trace_buffer_iter(iter, iter->cpu_file))
2054 		return 0;
2055 
2056 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2057 	pwait.iter = iter;
2058 
2059 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2060 			       wait_pipe_cond, &pwait);
2061 
2062 #ifdef CONFIG_TRACER_MAX_TRACE
2063 	/*
2064 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2065 	 * to happen, this would now be the main buffer.
2066 	 */
2067 	if (iter->snapshot)
2068 		iter->array_buffer = &iter->tr->max_buffer;
2069 #endif
2070 	return ret;
2071 }
2072 
2073 #ifdef CONFIG_FTRACE_STARTUP_TEST
2074 static bool selftests_can_run;
2075 
2076 struct trace_selftests {
2077 	struct list_head		list;
2078 	struct tracer			*type;
2079 };
2080 
2081 static LIST_HEAD(postponed_selftests);
2082 
2083 static int save_selftest(struct tracer *type)
2084 {
2085 	struct trace_selftests *selftest;
2086 
2087 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2088 	if (!selftest)
2089 		return -ENOMEM;
2090 
2091 	selftest->type = type;
2092 	list_add(&selftest->list, &postponed_selftests);
2093 	return 0;
2094 }
2095 
2096 static int run_tracer_selftest(struct tracer *type)
2097 {
2098 	struct trace_array *tr = &global_trace;
2099 	struct tracer *saved_tracer = tr->current_trace;
2100 	int ret;
2101 
2102 	if (!type->selftest || tracing_selftest_disabled)
2103 		return 0;
2104 
2105 	/*
2106 	 * If a tracer registers early in boot up (before scheduling is
2107 	 * initialized and such), then do not run its selftests yet.
2108 	 * Instead, run it a little later in the boot process.
2109 	 */
2110 	if (!selftests_can_run)
2111 		return save_selftest(type);
2112 
2113 	if (!tracing_is_on()) {
2114 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2115 			type->name);
2116 		return 0;
2117 	}
2118 
2119 	/*
2120 	 * Run a selftest on this tracer.
2121 	 * Here we reset the trace buffer, and set the current
2122 	 * tracer to be this tracer. The tracer can then run some
2123 	 * internal tracing to verify that everything is in order.
2124 	 * If we fail, we do not register this tracer.
2125 	 */
2126 	tracing_reset_online_cpus(&tr->array_buffer);
2127 
2128 	tr->current_trace = type;
2129 
2130 #ifdef CONFIG_TRACER_MAX_TRACE
2131 	if (type->use_max_tr) {
2132 		/* If we expanded the buffers, make sure the max is expanded too */
2133 		if (tr->ring_buffer_expanded)
2134 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2135 					   RING_BUFFER_ALL_CPUS);
2136 		tr->allocated_snapshot = true;
2137 	}
2138 #endif
2139 
2140 	/* the test is responsible for initializing and enabling */
2141 	pr_info("Testing tracer %s: ", type->name);
2142 	ret = type->selftest(type, tr);
2143 	/* the test is responsible for resetting too */
2144 	tr->current_trace = saved_tracer;
2145 	if (ret) {
2146 		printk(KERN_CONT "FAILED!\n");
2147 		/* Add the warning after printing 'FAILED' */
2148 		WARN_ON(1);
2149 		return -1;
2150 	}
2151 	/* Only reset on passing, to avoid touching corrupted buffers */
2152 	tracing_reset_online_cpus(&tr->array_buffer);
2153 
2154 #ifdef CONFIG_TRACER_MAX_TRACE
2155 	if (type->use_max_tr) {
2156 		tr->allocated_snapshot = false;
2157 
2158 		/* Shrink the max buffer again */
2159 		if (tr->ring_buffer_expanded)
2160 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2161 					   RING_BUFFER_ALL_CPUS);
2162 	}
2163 #endif
2164 
2165 	printk(KERN_CONT "PASSED\n");
2166 	return 0;
2167 }
2168 
2169 static int do_run_tracer_selftest(struct tracer *type)
2170 {
2171 	int ret;
2172 
2173 	/*
2174 	 * Tests can take a long time, especially if they are run one after the
2175 	 * other, as does happen during bootup when all the tracers are
2176 	 * registered. This could cause the soft lockup watchdog to trigger.
2177 	 */
2178 	cond_resched();
2179 
2180 	tracing_selftest_running = true;
2181 	ret = run_tracer_selftest(type);
2182 	tracing_selftest_running = false;
2183 
2184 	return ret;
2185 }
2186 
2187 static __init int init_trace_selftests(void)
2188 {
2189 	struct trace_selftests *p, *n;
2190 	struct tracer *t, **last;
2191 	int ret;
2192 
2193 	selftests_can_run = true;
2194 
2195 	guard(mutex)(&trace_types_lock);
2196 
2197 	if (list_empty(&postponed_selftests))
2198 		return 0;
2199 
2200 	pr_info("Running postponed tracer tests:\n");
2201 
2202 	tracing_selftest_running = true;
2203 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2204 		/* This loop can take minutes when sanitizers are enabled, so
2205 		 * lets make sure we allow RCU processing.
2206 		 */
2207 		cond_resched();
2208 		ret = run_tracer_selftest(p->type);
2209 		/* If the test fails, then warn and remove from available_tracers */
2210 		if (ret < 0) {
2211 			WARN(1, "tracer: %s failed selftest, disabling\n",
2212 			     p->type->name);
2213 			last = &trace_types;
2214 			for (t = trace_types; t; t = t->next) {
2215 				if (t == p->type) {
2216 					*last = t->next;
2217 					break;
2218 				}
2219 				last = &t->next;
2220 			}
2221 		}
2222 		list_del(&p->list);
2223 		kfree(p);
2224 	}
2225 	tracing_selftest_running = false;
2226 
2227 	return 0;
2228 }
2229 core_initcall(init_trace_selftests);
2230 #else
2231 static inline int do_run_tracer_selftest(struct tracer *type)
2232 {
2233 	return 0;
2234 }
2235 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2236 
2237 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2238 
2239 static void __init apply_trace_boot_options(void);
2240 
2241 /**
2242  * register_tracer - register a tracer with the ftrace system.
2243  * @type: the plugin for the tracer
2244  *
2245  * Register a new plugin tracer.
2246  */
2247 int __init register_tracer(struct tracer *type)
2248 {
2249 	struct tracer *t;
2250 	int ret = 0;
2251 
2252 	if (!type->name) {
2253 		pr_info("Tracer must have a name\n");
2254 		return -1;
2255 	}
2256 
2257 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2258 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2259 		return -1;
2260 	}
2261 
2262 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2263 		pr_warn("Can not register tracer %s due to lockdown\n",
2264 			   type->name);
2265 		return -EPERM;
2266 	}
2267 
2268 	mutex_lock(&trace_types_lock);
2269 
2270 	for (t = trace_types; t; t = t->next) {
2271 		if (strcmp(type->name, t->name) == 0) {
2272 			/* already found */
2273 			pr_info("Tracer %s already registered\n",
2274 				type->name);
2275 			ret = -1;
2276 			goto out;
2277 		}
2278 	}
2279 
2280 	if (!type->set_flag)
2281 		type->set_flag = &dummy_set_flag;
2282 	if (!type->flags) {
2283 		/*allocate a dummy tracer_flags*/
2284 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2285 		if (!type->flags) {
2286 			ret = -ENOMEM;
2287 			goto out;
2288 		}
2289 		type->flags->val = 0;
2290 		type->flags->opts = dummy_tracer_opt;
2291 	} else
2292 		if (!type->flags->opts)
2293 			type->flags->opts = dummy_tracer_opt;
2294 
2295 	/* store the tracer for __set_tracer_option */
2296 	type->flags->trace = type;
2297 
2298 	ret = do_run_tracer_selftest(type);
2299 	if (ret < 0)
2300 		goto out;
2301 
2302 	type->next = trace_types;
2303 	trace_types = type;
2304 	add_tracer_options(&global_trace, type);
2305 
2306  out:
2307 	mutex_unlock(&trace_types_lock);
2308 
2309 	if (ret || !default_bootup_tracer)
2310 		goto out_unlock;
2311 
2312 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2313 		goto out_unlock;
2314 
2315 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2316 	/* Do we want this tracer to start on bootup? */
2317 	tracing_set_tracer(&global_trace, type->name);
2318 	default_bootup_tracer = NULL;
2319 
2320 	apply_trace_boot_options();
2321 
2322 	/* disable other selftests, since this will break it. */
2323 	disable_tracing_selftest("running a tracer");
2324 
2325  out_unlock:
2326 	return ret;
2327 }
2328 
2329 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2330 {
2331 	struct trace_buffer *buffer = buf->buffer;
2332 
2333 	if (!buffer)
2334 		return;
2335 
2336 	ring_buffer_record_disable(buffer);
2337 
2338 	/* Make sure all commits have finished */
2339 	synchronize_rcu();
2340 	ring_buffer_reset_cpu(buffer, cpu);
2341 
2342 	ring_buffer_record_enable(buffer);
2343 }
2344 
2345 void tracing_reset_online_cpus(struct array_buffer *buf)
2346 {
2347 	struct trace_buffer *buffer = buf->buffer;
2348 
2349 	if (!buffer)
2350 		return;
2351 
2352 	ring_buffer_record_disable(buffer);
2353 
2354 	/* Make sure all commits have finished */
2355 	synchronize_rcu();
2356 
2357 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2358 
2359 	ring_buffer_reset_online_cpus(buffer);
2360 
2361 	ring_buffer_record_enable(buffer);
2362 }
2363 
2364 static void tracing_reset_all_cpus(struct array_buffer *buf)
2365 {
2366 	struct trace_buffer *buffer = buf->buffer;
2367 
2368 	if (!buffer)
2369 		return;
2370 
2371 	ring_buffer_record_disable(buffer);
2372 
2373 	/* Make sure all commits have finished */
2374 	synchronize_rcu();
2375 
2376 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2377 
2378 	ring_buffer_reset(buffer);
2379 
2380 	ring_buffer_record_enable(buffer);
2381 }
2382 
2383 /* Must have trace_types_lock held */
2384 void tracing_reset_all_online_cpus_unlocked(void)
2385 {
2386 	struct trace_array *tr;
2387 
2388 	lockdep_assert_held(&trace_types_lock);
2389 
2390 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2391 		if (!tr->clear_trace)
2392 			continue;
2393 		tr->clear_trace = false;
2394 		tracing_reset_online_cpus(&tr->array_buffer);
2395 #ifdef CONFIG_TRACER_MAX_TRACE
2396 		tracing_reset_online_cpus(&tr->max_buffer);
2397 #endif
2398 	}
2399 }
2400 
2401 void tracing_reset_all_online_cpus(void)
2402 {
2403 	mutex_lock(&trace_types_lock);
2404 	tracing_reset_all_online_cpus_unlocked();
2405 	mutex_unlock(&trace_types_lock);
2406 }
2407 
2408 int is_tracing_stopped(void)
2409 {
2410 	return global_trace.stop_count;
2411 }
2412 
2413 static void tracing_start_tr(struct trace_array *tr)
2414 {
2415 	struct trace_buffer *buffer;
2416 	unsigned long flags;
2417 
2418 	if (tracing_disabled)
2419 		return;
2420 
2421 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2422 	if (--tr->stop_count) {
2423 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2424 			/* Someone screwed up their debugging */
2425 			tr->stop_count = 0;
2426 		}
2427 		goto out;
2428 	}
2429 
2430 	/* Prevent the buffers from switching */
2431 	arch_spin_lock(&tr->max_lock);
2432 
2433 	buffer = tr->array_buffer.buffer;
2434 	if (buffer)
2435 		ring_buffer_record_enable(buffer);
2436 
2437 #ifdef CONFIG_TRACER_MAX_TRACE
2438 	buffer = tr->max_buffer.buffer;
2439 	if (buffer)
2440 		ring_buffer_record_enable(buffer);
2441 #endif
2442 
2443 	arch_spin_unlock(&tr->max_lock);
2444 
2445  out:
2446 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2447 }
2448 
2449 /**
2450  * tracing_start - quick start of the tracer
2451  *
2452  * If tracing is enabled but was stopped by tracing_stop,
2453  * this will start the tracer back up.
2454  */
2455 void tracing_start(void)
2456 
2457 {
2458 	return tracing_start_tr(&global_trace);
2459 }
2460 
2461 static void tracing_stop_tr(struct trace_array *tr)
2462 {
2463 	struct trace_buffer *buffer;
2464 	unsigned long flags;
2465 
2466 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2467 	if (tr->stop_count++)
2468 		goto out;
2469 
2470 	/* Prevent the buffers from switching */
2471 	arch_spin_lock(&tr->max_lock);
2472 
2473 	buffer = tr->array_buffer.buffer;
2474 	if (buffer)
2475 		ring_buffer_record_disable(buffer);
2476 
2477 #ifdef CONFIG_TRACER_MAX_TRACE
2478 	buffer = tr->max_buffer.buffer;
2479 	if (buffer)
2480 		ring_buffer_record_disable(buffer);
2481 #endif
2482 
2483 	arch_spin_unlock(&tr->max_lock);
2484 
2485  out:
2486 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2487 }
2488 
2489 /**
2490  * tracing_stop - quick stop of the tracer
2491  *
2492  * Light weight way to stop tracing. Use in conjunction with
2493  * tracing_start.
2494  */
2495 void tracing_stop(void)
2496 {
2497 	return tracing_stop_tr(&global_trace);
2498 }
2499 
2500 /*
2501  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2502  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2503  * simplifies those functions and keeps them in sync.
2504  */
2505 enum print_line_t trace_handle_return(struct trace_seq *s)
2506 {
2507 	return trace_seq_has_overflowed(s) ?
2508 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2509 }
2510 EXPORT_SYMBOL_GPL(trace_handle_return);
2511 
2512 static unsigned short migration_disable_value(void)
2513 {
2514 #if defined(CONFIG_SMP)
2515 	return current->migration_disabled;
2516 #else
2517 	return 0;
2518 #endif
2519 }
2520 
2521 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2522 {
2523 	unsigned int trace_flags = irqs_status;
2524 	unsigned int pc;
2525 
2526 	pc = preempt_count();
2527 
2528 	if (pc & NMI_MASK)
2529 		trace_flags |= TRACE_FLAG_NMI;
2530 	if (pc & HARDIRQ_MASK)
2531 		trace_flags |= TRACE_FLAG_HARDIRQ;
2532 	if (in_serving_softirq())
2533 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2534 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2535 		trace_flags |= TRACE_FLAG_BH_OFF;
2536 
2537 	if (tif_need_resched())
2538 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2539 	if (test_preempt_need_resched())
2540 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2541 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2542 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2543 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2544 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2545 }
2546 
2547 struct ring_buffer_event *
2548 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2549 			  int type,
2550 			  unsigned long len,
2551 			  unsigned int trace_ctx)
2552 {
2553 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2554 }
2555 
2556 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2557 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2558 static int trace_buffered_event_ref;
2559 
2560 /**
2561  * trace_buffered_event_enable - enable buffering events
2562  *
2563  * When events are being filtered, it is quicker to use a temporary
2564  * buffer to write the event data into if there's a likely chance
2565  * that it will not be committed. The discard of the ring buffer
2566  * is not as fast as committing, and is much slower than copying
2567  * a commit.
2568  *
2569  * When an event is to be filtered, allocate per cpu buffers to
2570  * write the event data into, and if the event is filtered and discarded
2571  * it is simply dropped, otherwise, the entire data is to be committed
2572  * in one shot.
2573  */
2574 void trace_buffered_event_enable(void)
2575 {
2576 	struct ring_buffer_event *event;
2577 	struct page *page;
2578 	int cpu;
2579 
2580 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2581 
2582 	if (trace_buffered_event_ref++)
2583 		return;
2584 
2585 	for_each_tracing_cpu(cpu) {
2586 		page = alloc_pages_node(cpu_to_node(cpu),
2587 					GFP_KERNEL | __GFP_NORETRY, 0);
2588 		/* This is just an optimization and can handle failures */
2589 		if (!page) {
2590 			pr_err("Failed to allocate event buffer\n");
2591 			break;
2592 		}
2593 
2594 		event = page_address(page);
2595 		memset(event, 0, sizeof(*event));
2596 
2597 		per_cpu(trace_buffered_event, cpu) = event;
2598 
2599 		preempt_disable();
2600 		if (cpu == smp_processor_id() &&
2601 		    __this_cpu_read(trace_buffered_event) !=
2602 		    per_cpu(trace_buffered_event, cpu))
2603 			WARN_ON_ONCE(1);
2604 		preempt_enable();
2605 	}
2606 }
2607 
2608 static void enable_trace_buffered_event(void *data)
2609 {
2610 	/* Probably not needed, but do it anyway */
2611 	smp_rmb();
2612 	this_cpu_dec(trace_buffered_event_cnt);
2613 }
2614 
2615 static void disable_trace_buffered_event(void *data)
2616 {
2617 	this_cpu_inc(trace_buffered_event_cnt);
2618 }
2619 
2620 /**
2621  * trace_buffered_event_disable - disable buffering events
2622  *
2623  * When a filter is removed, it is faster to not use the buffered
2624  * events, and to commit directly into the ring buffer. Free up
2625  * the temp buffers when there are no more users. This requires
2626  * special synchronization with current events.
2627  */
2628 void trace_buffered_event_disable(void)
2629 {
2630 	int cpu;
2631 
2632 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2633 
2634 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2635 		return;
2636 
2637 	if (--trace_buffered_event_ref)
2638 		return;
2639 
2640 	/* For each CPU, set the buffer as used. */
2641 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2642 			 NULL, true);
2643 
2644 	/* Wait for all current users to finish */
2645 	synchronize_rcu();
2646 
2647 	for_each_tracing_cpu(cpu) {
2648 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2649 		per_cpu(trace_buffered_event, cpu) = NULL;
2650 	}
2651 
2652 	/*
2653 	 * Wait for all CPUs that potentially started checking if they can use
2654 	 * their event buffer only after the previous synchronize_rcu() call and
2655 	 * they still read a valid pointer from trace_buffered_event. It must be
2656 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2657 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2658 	 */
2659 	synchronize_rcu();
2660 
2661 	/* For each CPU, relinquish the buffer */
2662 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2663 			 true);
2664 }
2665 
2666 static struct trace_buffer *temp_buffer;
2667 
2668 struct ring_buffer_event *
2669 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2670 			  struct trace_event_file *trace_file,
2671 			  int type, unsigned long len,
2672 			  unsigned int trace_ctx)
2673 {
2674 	struct ring_buffer_event *entry;
2675 	struct trace_array *tr = trace_file->tr;
2676 	int val;
2677 
2678 	*current_rb = tr->array_buffer.buffer;
2679 
2680 	if (!tr->no_filter_buffering_ref &&
2681 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2682 		preempt_disable_notrace();
2683 		/*
2684 		 * Filtering is on, so try to use the per cpu buffer first.
2685 		 * This buffer will simulate a ring_buffer_event,
2686 		 * where the type_len is zero and the array[0] will
2687 		 * hold the full length.
2688 		 * (see include/linux/ring-buffer.h for details on
2689 		 *  how the ring_buffer_event is structured).
2690 		 *
2691 		 * Using a temp buffer during filtering and copying it
2692 		 * on a matched filter is quicker than writing directly
2693 		 * into the ring buffer and then discarding it when
2694 		 * it doesn't match. That is because the discard
2695 		 * requires several atomic operations to get right.
2696 		 * Copying on match and doing nothing on a failed match
2697 		 * is still quicker than no copy on match, but having
2698 		 * to discard out of the ring buffer on a failed match.
2699 		 */
2700 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2701 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2702 
2703 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2704 
2705 			/*
2706 			 * Preemption is disabled, but interrupts and NMIs
2707 			 * can still come in now. If that happens after
2708 			 * the above increment, then it will have to go
2709 			 * back to the old method of allocating the event
2710 			 * on the ring buffer, and if the filter fails, it
2711 			 * will have to call ring_buffer_discard_commit()
2712 			 * to remove it.
2713 			 *
2714 			 * Need to also check the unlikely case that the
2715 			 * length is bigger than the temp buffer size.
2716 			 * If that happens, then the reserve is pretty much
2717 			 * guaranteed to fail, as the ring buffer currently
2718 			 * only allows events less than a page. But that may
2719 			 * change in the future, so let the ring buffer reserve
2720 			 * handle the failure in that case.
2721 			 */
2722 			if (val == 1 && likely(len <= max_len)) {
2723 				trace_event_setup(entry, type, trace_ctx);
2724 				entry->array[0] = len;
2725 				/* Return with preemption disabled */
2726 				return entry;
2727 			}
2728 			this_cpu_dec(trace_buffered_event_cnt);
2729 		}
2730 		/* __trace_buffer_lock_reserve() disables preemption */
2731 		preempt_enable_notrace();
2732 	}
2733 
2734 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2735 					    trace_ctx);
2736 	/*
2737 	 * If tracing is off, but we have triggers enabled
2738 	 * we still need to look at the event data. Use the temp_buffer
2739 	 * to store the trace event for the trigger to use. It's recursive
2740 	 * safe and will not be recorded anywhere.
2741 	 */
2742 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2743 		*current_rb = temp_buffer;
2744 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2745 						    trace_ctx);
2746 	}
2747 	return entry;
2748 }
2749 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2750 
2751 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2752 static DEFINE_MUTEX(tracepoint_printk_mutex);
2753 
2754 static void output_printk(struct trace_event_buffer *fbuffer)
2755 {
2756 	struct trace_event_call *event_call;
2757 	struct trace_event_file *file;
2758 	struct trace_event *event;
2759 	unsigned long flags;
2760 	struct trace_iterator *iter = tracepoint_print_iter;
2761 
2762 	/* We should never get here if iter is NULL */
2763 	if (WARN_ON_ONCE(!iter))
2764 		return;
2765 
2766 	event_call = fbuffer->trace_file->event_call;
2767 	if (!event_call || !event_call->event.funcs ||
2768 	    !event_call->event.funcs->trace)
2769 		return;
2770 
2771 	file = fbuffer->trace_file;
2772 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2773 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2774 	     !filter_match_preds(file->filter, fbuffer->entry)))
2775 		return;
2776 
2777 	event = &fbuffer->trace_file->event_call->event;
2778 
2779 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2780 	trace_seq_init(&iter->seq);
2781 	iter->ent = fbuffer->entry;
2782 	event_call->event.funcs->trace(iter, 0, event);
2783 	trace_seq_putc(&iter->seq, 0);
2784 	printk("%s", iter->seq.buffer);
2785 
2786 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2787 }
2788 
2789 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2790 			     void *buffer, size_t *lenp,
2791 			     loff_t *ppos)
2792 {
2793 	int save_tracepoint_printk;
2794 	int ret;
2795 
2796 	guard(mutex)(&tracepoint_printk_mutex);
2797 	save_tracepoint_printk = tracepoint_printk;
2798 
2799 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2800 
2801 	/*
2802 	 * This will force exiting early, as tracepoint_printk
2803 	 * is always zero when tracepoint_printk_iter is not allocated
2804 	 */
2805 	if (!tracepoint_print_iter)
2806 		tracepoint_printk = 0;
2807 
2808 	if (save_tracepoint_printk == tracepoint_printk)
2809 		return ret;
2810 
2811 	if (tracepoint_printk)
2812 		static_key_enable(&tracepoint_printk_key.key);
2813 	else
2814 		static_key_disable(&tracepoint_printk_key.key);
2815 
2816 	return ret;
2817 }
2818 
2819 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2820 {
2821 	enum event_trigger_type tt = ETT_NONE;
2822 	struct trace_event_file *file = fbuffer->trace_file;
2823 
2824 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2825 			fbuffer->entry, &tt))
2826 		goto discard;
2827 
2828 	if (static_key_false(&tracepoint_printk_key.key))
2829 		output_printk(fbuffer);
2830 
2831 	if (static_branch_unlikely(&trace_event_exports_enabled))
2832 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2833 
2834 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2835 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2836 
2837 discard:
2838 	if (tt)
2839 		event_triggers_post_call(file, tt);
2840 
2841 }
2842 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2843 
2844 /*
2845  * Skip 3:
2846  *
2847  *   trace_buffer_unlock_commit_regs()
2848  *   trace_event_buffer_commit()
2849  *   trace_event_raw_event_xxx()
2850  */
2851 # define STACK_SKIP 3
2852 
2853 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2854 				     struct trace_buffer *buffer,
2855 				     struct ring_buffer_event *event,
2856 				     unsigned int trace_ctx,
2857 				     struct pt_regs *regs)
2858 {
2859 	__buffer_unlock_commit(buffer, event);
2860 
2861 	/*
2862 	 * If regs is not set, then skip the necessary functions.
2863 	 * Note, we can still get here via blktrace, wakeup tracer
2864 	 * and mmiotrace, but that's ok if they lose a function or
2865 	 * two. They are not that meaningful.
2866 	 */
2867 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2868 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2869 }
2870 
2871 /*
2872  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2873  */
2874 void
2875 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2876 				   struct ring_buffer_event *event)
2877 {
2878 	__buffer_unlock_commit(buffer, event);
2879 }
2880 
2881 void
2882 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2883 	       parent_ip, unsigned int trace_ctx)
2884 {
2885 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2886 	struct ring_buffer_event *event;
2887 	struct ftrace_entry *entry;
2888 
2889 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2890 					    trace_ctx);
2891 	if (!event)
2892 		return;
2893 	entry	= ring_buffer_event_data(event);
2894 	entry->ip			= ip;
2895 	entry->parent_ip		= parent_ip;
2896 
2897 	if (static_branch_unlikely(&trace_function_exports_enabled))
2898 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2899 	__buffer_unlock_commit(buffer, event);
2900 }
2901 
2902 #ifdef CONFIG_STACKTRACE
2903 
2904 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2905 #define FTRACE_KSTACK_NESTING	4
2906 
2907 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2908 
2909 struct ftrace_stack {
2910 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2911 };
2912 
2913 
2914 struct ftrace_stacks {
2915 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2916 };
2917 
2918 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2919 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2920 
2921 static void __ftrace_trace_stack(struct trace_array *tr,
2922 				 struct trace_buffer *buffer,
2923 				 unsigned int trace_ctx,
2924 				 int skip, struct pt_regs *regs)
2925 {
2926 	struct ring_buffer_event *event;
2927 	unsigned int size, nr_entries;
2928 	struct ftrace_stack *fstack;
2929 	struct stack_entry *entry;
2930 	int stackidx;
2931 
2932 	/*
2933 	 * Add one, for this function and the call to save_stack_trace()
2934 	 * If regs is set, then these functions will not be in the way.
2935 	 */
2936 #ifndef CONFIG_UNWINDER_ORC
2937 	if (!regs)
2938 		skip++;
2939 #endif
2940 
2941 	preempt_disable_notrace();
2942 
2943 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2944 
2945 	/* This should never happen. If it does, yell once and skip */
2946 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2947 		goto out;
2948 
2949 	/*
2950 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2951 	 * interrupt will either see the value pre increment or post
2952 	 * increment. If the interrupt happens pre increment it will have
2953 	 * restored the counter when it returns.  We just need a barrier to
2954 	 * keep gcc from moving things around.
2955 	 */
2956 	barrier();
2957 
2958 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2959 	size = ARRAY_SIZE(fstack->calls);
2960 
2961 	if (regs) {
2962 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2963 						   size, skip);
2964 	} else {
2965 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2966 	}
2967 
2968 #ifdef CONFIG_DYNAMIC_FTRACE
2969 	/* Mark entry of stack trace as trampoline code */
2970 	if (tr->ops && tr->ops->trampoline) {
2971 		unsigned long tramp_start = tr->ops->trampoline;
2972 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2973 		unsigned long *calls = fstack->calls;
2974 
2975 		for (int i = 0; i < nr_entries; i++) {
2976 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2977 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2978 		}
2979 	}
2980 #endif
2981 
2982 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2983 				    struct_size(entry, caller, nr_entries),
2984 				    trace_ctx);
2985 	if (!event)
2986 		goto out;
2987 	entry = ring_buffer_event_data(event);
2988 
2989 	entry->size = nr_entries;
2990 	memcpy(&entry->caller, fstack->calls,
2991 	       flex_array_size(entry, caller, nr_entries));
2992 
2993 	__buffer_unlock_commit(buffer, event);
2994 
2995  out:
2996 	/* Again, don't let gcc optimize things here */
2997 	barrier();
2998 	__this_cpu_dec(ftrace_stack_reserve);
2999 	preempt_enable_notrace();
3000 
3001 }
3002 
3003 static inline void ftrace_trace_stack(struct trace_array *tr,
3004 				      struct trace_buffer *buffer,
3005 				      unsigned int trace_ctx,
3006 				      int skip, struct pt_regs *regs)
3007 {
3008 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3009 		return;
3010 
3011 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3012 }
3013 
3014 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3015 		   int skip)
3016 {
3017 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3018 
3019 	if (rcu_is_watching()) {
3020 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3021 		return;
3022 	}
3023 
3024 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3025 		return;
3026 
3027 	/*
3028 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3029 	 * but if the above rcu_is_watching() failed, then the NMI
3030 	 * triggered someplace critical, and ct_irq_enter() should
3031 	 * not be called from NMI.
3032 	 */
3033 	if (unlikely(in_nmi()))
3034 		return;
3035 
3036 	ct_irq_enter_irqson();
3037 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3038 	ct_irq_exit_irqson();
3039 }
3040 
3041 /**
3042  * trace_dump_stack - record a stack back trace in the trace buffer
3043  * @skip: Number of functions to skip (helper handlers)
3044  */
3045 void trace_dump_stack(int skip)
3046 {
3047 	if (tracing_disabled || tracing_selftest_running)
3048 		return;
3049 
3050 #ifndef CONFIG_UNWINDER_ORC
3051 	/* Skip 1 to skip this function. */
3052 	skip++;
3053 #endif
3054 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3055 				tracing_gen_ctx(), skip, NULL);
3056 }
3057 EXPORT_SYMBOL_GPL(trace_dump_stack);
3058 
3059 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3060 static DEFINE_PER_CPU(int, user_stack_count);
3061 
3062 static void
3063 ftrace_trace_userstack(struct trace_array *tr,
3064 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3065 {
3066 	struct ring_buffer_event *event;
3067 	struct userstack_entry *entry;
3068 
3069 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3070 		return;
3071 
3072 	/*
3073 	 * NMIs can not handle page faults, even with fix ups.
3074 	 * The save user stack can (and often does) fault.
3075 	 */
3076 	if (unlikely(in_nmi()))
3077 		return;
3078 
3079 	/*
3080 	 * prevent recursion, since the user stack tracing may
3081 	 * trigger other kernel events.
3082 	 */
3083 	preempt_disable();
3084 	if (__this_cpu_read(user_stack_count))
3085 		goto out;
3086 
3087 	__this_cpu_inc(user_stack_count);
3088 
3089 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3090 					    sizeof(*entry), trace_ctx);
3091 	if (!event)
3092 		goto out_drop_count;
3093 	entry	= ring_buffer_event_data(event);
3094 
3095 	entry->tgid		= current->tgid;
3096 	memset(&entry->caller, 0, sizeof(entry->caller));
3097 
3098 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3099 	__buffer_unlock_commit(buffer, event);
3100 
3101  out_drop_count:
3102 	__this_cpu_dec(user_stack_count);
3103  out:
3104 	preempt_enable();
3105 }
3106 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3107 static void ftrace_trace_userstack(struct trace_array *tr,
3108 				   struct trace_buffer *buffer,
3109 				   unsigned int trace_ctx)
3110 {
3111 }
3112 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3113 
3114 #endif /* CONFIG_STACKTRACE */
3115 
3116 static inline void
3117 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3118 			  unsigned long long delta)
3119 {
3120 	entry->bottom_delta_ts = delta & U32_MAX;
3121 	entry->top_delta_ts = (delta >> 32);
3122 }
3123 
3124 void trace_last_func_repeats(struct trace_array *tr,
3125 			     struct trace_func_repeats *last_info,
3126 			     unsigned int trace_ctx)
3127 {
3128 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3129 	struct func_repeats_entry *entry;
3130 	struct ring_buffer_event *event;
3131 	u64 delta;
3132 
3133 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3134 					    sizeof(*entry), trace_ctx);
3135 	if (!event)
3136 		return;
3137 
3138 	delta = ring_buffer_event_time_stamp(buffer, event) -
3139 		last_info->ts_last_call;
3140 
3141 	entry = ring_buffer_event_data(event);
3142 	entry->ip = last_info->ip;
3143 	entry->parent_ip = last_info->parent_ip;
3144 	entry->count = last_info->count;
3145 	func_repeats_set_delta_ts(entry, delta);
3146 
3147 	__buffer_unlock_commit(buffer, event);
3148 }
3149 
3150 /* created for use with alloc_percpu */
3151 struct trace_buffer_struct {
3152 	int nesting;
3153 	char buffer[4][TRACE_BUF_SIZE];
3154 };
3155 
3156 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3157 
3158 /*
3159  * This allows for lockless recording.  If we're nested too deeply, then
3160  * this returns NULL.
3161  */
3162 static char *get_trace_buf(void)
3163 {
3164 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3165 
3166 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3167 		return NULL;
3168 
3169 	buffer->nesting++;
3170 
3171 	/* Interrupts must see nesting incremented before we use the buffer */
3172 	barrier();
3173 	return &buffer->buffer[buffer->nesting - 1][0];
3174 }
3175 
3176 static void put_trace_buf(void)
3177 {
3178 	/* Don't let the decrement of nesting leak before this */
3179 	barrier();
3180 	this_cpu_dec(trace_percpu_buffer->nesting);
3181 }
3182 
3183 static int alloc_percpu_trace_buffer(void)
3184 {
3185 	struct trace_buffer_struct __percpu *buffers;
3186 
3187 	if (trace_percpu_buffer)
3188 		return 0;
3189 
3190 	buffers = alloc_percpu(struct trace_buffer_struct);
3191 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3192 		return -ENOMEM;
3193 
3194 	trace_percpu_buffer = buffers;
3195 	return 0;
3196 }
3197 
3198 static int buffers_allocated;
3199 
3200 void trace_printk_init_buffers(void)
3201 {
3202 	if (buffers_allocated)
3203 		return;
3204 
3205 	if (alloc_percpu_trace_buffer())
3206 		return;
3207 
3208 	/* trace_printk() is for debug use only. Don't use it in production. */
3209 
3210 	pr_warn("\n");
3211 	pr_warn("**********************************************************\n");
3212 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3213 	pr_warn("**                                                      **\n");
3214 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3215 	pr_warn("**                                                      **\n");
3216 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3217 	pr_warn("** unsafe for production use.                           **\n");
3218 	pr_warn("**                                                      **\n");
3219 	pr_warn("** If you see this message and you are not debugging    **\n");
3220 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3221 	pr_warn("**                                                      **\n");
3222 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3223 	pr_warn("**********************************************************\n");
3224 
3225 	/* Expand the buffers to set size */
3226 	tracing_update_buffers(&global_trace);
3227 
3228 	buffers_allocated = 1;
3229 
3230 	/*
3231 	 * trace_printk_init_buffers() can be called by modules.
3232 	 * If that happens, then we need to start cmdline recording
3233 	 * directly here. If the global_trace.buffer is already
3234 	 * allocated here, then this was called by module code.
3235 	 */
3236 	if (global_trace.array_buffer.buffer)
3237 		tracing_start_cmdline_record();
3238 }
3239 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3240 
3241 void trace_printk_start_comm(void)
3242 {
3243 	/* Start tracing comms if trace printk is set */
3244 	if (!buffers_allocated)
3245 		return;
3246 	tracing_start_cmdline_record();
3247 }
3248 
3249 static void trace_printk_start_stop_comm(int enabled)
3250 {
3251 	if (!buffers_allocated)
3252 		return;
3253 
3254 	if (enabled)
3255 		tracing_start_cmdline_record();
3256 	else
3257 		tracing_stop_cmdline_record();
3258 }
3259 
3260 /**
3261  * trace_vbprintk - write binary msg to tracing buffer
3262  * @ip:    The address of the caller
3263  * @fmt:   The string format to write to the buffer
3264  * @args:  Arguments for @fmt
3265  */
3266 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3267 {
3268 	struct ring_buffer_event *event;
3269 	struct trace_buffer *buffer;
3270 	struct trace_array *tr = READ_ONCE(printk_trace);
3271 	struct bprint_entry *entry;
3272 	unsigned int trace_ctx;
3273 	char *tbuffer;
3274 	int len = 0, size;
3275 
3276 	if (!printk_binsafe(tr))
3277 		return trace_vprintk(ip, fmt, args);
3278 
3279 	if (unlikely(tracing_selftest_running || tracing_disabled))
3280 		return 0;
3281 
3282 	/* Don't pollute graph traces with trace_vprintk internals */
3283 	pause_graph_tracing();
3284 
3285 	trace_ctx = tracing_gen_ctx();
3286 	preempt_disable_notrace();
3287 
3288 	tbuffer = get_trace_buf();
3289 	if (!tbuffer) {
3290 		len = 0;
3291 		goto out_nobuffer;
3292 	}
3293 
3294 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3295 
3296 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3297 		goto out_put;
3298 
3299 	size = sizeof(*entry) + sizeof(u32) * len;
3300 	buffer = tr->array_buffer.buffer;
3301 	ring_buffer_nest_start(buffer);
3302 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3303 					    trace_ctx);
3304 	if (!event)
3305 		goto out;
3306 	entry = ring_buffer_event_data(event);
3307 	entry->ip			= ip;
3308 	entry->fmt			= fmt;
3309 
3310 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3311 	__buffer_unlock_commit(buffer, event);
3312 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3313 
3314 out:
3315 	ring_buffer_nest_end(buffer);
3316 out_put:
3317 	put_trace_buf();
3318 
3319 out_nobuffer:
3320 	preempt_enable_notrace();
3321 	unpause_graph_tracing();
3322 
3323 	return len;
3324 }
3325 EXPORT_SYMBOL_GPL(trace_vbprintk);
3326 
3327 __printf(3, 0)
3328 static int
3329 __trace_array_vprintk(struct trace_buffer *buffer,
3330 		      unsigned long ip, const char *fmt, va_list args)
3331 {
3332 	struct ring_buffer_event *event;
3333 	int len = 0, size;
3334 	struct print_entry *entry;
3335 	unsigned int trace_ctx;
3336 	char *tbuffer;
3337 
3338 	if (tracing_disabled)
3339 		return 0;
3340 
3341 	/* Don't pollute graph traces with trace_vprintk internals */
3342 	pause_graph_tracing();
3343 
3344 	trace_ctx = tracing_gen_ctx();
3345 	preempt_disable_notrace();
3346 
3347 
3348 	tbuffer = get_trace_buf();
3349 	if (!tbuffer) {
3350 		len = 0;
3351 		goto out_nobuffer;
3352 	}
3353 
3354 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3355 
3356 	size = sizeof(*entry) + len + 1;
3357 	ring_buffer_nest_start(buffer);
3358 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3359 					    trace_ctx);
3360 	if (!event)
3361 		goto out;
3362 	entry = ring_buffer_event_data(event);
3363 	entry->ip = ip;
3364 
3365 	memcpy(&entry->buf, tbuffer, len + 1);
3366 	__buffer_unlock_commit(buffer, event);
3367 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3368 
3369 out:
3370 	ring_buffer_nest_end(buffer);
3371 	put_trace_buf();
3372 
3373 out_nobuffer:
3374 	preempt_enable_notrace();
3375 	unpause_graph_tracing();
3376 
3377 	return len;
3378 }
3379 
3380 __printf(3, 0)
3381 int trace_array_vprintk(struct trace_array *tr,
3382 			unsigned long ip, const char *fmt, va_list args)
3383 {
3384 	if (tracing_selftest_running && tr == &global_trace)
3385 		return 0;
3386 
3387 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3388 }
3389 
3390 /**
3391  * trace_array_printk - Print a message to a specific instance
3392  * @tr: The instance trace_array descriptor
3393  * @ip: The instruction pointer that this is called from.
3394  * @fmt: The format to print (printf format)
3395  *
3396  * If a subsystem sets up its own instance, they have the right to
3397  * printk strings into their tracing instance buffer using this
3398  * function. Note, this function will not write into the top level
3399  * buffer (use trace_printk() for that), as writing into the top level
3400  * buffer should only have events that can be individually disabled.
3401  * trace_printk() is only used for debugging a kernel, and should not
3402  * be ever incorporated in normal use.
3403  *
3404  * trace_array_printk() can be used, as it will not add noise to the
3405  * top level tracing buffer.
3406  *
3407  * Note, trace_array_init_printk() must be called on @tr before this
3408  * can be used.
3409  */
3410 __printf(3, 0)
3411 int trace_array_printk(struct trace_array *tr,
3412 		       unsigned long ip, const char *fmt, ...)
3413 {
3414 	int ret;
3415 	va_list ap;
3416 
3417 	if (!tr)
3418 		return -ENOENT;
3419 
3420 	/* This is only allowed for created instances */
3421 	if (tr == &global_trace)
3422 		return 0;
3423 
3424 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3425 		return 0;
3426 
3427 	va_start(ap, fmt);
3428 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3429 	va_end(ap);
3430 	return ret;
3431 }
3432 EXPORT_SYMBOL_GPL(trace_array_printk);
3433 
3434 /**
3435  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3436  * @tr: The trace array to initialize the buffers for
3437  *
3438  * As trace_array_printk() only writes into instances, they are OK to
3439  * have in the kernel (unlike trace_printk()). This needs to be called
3440  * before trace_array_printk() can be used on a trace_array.
3441  */
3442 int trace_array_init_printk(struct trace_array *tr)
3443 {
3444 	if (!tr)
3445 		return -ENOENT;
3446 
3447 	/* This is only allowed for created instances */
3448 	if (tr == &global_trace)
3449 		return -EINVAL;
3450 
3451 	return alloc_percpu_trace_buffer();
3452 }
3453 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3454 
3455 __printf(3, 4)
3456 int trace_array_printk_buf(struct trace_buffer *buffer,
3457 			   unsigned long ip, const char *fmt, ...)
3458 {
3459 	int ret;
3460 	va_list ap;
3461 
3462 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3463 		return 0;
3464 
3465 	va_start(ap, fmt);
3466 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3467 	va_end(ap);
3468 	return ret;
3469 }
3470 
3471 __printf(2, 0)
3472 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3473 {
3474 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3475 }
3476 EXPORT_SYMBOL_GPL(trace_vprintk);
3477 
3478 static void trace_iterator_increment(struct trace_iterator *iter)
3479 {
3480 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3481 
3482 	iter->idx++;
3483 	if (buf_iter)
3484 		ring_buffer_iter_advance(buf_iter);
3485 }
3486 
3487 static struct trace_entry *
3488 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3489 		unsigned long *lost_events)
3490 {
3491 	struct ring_buffer_event *event;
3492 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3493 
3494 	if (buf_iter) {
3495 		event = ring_buffer_iter_peek(buf_iter, ts);
3496 		if (lost_events)
3497 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3498 				(unsigned long)-1 : 0;
3499 	} else {
3500 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3501 					 lost_events);
3502 	}
3503 
3504 	if (event) {
3505 		iter->ent_size = ring_buffer_event_length(event);
3506 		return ring_buffer_event_data(event);
3507 	}
3508 	iter->ent_size = 0;
3509 	return NULL;
3510 }
3511 
3512 static struct trace_entry *
3513 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3514 		  unsigned long *missing_events, u64 *ent_ts)
3515 {
3516 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3517 	struct trace_entry *ent, *next = NULL;
3518 	unsigned long lost_events = 0, next_lost = 0;
3519 	int cpu_file = iter->cpu_file;
3520 	u64 next_ts = 0, ts;
3521 	int next_cpu = -1;
3522 	int next_size = 0;
3523 	int cpu;
3524 
3525 	/*
3526 	 * If we are in a per_cpu trace file, don't bother by iterating over
3527 	 * all cpu and peek directly.
3528 	 */
3529 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3530 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3531 			return NULL;
3532 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3533 		if (ent_cpu)
3534 			*ent_cpu = cpu_file;
3535 
3536 		return ent;
3537 	}
3538 
3539 	for_each_tracing_cpu(cpu) {
3540 
3541 		if (ring_buffer_empty_cpu(buffer, cpu))
3542 			continue;
3543 
3544 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3545 
3546 		/*
3547 		 * Pick the entry with the smallest timestamp:
3548 		 */
3549 		if (ent && (!next || ts < next_ts)) {
3550 			next = ent;
3551 			next_cpu = cpu;
3552 			next_ts = ts;
3553 			next_lost = lost_events;
3554 			next_size = iter->ent_size;
3555 		}
3556 	}
3557 
3558 	iter->ent_size = next_size;
3559 
3560 	if (ent_cpu)
3561 		*ent_cpu = next_cpu;
3562 
3563 	if (ent_ts)
3564 		*ent_ts = next_ts;
3565 
3566 	if (missing_events)
3567 		*missing_events = next_lost;
3568 
3569 	return next;
3570 }
3571 
3572 #define STATIC_FMT_BUF_SIZE	128
3573 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3574 
3575 char *trace_iter_expand_format(struct trace_iterator *iter)
3576 {
3577 	char *tmp;
3578 
3579 	/*
3580 	 * iter->tr is NULL when used with tp_printk, which makes
3581 	 * this get called where it is not safe to call krealloc().
3582 	 */
3583 	if (!iter->tr || iter->fmt == static_fmt_buf)
3584 		return NULL;
3585 
3586 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3587 		       GFP_KERNEL);
3588 	if (tmp) {
3589 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3590 		iter->fmt = tmp;
3591 	}
3592 
3593 	return tmp;
3594 }
3595 
3596 /* Returns true if the string is safe to dereference from an event */
3597 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3598 {
3599 	unsigned long addr = (unsigned long)str;
3600 	struct trace_event *trace_event;
3601 	struct trace_event_call *event;
3602 
3603 	/* OK if part of the event data */
3604 	if ((addr >= (unsigned long)iter->ent) &&
3605 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3606 		return true;
3607 
3608 	/* OK if part of the temp seq buffer */
3609 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3610 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3611 		return true;
3612 
3613 	/* Core rodata can not be freed */
3614 	if (is_kernel_rodata(addr))
3615 		return true;
3616 
3617 	if (trace_is_tracepoint_string(str))
3618 		return true;
3619 
3620 	/*
3621 	 * Now this could be a module event, referencing core module
3622 	 * data, which is OK.
3623 	 */
3624 	if (!iter->ent)
3625 		return false;
3626 
3627 	trace_event = ftrace_find_event(iter->ent->type);
3628 	if (!trace_event)
3629 		return false;
3630 
3631 	event = container_of(trace_event, struct trace_event_call, event);
3632 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3633 		return false;
3634 
3635 	/* Would rather have rodata, but this will suffice */
3636 	if (within_module_core(addr, event->module))
3637 		return true;
3638 
3639 	return false;
3640 }
3641 
3642 /**
3643  * ignore_event - Check dereferenced fields while writing to the seq buffer
3644  * @iter: The iterator that holds the seq buffer and the event being printed
3645  *
3646  * At boot up, test_event_printk() will flag any event that dereferences
3647  * a string with "%s" that does exist in the ring buffer. It may still
3648  * be valid, as the string may point to a static string in the kernel
3649  * rodata that never gets freed. But if the string pointer is pointing
3650  * to something that was allocated, there's a chance that it can be freed
3651  * by the time the user reads the trace. This would cause a bad memory
3652  * access by the kernel and possibly crash the system.
3653  *
3654  * This function will check if the event has any fields flagged as needing
3655  * to be checked at runtime and perform those checks.
3656  *
3657  * If it is found that a field is unsafe, it will write into the @iter->seq
3658  * a message stating what was found to be unsafe.
3659  *
3660  * @return: true if the event is unsafe and should be ignored,
3661  *          false otherwise.
3662  */
3663 bool ignore_event(struct trace_iterator *iter)
3664 {
3665 	struct ftrace_event_field *field;
3666 	struct trace_event *trace_event;
3667 	struct trace_event_call *event;
3668 	struct list_head *head;
3669 	struct trace_seq *seq;
3670 	const void *ptr;
3671 
3672 	trace_event = ftrace_find_event(iter->ent->type);
3673 
3674 	seq = &iter->seq;
3675 
3676 	if (!trace_event) {
3677 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3678 		return true;
3679 	}
3680 
3681 	event = container_of(trace_event, struct trace_event_call, event);
3682 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3683 		return false;
3684 
3685 	head = trace_get_fields(event);
3686 	if (!head) {
3687 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3688 				 trace_event_name(event));
3689 		return true;
3690 	}
3691 
3692 	/* Offsets are from the iter->ent that points to the raw event */
3693 	ptr = iter->ent;
3694 
3695 	list_for_each_entry(field, head, link) {
3696 		const char *str;
3697 		bool good;
3698 
3699 		if (!field->needs_test)
3700 			continue;
3701 
3702 		str = *(const char **)(ptr + field->offset);
3703 
3704 		good = trace_safe_str(iter, str);
3705 
3706 		/*
3707 		 * If you hit this warning, it is likely that the
3708 		 * trace event in question used %s on a string that
3709 		 * was saved at the time of the event, but may not be
3710 		 * around when the trace is read. Use __string(),
3711 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3712 		 * instead. See samples/trace_events/trace-events-sample.h
3713 		 * for reference.
3714 		 */
3715 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3716 			      trace_event_name(event), field->name)) {
3717 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3718 					 trace_event_name(event), field->name);
3719 			return true;
3720 		}
3721 	}
3722 	return false;
3723 }
3724 
3725 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3726 {
3727 	const char *p, *new_fmt;
3728 	char *q;
3729 
3730 	if (WARN_ON_ONCE(!fmt))
3731 		return fmt;
3732 
3733 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3734 		return fmt;
3735 
3736 	p = fmt;
3737 	new_fmt = q = iter->fmt;
3738 	while (*p) {
3739 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3740 			if (!trace_iter_expand_format(iter))
3741 				return fmt;
3742 
3743 			q += iter->fmt - new_fmt;
3744 			new_fmt = iter->fmt;
3745 		}
3746 
3747 		*q++ = *p++;
3748 
3749 		/* Replace %p with %px */
3750 		if (p[-1] == '%') {
3751 			if (p[0] == '%') {
3752 				*q++ = *p++;
3753 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3754 				*q++ = *p++;
3755 				*q++ = 'x';
3756 			}
3757 		}
3758 	}
3759 	*q = '\0';
3760 
3761 	return new_fmt;
3762 }
3763 
3764 #define STATIC_TEMP_BUF_SIZE	128
3765 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3766 
3767 /* Find the next real entry, without updating the iterator itself */
3768 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3769 					  int *ent_cpu, u64 *ent_ts)
3770 {
3771 	/* __find_next_entry will reset ent_size */
3772 	int ent_size = iter->ent_size;
3773 	struct trace_entry *entry;
3774 
3775 	/*
3776 	 * If called from ftrace_dump(), then the iter->temp buffer
3777 	 * will be the static_temp_buf and not created from kmalloc.
3778 	 * If the entry size is greater than the buffer, we can
3779 	 * not save it. Just return NULL in that case. This is only
3780 	 * used to add markers when two consecutive events' time
3781 	 * stamps have a large delta. See trace_print_lat_context()
3782 	 */
3783 	if (iter->temp == static_temp_buf &&
3784 	    STATIC_TEMP_BUF_SIZE < ent_size)
3785 		return NULL;
3786 
3787 	/*
3788 	 * The __find_next_entry() may call peek_next_entry(), which may
3789 	 * call ring_buffer_peek() that may make the contents of iter->ent
3790 	 * undefined. Need to copy iter->ent now.
3791 	 */
3792 	if (iter->ent && iter->ent != iter->temp) {
3793 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3794 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3795 			void *temp;
3796 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3797 			if (!temp)
3798 				return NULL;
3799 			kfree(iter->temp);
3800 			iter->temp = temp;
3801 			iter->temp_size = iter->ent_size;
3802 		}
3803 		memcpy(iter->temp, iter->ent, iter->ent_size);
3804 		iter->ent = iter->temp;
3805 	}
3806 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3807 	/* Put back the original ent_size */
3808 	iter->ent_size = ent_size;
3809 
3810 	return entry;
3811 }
3812 
3813 /* Find the next real entry, and increment the iterator to the next entry */
3814 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3815 {
3816 	iter->ent = __find_next_entry(iter, &iter->cpu,
3817 				      &iter->lost_events, &iter->ts);
3818 
3819 	if (iter->ent)
3820 		trace_iterator_increment(iter);
3821 
3822 	return iter->ent ? iter : NULL;
3823 }
3824 
3825 static void trace_consume(struct trace_iterator *iter)
3826 {
3827 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3828 			    &iter->lost_events);
3829 }
3830 
3831 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3832 {
3833 	struct trace_iterator *iter = m->private;
3834 	int i = (int)*pos;
3835 	void *ent;
3836 
3837 	WARN_ON_ONCE(iter->leftover);
3838 
3839 	(*pos)++;
3840 
3841 	/* can't go backwards */
3842 	if (iter->idx > i)
3843 		return NULL;
3844 
3845 	if (iter->idx < 0)
3846 		ent = trace_find_next_entry_inc(iter);
3847 	else
3848 		ent = iter;
3849 
3850 	while (ent && iter->idx < i)
3851 		ent = trace_find_next_entry_inc(iter);
3852 
3853 	iter->pos = *pos;
3854 
3855 	return ent;
3856 }
3857 
3858 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3859 {
3860 	struct ring_buffer_iter *buf_iter;
3861 	unsigned long entries = 0;
3862 	u64 ts;
3863 
3864 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3865 
3866 	buf_iter = trace_buffer_iter(iter, cpu);
3867 	if (!buf_iter)
3868 		return;
3869 
3870 	ring_buffer_iter_reset(buf_iter);
3871 
3872 	/*
3873 	 * We could have the case with the max latency tracers
3874 	 * that a reset never took place on a cpu. This is evident
3875 	 * by the timestamp being before the start of the buffer.
3876 	 */
3877 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3878 		if (ts >= iter->array_buffer->time_start)
3879 			break;
3880 		entries++;
3881 		ring_buffer_iter_advance(buf_iter);
3882 		/* This could be a big loop */
3883 		cond_resched();
3884 	}
3885 
3886 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3887 }
3888 
3889 /*
3890  * The current tracer is copied to avoid a global locking
3891  * all around.
3892  */
3893 static void *s_start(struct seq_file *m, loff_t *pos)
3894 {
3895 	struct trace_iterator *iter = m->private;
3896 	struct trace_array *tr = iter->tr;
3897 	int cpu_file = iter->cpu_file;
3898 	void *p = NULL;
3899 	loff_t l = 0;
3900 	int cpu;
3901 
3902 	mutex_lock(&trace_types_lock);
3903 	if (unlikely(tr->current_trace != iter->trace)) {
3904 		/* Close iter->trace before switching to the new current tracer */
3905 		if (iter->trace->close)
3906 			iter->trace->close(iter);
3907 		iter->trace = tr->current_trace;
3908 		/* Reopen the new current tracer */
3909 		if (iter->trace->open)
3910 			iter->trace->open(iter);
3911 	}
3912 	mutex_unlock(&trace_types_lock);
3913 
3914 #ifdef CONFIG_TRACER_MAX_TRACE
3915 	if (iter->snapshot && iter->trace->use_max_tr)
3916 		return ERR_PTR(-EBUSY);
3917 #endif
3918 
3919 	if (*pos != iter->pos) {
3920 		iter->ent = NULL;
3921 		iter->cpu = 0;
3922 		iter->idx = -1;
3923 
3924 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3925 			for_each_tracing_cpu(cpu)
3926 				tracing_iter_reset(iter, cpu);
3927 		} else
3928 			tracing_iter_reset(iter, cpu_file);
3929 
3930 		iter->leftover = 0;
3931 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3932 			;
3933 
3934 	} else {
3935 		/*
3936 		 * If we overflowed the seq_file before, then we want
3937 		 * to just reuse the trace_seq buffer again.
3938 		 */
3939 		if (iter->leftover)
3940 			p = iter;
3941 		else {
3942 			l = *pos - 1;
3943 			p = s_next(m, p, &l);
3944 		}
3945 	}
3946 
3947 	trace_event_read_lock();
3948 	trace_access_lock(cpu_file);
3949 	return p;
3950 }
3951 
3952 static void s_stop(struct seq_file *m, void *p)
3953 {
3954 	struct trace_iterator *iter = m->private;
3955 
3956 #ifdef CONFIG_TRACER_MAX_TRACE
3957 	if (iter->snapshot && iter->trace->use_max_tr)
3958 		return;
3959 #endif
3960 
3961 	trace_access_unlock(iter->cpu_file);
3962 	trace_event_read_unlock();
3963 }
3964 
3965 static void
3966 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3967 		      unsigned long *entries, int cpu)
3968 {
3969 	unsigned long count;
3970 
3971 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3972 	/*
3973 	 * If this buffer has skipped entries, then we hold all
3974 	 * entries for the trace and we need to ignore the
3975 	 * ones before the time stamp.
3976 	 */
3977 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3978 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3979 		/* total is the same as the entries */
3980 		*total = count;
3981 	} else
3982 		*total = count +
3983 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3984 	*entries = count;
3985 }
3986 
3987 static void
3988 get_total_entries(struct array_buffer *buf,
3989 		  unsigned long *total, unsigned long *entries)
3990 {
3991 	unsigned long t, e;
3992 	int cpu;
3993 
3994 	*total = 0;
3995 	*entries = 0;
3996 
3997 	for_each_tracing_cpu(cpu) {
3998 		get_total_entries_cpu(buf, &t, &e, cpu);
3999 		*total += t;
4000 		*entries += e;
4001 	}
4002 }
4003 
4004 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4005 {
4006 	unsigned long total, entries;
4007 
4008 	if (!tr)
4009 		tr = &global_trace;
4010 
4011 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4012 
4013 	return entries;
4014 }
4015 
4016 unsigned long trace_total_entries(struct trace_array *tr)
4017 {
4018 	unsigned long total, entries;
4019 
4020 	if (!tr)
4021 		tr = &global_trace;
4022 
4023 	get_total_entries(&tr->array_buffer, &total, &entries);
4024 
4025 	return entries;
4026 }
4027 
4028 static void print_lat_help_header(struct seq_file *m)
4029 {
4030 	seq_puts(m, "#                    _------=> CPU#            \n"
4031 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4032 		    "#                  | / _----=> need-resched    \n"
4033 		    "#                  || / _---=> hardirq/softirq \n"
4034 		    "#                  ||| / _--=> preempt-depth   \n"
4035 		    "#                  |||| / _-=> migrate-disable \n"
4036 		    "#                  ||||| /     delay           \n"
4037 		    "#  cmd     pid     |||||| time  |   caller     \n"
4038 		    "#     \\   /        ||||||  \\    |    /       \n");
4039 }
4040 
4041 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4042 {
4043 	unsigned long total;
4044 	unsigned long entries;
4045 
4046 	get_total_entries(buf, &total, &entries);
4047 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4048 		   entries, total, num_online_cpus());
4049 	seq_puts(m, "#\n");
4050 }
4051 
4052 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4053 				   unsigned int flags)
4054 {
4055 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4056 
4057 	print_event_info(buf, m);
4058 
4059 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4060 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4061 }
4062 
4063 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4064 				       unsigned int flags)
4065 {
4066 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4067 	static const char space[] = "            ";
4068 	int prec = tgid ? 12 : 2;
4069 
4070 	print_event_info(buf, m);
4071 
4072 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4073 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4074 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4075 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4076 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4077 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4078 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4079 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4080 }
4081 
4082 void
4083 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4084 {
4085 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4086 	struct array_buffer *buf = iter->array_buffer;
4087 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4088 	struct tracer *type = iter->trace;
4089 	unsigned long entries;
4090 	unsigned long total;
4091 	const char *name = type->name;
4092 
4093 	get_total_entries(buf, &total, &entries);
4094 
4095 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4096 		   name, init_utsname()->release);
4097 	seq_puts(m, "# -----------------------------------"
4098 		 "---------------------------------\n");
4099 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4100 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4101 		   nsecs_to_usecs(data->saved_latency),
4102 		   entries,
4103 		   total,
4104 		   buf->cpu,
4105 		   preempt_model_none()      ? "server" :
4106 		   preempt_model_voluntary() ? "desktop" :
4107 		   preempt_model_full()      ? "preempt" :
4108 		   preempt_model_lazy()	     ? "lazy"    :
4109 		   preempt_model_rt()        ? "preempt_rt" :
4110 		   "unknown",
4111 		   /* These are reserved for later use */
4112 		   0, 0, 0, 0);
4113 #ifdef CONFIG_SMP
4114 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4115 #else
4116 	seq_puts(m, ")\n");
4117 #endif
4118 	seq_puts(m, "#    -----------------\n");
4119 	seq_printf(m, "#    | task: %.16s-%d "
4120 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4121 		   data->comm, data->pid,
4122 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4123 		   data->policy, data->rt_priority);
4124 	seq_puts(m, "#    -----------------\n");
4125 
4126 	if (data->critical_start) {
4127 		seq_puts(m, "#  => started at: ");
4128 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4129 		trace_print_seq(m, &iter->seq);
4130 		seq_puts(m, "\n#  => ended at:   ");
4131 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4132 		trace_print_seq(m, &iter->seq);
4133 		seq_puts(m, "\n#\n");
4134 	}
4135 
4136 	seq_puts(m, "#\n");
4137 }
4138 
4139 static void test_cpu_buff_start(struct trace_iterator *iter)
4140 {
4141 	struct trace_seq *s = &iter->seq;
4142 	struct trace_array *tr = iter->tr;
4143 
4144 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4145 		return;
4146 
4147 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4148 		return;
4149 
4150 	if (cpumask_available(iter->started) &&
4151 	    cpumask_test_cpu(iter->cpu, iter->started))
4152 		return;
4153 
4154 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4155 		return;
4156 
4157 	if (cpumask_available(iter->started))
4158 		cpumask_set_cpu(iter->cpu, iter->started);
4159 
4160 	/* Don't print started cpu buffer for the first entry of the trace */
4161 	if (iter->idx > 1)
4162 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4163 				iter->cpu);
4164 }
4165 
4166 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4167 {
4168 	struct trace_array *tr = iter->tr;
4169 	struct trace_seq *s = &iter->seq;
4170 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4171 	struct trace_entry *entry;
4172 	struct trace_event *event;
4173 
4174 	entry = iter->ent;
4175 
4176 	test_cpu_buff_start(iter);
4177 
4178 	event = ftrace_find_event(entry->type);
4179 
4180 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4181 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4182 			trace_print_lat_context(iter);
4183 		else
4184 			trace_print_context(iter);
4185 	}
4186 
4187 	if (trace_seq_has_overflowed(s))
4188 		return TRACE_TYPE_PARTIAL_LINE;
4189 
4190 	if (event) {
4191 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4192 			return print_event_fields(iter, event);
4193 		/*
4194 		 * For TRACE_EVENT() events, the print_fmt is not
4195 		 * safe to use if the array has delta offsets
4196 		 * Force printing via the fields.
4197 		 */
4198 		if ((tr->text_delta) &&
4199 		    event->type > __TRACE_LAST_TYPE)
4200 			return print_event_fields(iter, event);
4201 
4202 		return event->funcs->trace(iter, sym_flags, event);
4203 	}
4204 
4205 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4206 
4207 	return trace_handle_return(s);
4208 }
4209 
4210 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4211 {
4212 	struct trace_array *tr = iter->tr;
4213 	struct trace_seq *s = &iter->seq;
4214 	struct trace_entry *entry;
4215 	struct trace_event *event;
4216 
4217 	entry = iter->ent;
4218 
4219 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4220 		trace_seq_printf(s, "%d %d %llu ",
4221 				 entry->pid, iter->cpu, iter->ts);
4222 
4223 	if (trace_seq_has_overflowed(s))
4224 		return TRACE_TYPE_PARTIAL_LINE;
4225 
4226 	event = ftrace_find_event(entry->type);
4227 	if (event)
4228 		return event->funcs->raw(iter, 0, event);
4229 
4230 	trace_seq_printf(s, "%d ?\n", entry->type);
4231 
4232 	return trace_handle_return(s);
4233 }
4234 
4235 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4236 {
4237 	struct trace_array *tr = iter->tr;
4238 	struct trace_seq *s = &iter->seq;
4239 	unsigned char newline = '\n';
4240 	struct trace_entry *entry;
4241 	struct trace_event *event;
4242 
4243 	entry = iter->ent;
4244 
4245 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4246 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4247 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4248 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4249 		if (trace_seq_has_overflowed(s))
4250 			return TRACE_TYPE_PARTIAL_LINE;
4251 	}
4252 
4253 	event = ftrace_find_event(entry->type);
4254 	if (event) {
4255 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4256 		if (ret != TRACE_TYPE_HANDLED)
4257 			return ret;
4258 	}
4259 
4260 	SEQ_PUT_FIELD(s, newline);
4261 
4262 	return trace_handle_return(s);
4263 }
4264 
4265 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4266 {
4267 	struct trace_array *tr = iter->tr;
4268 	struct trace_seq *s = &iter->seq;
4269 	struct trace_entry *entry;
4270 	struct trace_event *event;
4271 
4272 	entry = iter->ent;
4273 
4274 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4275 		SEQ_PUT_FIELD(s, entry->pid);
4276 		SEQ_PUT_FIELD(s, iter->cpu);
4277 		SEQ_PUT_FIELD(s, iter->ts);
4278 		if (trace_seq_has_overflowed(s))
4279 			return TRACE_TYPE_PARTIAL_LINE;
4280 	}
4281 
4282 	event = ftrace_find_event(entry->type);
4283 	return event ? event->funcs->binary(iter, 0, event) :
4284 		TRACE_TYPE_HANDLED;
4285 }
4286 
4287 int trace_empty(struct trace_iterator *iter)
4288 {
4289 	struct ring_buffer_iter *buf_iter;
4290 	int cpu;
4291 
4292 	/* If we are looking at one CPU buffer, only check that one */
4293 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4294 		cpu = iter->cpu_file;
4295 		buf_iter = trace_buffer_iter(iter, cpu);
4296 		if (buf_iter) {
4297 			if (!ring_buffer_iter_empty(buf_iter))
4298 				return 0;
4299 		} else {
4300 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4301 				return 0;
4302 		}
4303 		return 1;
4304 	}
4305 
4306 	for_each_tracing_cpu(cpu) {
4307 		buf_iter = trace_buffer_iter(iter, cpu);
4308 		if (buf_iter) {
4309 			if (!ring_buffer_iter_empty(buf_iter))
4310 				return 0;
4311 		} else {
4312 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4313 				return 0;
4314 		}
4315 	}
4316 
4317 	return 1;
4318 }
4319 
4320 /*  Called with trace_event_read_lock() held. */
4321 enum print_line_t print_trace_line(struct trace_iterator *iter)
4322 {
4323 	struct trace_array *tr = iter->tr;
4324 	unsigned long trace_flags = tr->trace_flags;
4325 	enum print_line_t ret;
4326 
4327 	if (iter->lost_events) {
4328 		if (iter->lost_events == (unsigned long)-1)
4329 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4330 					 iter->cpu);
4331 		else
4332 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4333 					 iter->cpu, iter->lost_events);
4334 		if (trace_seq_has_overflowed(&iter->seq))
4335 			return TRACE_TYPE_PARTIAL_LINE;
4336 	}
4337 
4338 	if (iter->trace && iter->trace->print_line) {
4339 		ret = iter->trace->print_line(iter);
4340 		if (ret != TRACE_TYPE_UNHANDLED)
4341 			return ret;
4342 	}
4343 
4344 	if (iter->ent->type == TRACE_BPUTS &&
4345 			trace_flags & TRACE_ITER_PRINTK &&
4346 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4347 		return trace_print_bputs_msg_only(iter);
4348 
4349 	if (iter->ent->type == TRACE_BPRINT &&
4350 			trace_flags & TRACE_ITER_PRINTK &&
4351 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4352 		return trace_print_bprintk_msg_only(iter);
4353 
4354 	if (iter->ent->type == TRACE_PRINT &&
4355 			trace_flags & TRACE_ITER_PRINTK &&
4356 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4357 		return trace_print_printk_msg_only(iter);
4358 
4359 	if (trace_flags & TRACE_ITER_BIN)
4360 		return print_bin_fmt(iter);
4361 
4362 	if (trace_flags & TRACE_ITER_HEX)
4363 		return print_hex_fmt(iter);
4364 
4365 	if (trace_flags & TRACE_ITER_RAW)
4366 		return print_raw_fmt(iter);
4367 
4368 	return print_trace_fmt(iter);
4369 }
4370 
4371 void trace_latency_header(struct seq_file *m)
4372 {
4373 	struct trace_iterator *iter = m->private;
4374 	struct trace_array *tr = iter->tr;
4375 
4376 	/* print nothing if the buffers are empty */
4377 	if (trace_empty(iter))
4378 		return;
4379 
4380 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4381 		print_trace_header(m, iter);
4382 
4383 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4384 		print_lat_help_header(m);
4385 }
4386 
4387 void trace_default_header(struct seq_file *m)
4388 {
4389 	struct trace_iterator *iter = m->private;
4390 	struct trace_array *tr = iter->tr;
4391 	unsigned long trace_flags = tr->trace_flags;
4392 
4393 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4394 		return;
4395 
4396 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4397 		/* print nothing if the buffers are empty */
4398 		if (trace_empty(iter))
4399 			return;
4400 		print_trace_header(m, iter);
4401 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4402 			print_lat_help_header(m);
4403 	} else {
4404 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4405 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4406 				print_func_help_header_irq(iter->array_buffer,
4407 							   m, trace_flags);
4408 			else
4409 				print_func_help_header(iter->array_buffer, m,
4410 						       trace_flags);
4411 		}
4412 	}
4413 }
4414 
4415 static void test_ftrace_alive(struct seq_file *m)
4416 {
4417 	if (!ftrace_is_dead())
4418 		return;
4419 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4420 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4421 }
4422 
4423 #ifdef CONFIG_TRACER_MAX_TRACE
4424 static void show_snapshot_main_help(struct seq_file *m)
4425 {
4426 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4427 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4428 		    "#                      Takes a snapshot of the main buffer.\n"
4429 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4430 		    "#                      (Doesn't have to be '2' works with any number that\n"
4431 		    "#                       is not a '0' or '1')\n");
4432 }
4433 
4434 static void show_snapshot_percpu_help(struct seq_file *m)
4435 {
4436 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4437 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4438 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4439 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4440 #else
4441 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4442 		    "#                     Must use main snapshot file to allocate.\n");
4443 #endif
4444 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4445 		    "#                      (Doesn't have to be '2' works with any number that\n"
4446 		    "#                       is not a '0' or '1')\n");
4447 }
4448 
4449 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4450 {
4451 	if (iter->tr->allocated_snapshot)
4452 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4453 	else
4454 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4455 
4456 	seq_puts(m, "# Snapshot commands:\n");
4457 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4458 		show_snapshot_main_help(m);
4459 	else
4460 		show_snapshot_percpu_help(m);
4461 }
4462 #else
4463 /* Should never be called */
4464 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4465 #endif
4466 
4467 static int s_show(struct seq_file *m, void *v)
4468 {
4469 	struct trace_iterator *iter = v;
4470 	int ret;
4471 
4472 	if (iter->ent == NULL) {
4473 		if (iter->tr) {
4474 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4475 			seq_puts(m, "#\n");
4476 			test_ftrace_alive(m);
4477 		}
4478 		if (iter->snapshot && trace_empty(iter))
4479 			print_snapshot_help(m, iter);
4480 		else if (iter->trace && iter->trace->print_header)
4481 			iter->trace->print_header(m);
4482 		else
4483 			trace_default_header(m);
4484 
4485 	} else if (iter->leftover) {
4486 		/*
4487 		 * If we filled the seq_file buffer earlier, we
4488 		 * want to just show it now.
4489 		 */
4490 		ret = trace_print_seq(m, &iter->seq);
4491 
4492 		/* ret should this time be zero, but you never know */
4493 		iter->leftover = ret;
4494 
4495 	} else {
4496 		ret = print_trace_line(iter);
4497 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4498 			iter->seq.full = 0;
4499 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4500 		}
4501 		ret = trace_print_seq(m, &iter->seq);
4502 		/*
4503 		 * If we overflow the seq_file buffer, then it will
4504 		 * ask us for this data again at start up.
4505 		 * Use that instead.
4506 		 *  ret is 0 if seq_file write succeeded.
4507 		 *        -1 otherwise.
4508 		 */
4509 		iter->leftover = ret;
4510 	}
4511 
4512 	return 0;
4513 }
4514 
4515 /*
4516  * Should be used after trace_array_get(), trace_types_lock
4517  * ensures that i_cdev was already initialized.
4518  */
4519 static inline int tracing_get_cpu(struct inode *inode)
4520 {
4521 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4522 		return (long)inode->i_cdev - 1;
4523 	return RING_BUFFER_ALL_CPUS;
4524 }
4525 
4526 static const struct seq_operations tracer_seq_ops = {
4527 	.start		= s_start,
4528 	.next		= s_next,
4529 	.stop		= s_stop,
4530 	.show		= s_show,
4531 };
4532 
4533 /*
4534  * Note, as iter itself can be allocated and freed in different
4535  * ways, this function is only used to free its content, and not
4536  * the iterator itself. The only requirement to all the allocations
4537  * is that it must zero all fields (kzalloc), as freeing works with
4538  * ethier allocated content or NULL.
4539  */
4540 static void free_trace_iter_content(struct trace_iterator *iter)
4541 {
4542 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4543 	if (iter->fmt != static_fmt_buf)
4544 		kfree(iter->fmt);
4545 
4546 	kfree(iter->temp);
4547 	kfree(iter->buffer_iter);
4548 	mutex_destroy(&iter->mutex);
4549 	free_cpumask_var(iter->started);
4550 }
4551 
4552 static struct trace_iterator *
4553 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4554 {
4555 	struct trace_array *tr = inode->i_private;
4556 	struct trace_iterator *iter;
4557 	int cpu;
4558 
4559 	if (tracing_disabled)
4560 		return ERR_PTR(-ENODEV);
4561 
4562 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4563 	if (!iter)
4564 		return ERR_PTR(-ENOMEM);
4565 
4566 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4567 				    GFP_KERNEL);
4568 	if (!iter->buffer_iter)
4569 		goto release;
4570 
4571 	/*
4572 	 * trace_find_next_entry() may need to save off iter->ent.
4573 	 * It will place it into the iter->temp buffer. As most
4574 	 * events are less than 128, allocate a buffer of that size.
4575 	 * If one is greater, then trace_find_next_entry() will
4576 	 * allocate a new buffer to adjust for the bigger iter->ent.
4577 	 * It's not critical if it fails to get allocated here.
4578 	 */
4579 	iter->temp = kmalloc(128, GFP_KERNEL);
4580 	if (iter->temp)
4581 		iter->temp_size = 128;
4582 
4583 	/*
4584 	 * trace_event_printf() may need to modify given format
4585 	 * string to replace %p with %px so that it shows real address
4586 	 * instead of hash value. However, that is only for the event
4587 	 * tracing, other tracer may not need. Defer the allocation
4588 	 * until it is needed.
4589 	 */
4590 	iter->fmt = NULL;
4591 	iter->fmt_size = 0;
4592 
4593 	mutex_lock(&trace_types_lock);
4594 	iter->trace = tr->current_trace;
4595 
4596 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4597 		goto fail;
4598 
4599 	iter->tr = tr;
4600 
4601 #ifdef CONFIG_TRACER_MAX_TRACE
4602 	/* Currently only the top directory has a snapshot */
4603 	if (tr->current_trace->print_max || snapshot)
4604 		iter->array_buffer = &tr->max_buffer;
4605 	else
4606 #endif
4607 		iter->array_buffer = &tr->array_buffer;
4608 	iter->snapshot = snapshot;
4609 	iter->pos = -1;
4610 	iter->cpu_file = tracing_get_cpu(inode);
4611 	mutex_init(&iter->mutex);
4612 
4613 	/* Notify the tracer early; before we stop tracing. */
4614 	if (iter->trace->open)
4615 		iter->trace->open(iter);
4616 
4617 	/* Annotate start of buffers if we had overruns */
4618 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4619 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4620 
4621 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4622 	if (trace_clocks[tr->clock_id].in_ns)
4623 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4624 
4625 	/*
4626 	 * If pause-on-trace is enabled, then stop the trace while
4627 	 * dumping, unless this is the "snapshot" file
4628 	 */
4629 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4630 		tracing_stop_tr(tr);
4631 
4632 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4633 		for_each_tracing_cpu(cpu) {
4634 			iter->buffer_iter[cpu] =
4635 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4636 							 cpu, GFP_KERNEL);
4637 		}
4638 		ring_buffer_read_prepare_sync();
4639 		for_each_tracing_cpu(cpu) {
4640 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4641 			tracing_iter_reset(iter, cpu);
4642 		}
4643 	} else {
4644 		cpu = iter->cpu_file;
4645 		iter->buffer_iter[cpu] =
4646 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4647 						 cpu, GFP_KERNEL);
4648 		ring_buffer_read_prepare_sync();
4649 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4650 		tracing_iter_reset(iter, cpu);
4651 	}
4652 
4653 	mutex_unlock(&trace_types_lock);
4654 
4655 	return iter;
4656 
4657  fail:
4658 	mutex_unlock(&trace_types_lock);
4659 	free_trace_iter_content(iter);
4660 release:
4661 	seq_release_private(inode, file);
4662 	return ERR_PTR(-ENOMEM);
4663 }
4664 
4665 int tracing_open_generic(struct inode *inode, struct file *filp)
4666 {
4667 	int ret;
4668 
4669 	ret = tracing_check_open_get_tr(NULL);
4670 	if (ret)
4671 		return ret;
4672 
4673 	filp->private_data = inode->i_private;
4674 	return 0;
4675 }
4676 
4677 bool tracing_is_disabled(void)
4678 {
4679 	return (tracing_disabled) ? true: false;
4680 }
4681 
4682 /*
4683  * Open and update trace_array ref count.
4684  * Must have the current trace_array passed to it.
4685  */
4686 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4687 {
4688 	struct trace_array *tr = inode->i_private;
4689 	int ret;
4690 
4691 	ret = tracing_check_open_get_tr(tr);
4692 	if (ret)
4693 		return ret;
4694 
4695 	filp->private_data = inode->i_private;
4696 
4697 	return 0;
4698 }
4699 
4700 /*
4701  * The private pointer of the inode is the trace_event_file.
4702  * Update the tr ref count associated to it.
4703  */
4704 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4705 {
4706 	struct trace_event_file *file = inode->i_private;
4707 	int ret;
4708 
4709 	ret = tracing_check_open_get_tr(file->tr);
4710 	if (ret)
4711 		return ret;
4712 
4713 	mutex_lock(&event_mutex);
4714 
4715 	/* Fail if the file is marked for removal */
4716 	if (file->flags & EVENT_FILE_FL_FREED) {
4717 		trace_array_put(file->tr);
4718 		ret = -ENODEV;
4719 	} else {
4720 		event_file_get(file);
4721 	}
4722 
4723 	mutex_unlock(&event_mutex);
4724 	if (ret)
4725 		return ret;
4726 
4727 	filp->private_data = inode->i_private;
4728 
4729 	return 0;
4730 }
4731 
4732 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4733 {
4734 	struct trace_event_file *file = inode->i_private;
4735 
4736 	trace_array_put(file->tr);
4737 	event_file_put(file);
4738 
4739 	return 0;
4740 }
4741 
4742 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4743 {
4744 	tracing_release_file_tr(inode, filp);
4745 	return single_release(inode, filp);
4746 }
4747 
4748 static int tracing_mark_open(struct inode *inode, struct file *filp)
4749 {
4750 	stream_open(inode, filp);
4751 	return tracing_open_generic_tr(inode, filp);
4752 }
4753 
4754 static int tracing_release(struct inode *inode, struct file *file)
4755 {
4756 	struct trace_array *tr = inode->i_private;
4757 	struct seq_file *m = file->private_data;
4758 	struct trace_iterator *iter;
4759 	int cpu;
4760 
4761 	if (!(file->f_mode & FMODE_READ)) {
4762 		trace_array_put(tr);
4763 		return 0;
4764 	}
4765 
4766 	/* Writes do not use seq_file */
4767 	iter = m->private;
4768 	mutex_lock(&trace_types_lock);
4769 
4770 	for_each_tracing_cpu(cpu) {
4771 		if (iter->buffer_iter[cpu])
4772 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4773 	}
4774 
4775 	if (iter->trace && iter->trace->close)
4776 		iter->trace->close(iter);
4777 
4778 	if (!iter->snapshot && tr->stop_count)
4779 		/* reenable tracing if it was previously enabled */
4780 		tracing_start_tr(tr);
4781 
4782 	__trace_array_put(tr);
4783 
4784 	mutex_unlock(&trace_types_lock);
4785 
4786 	free_trace_iter_content(iter);
4787 	seq_release_private(inode, file);
4788 
4789 	return 0;
4790 }
4791 
4792 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4793 {
4794 	struct trace_array *tr = inode->i_private;
4795 
4796 	trace_array_put(tr);
4797 	return 0;
4798 }
4799 
4800 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4801 {
4802 	struct trace_array *tr = inode->i_private;
4803 
4804 	trace_array_put(tr);
4805 
4806 	return single_release(inode, file);
4807 }
4808 
4809 static int tracing_open(struct inode *inode, struct file *file)
4810 {
4811 	struct trace_array *tr = inode->i_private;
4812 	struct trace_iterator *iter;
4813 	int ret;
4814 
4815 	ret = tracing_check_open_get_tr(tr);
4816 	if (ret)
4817 		return ret;
4818 
4819 	/* If this file was open for write, then erase contents */
4820 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4821 		int cpu = tracing_get_cpu(inode);
4822 		struct array_buffer *trace_buf = &tr->array_buffer;
4823 
4824 #ifdef CONFIG_TRACER_MAX_TRACE
4825 		if (tr->current_trace->print_max)
4826 			trace_buf = &tr->max_buffer;
4827 #endif
4828 
4829 		if (cpu == RING_BUFFER_ALL_CPUS)
4830 			tracing_reset_online_cpus(trace_buf);
4831 		else
4832 			tracing_reset_cpu(trace_buf, cpu);
4833 	}
4834 
4835 	if (file->f_mode & FMODE_READ) {
4836 		iter = __tracing_open(inode, file, false);
4837 		if (IS_ERR(iter))
4838 			ret = PTR_ERR(iter);
4839 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4840 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4841 	}
4842 
4843 	if (ret < 0)
4844 		trace_array_put(tr);
4845 
4846 	return ret;
4847 }
4848 
4849 /*
4850  * Some tracers are not suitable for instance buffers.
4851  * A tracer is always available for the global array (toplevel)
4852  * or if it explicitly states that it is.
4853  */
4854 static bool
4855 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4856 {
4857 #ifdef CONFIG_TRACER_SNAPSHOT
4858 	/* arrays with mapped buffer range do not have snapshots */
4859 	if (tr->range_addr_start && t->use_max_tr)
4860 		return false;
4861 #endif
4862 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4863 }
4864 
4865 /* Find the next tracer that this trace array may use */
4866 static struct tracer *
4867 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4868 {
4869 	while (t && !trace_ok_for_array(t, tr))
4870 		t = t->next;
4871 
4872 	return t;
4873 }
4874 
4875 static void *
4876 t_next(struct seq_file *m, void *v, loff_t *pos)
4877 {
4878 	struct trace_array *tr = m->private;
4879 	struct tracer *t = v;
4880 
4881 	(*pos)++;
4882 
4883 	if (t)
4884 		t = get_tracer_for_array(tr, t->next);
4885 
4886 	return t;
4887 }
4888 
4889 static void *t_start(struct seq_file *m, loff_t *pos)
4890 {
4891 	struct trace_array *tr = m->private;
4892 	struct tracer *t;
4893 	loff_t l = 0;
4894 
4895 	mutex_lock(&trace_types_lock);
4896 
4897 	t = get_tracer_for_array(tr, trace_types);
4898 	for (; t && l < *pos; t = t_next(m, t, &l))
4899 			;
4900 
4901 	return t;
4902 }
4903 
4904 static void t_stop(struct seq_file *m, void *p)
4905 {
4906 	mutex_unlock(&trace_types_lock);
4907 }
4908 
4909 static int t_show(struct seq_file *m, void *v)
4910 {
4911 	struct tracer *t = v;
4912 
4913 	if (!t)
4914 		return 0;
4915 
4916 	seq_puts(m, t->name);
4917 	if (t->next)
4918 		seq_putc(m, ' ');
4919 	else
4920 		seq_putc(m, '\n');
4921 
4922 	return 0;
4923 }
4924 
4925 static const struct seq_operations show_traces_seq_ops = {
4926 	.start		= t_start,
4927 	.next		= t_next,
4928 	.stop		= t_stop,
4929 	.show		= t_show,
4930 };
4931 
4932 static int show_traces_open(struct inode *inode, struct file *file)
4933 {
4934 	struct trace_array *tr = inode->i_private;
4935 	struct seq_file *m;
4936 	int ret;
4937 
4938 	ret = tracing_check_open_get_tr(tr);
4939 	if (ret)
4940 		return ret;
4941 
4942 	ret = seq_open(file, &show_traces_seq_ops);
4943 	if (ret) {
4944 		trace_array_put(tr);
4945 		return ret;
4946 	}
4947 
4948 	m = file->private_data;
4949 	m->private = tr;
4950 
4951 	return 0;
4952 }
4953 
4954 static int tracing_seq_release(struct inode *inode, struct file *file)
4955 {
4956 	struct trace_array *tr = inode->i_private;
4957 
4958 	trace_array_put(tr);
4959 	return seq_release(inode, file);
4960 }
4961 
4962 static ssize_t
4963 tracing_write_stub(struct file *filp, const char __user *ubuf,
4964 		   size_t count, loff_t *ppos)
4965 {
4966 	return count;
4967 }
4968 
4969 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4970 {
4971 	int ret;
4972 
4973 	if (file->f_mode & FMODE_READ)
4974 		ret = seq_lseek(file, offset, whence);
4975 	else
4976 		file->f_pos = ret = 0;
4977 
4978 	return ret;
4979 }
4980 
4981 static const struct file_operations tracing_fops = {
4982 	.open		= tracing_open,
4983 	.read		= seq_read,
4984 	.read_iter	= seq_read_iter,
4985 	.splice_read	= copy_splice_read,
4986 	.write		= tracing_write_stub,
4987 	.llseek		= tracing_lseek,
4988 	.release	= tracing_release,
4989 };
4990 
4991 static const struct file_operations show_traces_fops = {
4992 	.open		= show_traces_open,
4993 	.read		= seq_read,
4994 	.llseek		= seq_lseek,
4995 	.release	= tracing_seq_release,
4996 };
4997 
4998 static ssize_t
4999 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5000 		     size_t count, loff_t *ppos)
5001 {
5002 	struct trace_array *tr = file_inode(filp)->i_private;
5003 	char *mask_str;
5004 	int len;
5005 
5006 	len = snprintf(NULL, 0, "%*pb\n",
5007 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5008 	mask_str = kmalloc(len, GFP_KERNEL);
5009 	if (!mask_str)
5010 		return -ENOMEM;
5011 
5012 	len = snprintf(mask_str, len, "%*pb\n",
5013 		       cpumask_pr_args(tr->tracing_cpumask));
5014 	if (len >= count) {
5015 		count = -EINVAL;
5016 		goto out_err;
5017 	}
5018 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5019 
5020 out_err:
5021 	kfree(mask_str);
5022 
5023 	return count;
5024 }
5025 
5026 int tracing_set_cpumask(struct trace_array *tr,
5027 			cpumask_var_t tracing_cpumask_new)
5028 {
5029 	int cpu;
5030 
5031 	if (!tr)
5032 		return -EINVAL;
5033 
5034 	local_irq_disable();
5035 	arch_spin_lock(&tr->max_lock);
5036 	for_each_tracing_cpu(cpu) {
5037 		/*
5038 		 * Increase/decrease the disabled counter if we are
5039 		 * about to flip a bit in the cpumask:
5040 		 */
5041 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5042 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5043 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5044 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5045 #ifdef CONFIG_TRACER_MAX_TRACE
5046 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5047 #endif
5048 		}
5049 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5050 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5051 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5052 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5053 #ifdef CONFIG_TRACER_MAX_TRACE
5054 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5055 #endif
5056 		}
5057 	}
5058 	arch_spin_unlock(&tr->max_lock);
5059 	local_irq_enable();
5060 
5061 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5062 
5063 	return 0;
5064 }
5065 
5066 static ssize_t
5067 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5068 		      size_t count, loff_t *ppos)
5069 {
5070 	struct trace_array *tr = file_inode(filp)->i_private;
5071 	cpumask_var_t tracing_cpumask_new;
5072 	int err;
5073 
5074 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5075 		return -EINVAL;
5076 
5077 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5078 		return -ENOMEM;
5079 
5080 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5081 	if (err)
5082 		goto err_free;
5083 
5084 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5085 	if (err)
5086 		goto err_free;
5087 
5088 	free_cpumask_var(tracing_cpumask_new);
5089 
5090 	return count;
5091 
5092 err_free:
5093 	free_cpumask_var(tracing_cpumask_new);
5094 
5095 	return err;
5096 }
5097 
5098 static const struct file_operations tracing_cpumask_fops = {
5099 	.open		= tracing_open_generic_tr,
5100 	.read		= tracing_cpumask_read,
5101 	.write		= tracing_cpumask_write,
5102 	.release	= tracing_release_generic_tr,
5103 	.llseek		= generic_file_llseek,
5104 };
5105 
5106 static int tracing_trace_options_show(struct seq_file *m, void *v)
5107 {
5108 	struct tracer_opt *trace_opts;
5109 	struct trace_array *tr = m->private;
5110 	u32 tracer_flags;
5111 	int i;
5112 
5113 	guard(mutex)(&trace_types_lock);
5114 
5115 	tracer_flags = tr->current_trace->flags->val;
5116 	trace_opts = tr->current_trace->flags->opts;
5117 
5118 	for (i = 0; trace_options[i]; i++) {
5119 		if (tr->trace_flags & (1 << i))
5120 			seq_printf(m, "%s\n", trace_options[i]);
5121 		else
5122 			seq_printf(m, "no%s\n", trace_options[i]);
5123 	}
5124 
5125 	for (i = 0; trace_opts[i].name; i++) {
5126 		if (tracer_flags & trace_opts[i].bit)
5127 			seq_printf(m, "%s\n", trace_opts[i].name);
5128 		else
5129 			seq_printf(m, "no%s\n", trace_opts[i].name);
5130 	}
5131 
5132 	return 0;
5133 }
5134 
5135 static int __set_tracer_option(struct trace_array *tr,
5136 			       struct tracer_flags *tracer_flags,
5137 			       struct tracer_opt *opts, int neg)
5138 {
5139 	struct tracer *trace = tracer_flags->trace;
5140 	int ret;
5141 
5142 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5143 	if (ret)
5144 		return ret;
5145 
5146 	if (neg)
5147 		tracer_flags->val &= ~opts->bit;
5148 	else
5149 		tracer_flags->val |= opts->bit;
5150 	return 0;
5151 }
5152 
5153 /* Try to assign a tracer specific option */
5154 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5155 {
5156 	struct tracer *trace = tr->current_trace;
5157 	struct tracer_flags *tracer_flags = trace->flags;
5158 	struct tracer_opt *opts = NULL;
5159 	int i;
5160 
5161 	for (i = 0; tracer_flags->opts[i].name; i++) {
5162 		opts = &tracer_flags->opts[i];
5163 
5164 		if (strcmp(cmp, opts->name) == 0)
5165 			return __set_tracer_option(tr, trace->flags, opts, neg);
5166 	}
5167 
5168 	return -EINVAL;
5169 }
5170 
5171 /* Some tracers require overwrite to stay enabled */
5172 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5173 {
5174 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5175 		return -1;
5176 
5177 	return 0;
5178 }
5179 
5180 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5181 {
5182 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5183 	    (mask == TRACE_ITER_RECORD_CMD) ||
5184 	    (mask == TRACE_ITER_TRACE_PRINTK))
5185 		lockdep_assert_held(&event_mutex);
5186 
5187 	/* do nothing if flag is already set */
5188 	if (!!(tr->trace_flags & mask) == !!enabled)
5189 		return 0;
5190 
5191 	/* Give the tracer a chance to approve the change */
5192 	if (tr->current_trace->flag_changed)
5193 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5194 			return -EINVAL;
5195 
5196 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5197 		if (enabled) {
5198 			update_printk_trace(tr);
5199 		} else {
5200 			/*
5201 			 * The global_trace cannot clear this.
5202 			 * It's flag only gets cleared if another instance sets it.
5203 			 */
5204 			if (printk_trace == &global_trace)
5205 				return -EINVAL;
5206 			/*
5207 			 * An instance must always have it set.
5208 			 * by default, that's the global_trace instane.
5209 			 */
5210 			if (printk_trace == tr)
5211 				update_printk_trace(&global_trace);
5212 		}
5213 	}
5214 
5215 	if (enabled)
5216 		tr->trace_flags |= mask;
5217 	else
5218 		tr->trace_flags &= ~mask;
5219 
5220 	if (mask == TRACE_ITER_RECORD_CMD)
5221 		trace_event_enable_cmd_record(enabled);
5222 
5223 	if (mask == TRACE_ITER_RECORD_TGID) {
5224 
5225 		if (trace_alloc_tgid_map() < 0) {
5226 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5227 			return -ENOMEM;
5228 		}
5229 
5230 		trace_event_enable_tgid_record(enabled);
5231 	}
5232 
5233 	if (mask == TRACE_ITER_EVENT_FORK)
5234 		trace_event_follow_fork(tr, enabled);
5235 
5236 	if (mask == TRACE_ITER_FUNC_FORK)
5237 		ftrace_pid_follow_fork(tr, enabled);
5238 
5239 	if (mask == TRACE_ITER_OVERWRITE) {
5240 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5241 #ifdef CONFIG_TRACER_MAX_TRACE
5242 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5243 #endif
5244 	}
5245 
5246 	if (mask == TRACE_ITER_PRINTK) {
5247 		trace_printk_start_stop_comm(enabled);
5248 		trace_printk_control(enabled);
5249 	}
5250 
5251 	return 0;
5252 }
5253 
5254 int trace_set_options(struct trace_array *tr, char *option)
5255 {
5256 	char *cmp;
5257 	int neg = 0;
5258 	int ret;
5259 	size_t orig_len = strlen(option);
5260 	int len;
5261 
5262 	cmp = strstrip(option);
5263 
5264 	len = str_has_prefix(cmp, "no");
5265 	if (len)
5266 		neg = 1;
5267 
5268 	cmp += len;
5269 
5270 	mutex_lock(&event_mutex);
5271 	mutex_lock(&trace_types_lock);
5272 
5273 	ret = match_string(trace_options, -1, cmp);
5274 	/* If no option could be set, test the specific tracer options */
5275 	if (ret < 0)
5276 		ret = set_tracer_option(tr, cmp, neg);
5277 	else
5278 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5279 
5280 	mutex_unlock(&trace_types_lock);
5281 	mutex_unlock(&event_mutex);
5282 
5283 	/*
5284 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5285 	 * turn it back into a space.
5286 	 */
5287 	if (orig_len > strlen(option))
5288 		option[strlen(option)] = ' ';
5289 
5290 	return ret;
5291 }
5292 
5293 static void __init apply_trace_boot_options(void)
5294 {
5295 	char *buf = trace_boot_options_buf;
5296 	char *option;
5297 
5298 	while (true) {
5299 		option = strsep(&buf, ",");
5300 
5301 		if (!option)
5302 			break;
5303 
5304 		if (*option)
5305 			trace_set_options(&global_trace, option);
5306 
5307 		/* Put back the comma to allow this to be called again */
5308 		if (buf)
5309 			*(buf - 1) = ',';
5310 	}
5311 }
5312 
5313 static ssize_t
5314 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5315 			size_t cnt, loff_t *ppos)
5316 {
5317 	struct seq_file *m = filp->private_data;
5318 	struct trace_array *tr = m->private;
5319 	char buf[64];
5320 	int ret;
5321 
5322 	if (cnt >= sizeof(buf))
5323 		return -EINVAL;
5324 
5325 	if (copy_from_user(buf, ubuf, cnt))
5326 		return -EFAULT;
5327 
5328 	buf[cnt] = 0;
5329 
5330 	ret = trace_set_options(tr, buf);
5331 	if (ret < 0)
5332 		return ret;
5333 
5334 	*ppos += cnt;
5335 
5336 	return cnt;
5337 }
5338 
5339 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5340 {
5341 	struct trace_array *tr = inode->i_private;
5342 	int ret;
5343 
5344 	ret = tracing_check_open_get_tr(tr);
5345 	if (ret)
5346 		return ret;
5347 
5348 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5349 	if (ret < 0)
5350 		trace_array_put(tr);
5351 
5352 	return ret;
5353 }
5354 
5355 static const struct file_operations tracing_iter_fops = {
5356 	.open		= tracing_trace_options_open,
5357 	.read		= seq_read,
5358 	.llseek		= seq_lseek,
5359 	.release	= tracing_single_release_tr,
5360 	.write		= tracing_trace_options_write,
5361 };
5362 
5363 static const char readme_msg[] =
5364 	"tracing mini-HOWTO:\n\n"
5365 	"By default tracefs removes all OTH file permission bits.\n"
5366 	"When mounting tracefs an optional group id can be specified\n"
5367 	"which adds the group to every directory and file in tracefs:\n\n"
5368 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5369 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5370 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5371 	" Important files:\n"
5372 	"  trace\t\t\t- The static contents of the buffer\n"
5373 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5374 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5375 	"  current_tracer\t- function and latency tracers\n"
5376 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5377 	"  error_log\t- error log for failed commands (that support it)\n"
5378 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5379 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5380 	"  trace_clock\t\t- change the clock used to order events\n"
5381 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5382 	"      global:   Synced across CPUs but slows tracing down.\n"
5383 	"     counter:   Not a clock, but just an increment\n"
5384 	"      uptime:   Jiffy counter from time of boot\n"
5385 	"        perf:   Same clock that perf events use\n"
5386 #ifdef CONFIG_X86_64
5387 	"     x86-tsc:   TSC cycle counter\n"
5388 #endif
5389 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5390 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5391 	"    absolute:   Absolute (standalone) timestamp\n"
5392 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5393 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5394 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5395 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5396 	"\t\t\t  Remove sub-buffer with rmdir\n"
5397 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5398 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5399 	"\t\t\t  option name\n"
5400 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5401 #ifdef CONFIG_DYNAMIC_FTRACE
5402 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5403 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5404 	"\t\t\t  functions\n"
5405 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5406 	"\t     modules: Can select a group via module\n"
5407 	"\t      Format: :mod:<module-name>\n"
5408 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5409 	"\t    triggers: a command to perform when function is hit\n"
5410 	"\t      Format: <function>:<trigger>[:count]\n"
5411 	"\t     trigger: traceon, traceoff\n"
5412 	"\t\t      enable_event:<system>:<event>\n"
5413 	"\t\t      disable_event:<system>:<event>\n"
5414 #ifdef CONFIG_STACKTRACE
5415 	"\t\t      stacktrace\n"
5416 #endif
5417 #ifdef CONFIG_TRACER_SNAPSHOT
5418 	"\t\t      snapshot\n"
5419 #endif
5420 	"\t\t      dump\n"
5421 	"\t\t      cpudump\n"
5422 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5423 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5424 	"\t     The first one will disable tracing every time do_fault is hit\n"
5425 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5426 	"\t       The first time do trap is hit and it disables tracing, the\n"
5427 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5428 	"\t       the counter will not decrement. It only decrements when the\n"
5429 	"\t       trigger did work\n"
5430 	"\t     To remove trigger without count:\n"
5431 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5432 	"\t     To remove trigger with a count:\n"
5433 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5434 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5435 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5436 	"\t    modules: Can select a group via module command :mod:\n"
5437 	"\t    Does not accept triggers\n"
5438 #endif /* CONFIG_DYNAMIC_FTRACE */
5439 #ifdef CONFIG_FUNCTION_TRACER
5440 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5441 	"\t\t    (function)\n"
5442 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5443 	"\t\t    (function)\n"
5444 #endif
5445 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5446 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5447 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5448 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5449 #endif
5450 #ifdef CONFIG_TRACER_SNAPSHOT
5451 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5452 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5453 	"\t\t\t  information\n"
5454 #endif
5455 #ifdef CONFIG_STACK_TRACER
5456 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5457 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5458 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5459 	"\t\t\t  new trace)\n"
5460 #ifdef CONFIG_DYNAMIC_FTRACE
5461 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5462 	"\t\t\t  traces\n"
5463 #endif
5464 #endif /* CONFIG_STACK_TRACER */
5465 #ifdef CONFIG_DYNAMIC_EVENTS
5466 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5467 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5468 #endif
5469 #ifdef CONFIG_KPROBE_EVENTS
5470 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5471 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5472 #endif
5473 #ifdef CONFIG_UPROBE_EVENTS
5474 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5475 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5476 #endif
5477 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5478     defined(CONFIG_FPROBE_EVENTS)
5479 	"\t  accepts: event-definitions (one definition per line)\n"
5480 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5481 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5482 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5483 #endif
5484 #ifdef CONFIG_FPROBE_EVENTS
5485 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5486 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5487 #endif
5488 #ifdef CONFIG_HIST_TRIGGERS
5489 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5490 #endif
5491 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5492 	"\t           -:[<group>/][<event>]\n"
5493 #ifdef CONFIG_KPROBE_EVENTS
5494 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5495   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5496 #endif
5497 #ifdef CONFIG_UPROBE_EVENTS
5498   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5499 #endif
5500 	"\t     args: <name>=fetcharg[:type]\n"
5501 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5502 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5503 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5504 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5505 	"\t           <argname>[->field[->field|.field...]],\n"
5506 #endif
5507 #else
5508 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5509 #endif
5510 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5511 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5512 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5513 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5514 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5515 #ifdef CONFIG_HIST_TRIGGERS
5516 	"\t    field: <stype> <name>;\n"
5517 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5518 	"\t           [unsigned] char/int/long\n"
5519 #endif
5520 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5521 	"\t            of the <attached-group>/<attached-event>.\n"
5522 #endif
5523 	"  set_event\t\t- Enables events by name written into it\n"
5524 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5525 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5526 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5527 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5528 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5529 	"\t\t\t  events\n"
5530 	"      filter\t\t- If set, only events passing filter are traced\n"
5531 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5532 	"\t\t\t  <event>:\n"
5533 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5534 	"      filter\t\t- If set, only events passing filter are traced\n"
5535 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5536 	"\t    Format: <trigger>[:count][if <filter>]\n"
5537 	"\t   trigger: traceon, traceoff\n"
5538 	"\t            enable_event:<system>:<event>\n"
5539 	"\t            disable_event:<system>:<event>\n"
5540 #ifdef CONFIG_HIST_TRIGGERS
5541 	"\t            enable_hist:<system>:<event>\n"
5542 	"\t            disable_hist:<system>:<event>\n"
5543 #endif
5544 #ifdef CONFIG_STACKTRACE
5545 	"\t\t    stacktrace\n"
5546 #endif
5547 #ifdef CONFIG_TRACER_SNAPSHOT
5548 	"\t\t    snapshot\n"
5549 #endif
5550 #ifdef CONFIG_HIST_TRIGGERS
5551 	"\t\t    hist (see below)\n"
5552 #endif
5553 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5554 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5555 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5556 	"\t                  events/block/block_unplug/trigger\n"
5557 	"\t   The first disables tracing every time block_unplug is hit.\n"
5558 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5559 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5560 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5561 	"\t   Like function triggers, the counter is only decremented if it\n"
5562 	"\t    enabled or disabled tracing.\n"
5563 	"\t   To remove a trigger without a count:\n"
5564 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5565 	"\t   To remove a trigger with a count:\n"
5566 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5567 	"\t   Filters can be ignored when removing a trigger.\n"
5568 #ifdef CONFIG_HIST_TRIGGERS
5569 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5570 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5571 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5572 	"\t            [:values=<field1[,field2,...]>]\n"
5573 	"\t            [:sort=<field1[,field2,...]>]\n"
5574 	"\t            [:size=#entries]\n"
5575 	"\t            [:pause][:continue][:clear]\n"
5576 	"\t            [:name=histname1]\n"
5577 	"\t            [:nohitcount]\n"
5578 	"\t            [:<handler>.<action>]\n"
5579 	"\t            [if <filter>]\n\n"
5580 	"\t    Note, special fields can be used as well:\n"
5581 	"\t            common_timestamp - to record current timestamp\n"
5582 	"\t            common_cpu - to record the CPU the event happened on\n"
5583 	"\n"
5584 	"\t    A hist trigger variable can be:\n"
5585 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5586 	"\t        - a reference to another variable e.g. y=$x,\n"
5587 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5588 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5589 	"\n"
5590 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5591 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5592 	"\t    variable reference, field or numeric literal.\n"
5593 	"\n"
5594 	"\t    When a matching event is hit, an entry is added to a hash\n"
5595 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5596 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5597 	"\t    correspond to fields in the event's format description.  Keys\n"
5598 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5599 	"\t    Compound keys consisting of up to two fields can be specified\n"
5600 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5601 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5602 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5603 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5604 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5605 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5606 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5607 	"\t    its histogram data will be shared with other triggers of the\n"
5608 	"\t    same name, and trigger hits will update this common data.\n\n"
5609 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5610 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5611 	"\t    triggers attached to an event, there will be a table for each\n"
5612 	"\t    trigger in the output.  The table displayed for a named\n"
5613 	"\t    trigger will be the same as any other instance having the\n"
5614 	"\t    same name.  The default format used to display a given field\n"
5615 	"\t    can be modified by appending any of the following modifiers\n"
5616 	"\t    to the field name, as applicable:\n\n"
5617 	"\t            .hex        display a number as a hex value\n"
5618 	"\t            .sym        display an address as a symbol\n"
5619 	"\t            .sym-offset display an address as a symbol and offset\n"
5620 	"\t            .execname   display a common_pid as a program name\n"
5621 	"\t            .syscall    display a syscall id as a syscall name\n"
5622 	"\t            .log2       display log2 value rather than raw number\n"
5623 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5624 	"\t            .usecs      display a common_timestamp in microseconds\n"
5625 	"\t            .percent    display a number of percentage value\n"
5626 	"\t            .graph      display a bar-graph of a value\n\n"
5627 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5628 	"\t    trigger or to start a hist trigger but not log any events\n"
5629 	"\t    until told to do so.  'continue' can be used to start or\n"
5630 	"\t    restart a paused hist trigger.\n\n"
5631 	"\t    The 'clear' parameter will clear the contents of a running\n"
5632 	"\t    hist trigger and leave its current paused/active state\n"
5633 	"\t    unchanged.\n\n"
5634 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5635 	"\t    raw hitcount in the histogram.\n\n"
5636 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5637 	"\t    have one event conditionally start and stop another event's\n"
5638 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5639 	"\t    the enable_event and disable_event triggers.\n\n"
5640 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5641 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5642 	"\t        <handler>.<action>\n\n"
5643 	"\t    The available handlers are:\n\n"
5644 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5645 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5646 	"\t        onchange(var)            - invoke action if var changes\n\n"
5647 	"\t    The available actions are:\n\n"
5648 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5649 	"\t        save(field,...)                      - save current event fields\n"
5650 #ifdef CONFIG_TRACER_SNAPSHOT
5651 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5652 #endif
5653 #ifdef CONFIG_SYNTH_EVENTS
5654 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5655 	"\t  Write into this file to define/undefine new synthetic events.\n"
5656 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5657 #endif
5658 #endif
5659 ;
5660 
5661 static ssize_t
5662 tracing_readme_read(struct file *filp, char __user *ubuf,
5663 		       size_t cnt, loff_t *ppos)
5664 {
5665 	return simple_read_from_buffer(ubuf, cnt, ppos,
5666 					readme_msg, strlen(readme_msg));
5667 }
5668 
5669 static const struct file_operations tracing_readme_fops = {
5670 	.open		= tracing_open_generic,
5671 	.read		= tracing_readme_read,
5672 	.llseek		= generic_file_llseek,
5673 };
5674 
5675 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5676 static union trace_eval_map_item *
5677 update_eval_map(union trace_eval_map_item *ptr)
5678 {
5679 	if (!ptr->map.eval_string) {
5680 		if (ptr->tail.next) {
5681 			ptr = ptr->tail.next;
5682 			/* Set ptr to the next real item (skip head) */
5683 			ptr++;
5684 		} else
5685 			return NULL;
5686 	}
5687 	return ptr;
5688 }
5689 
5690 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5691 {
5692 	union trace_eval_map_item *ptr = v;
5693 
5694 	/*
5695 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5696 	 * This really should never happen.
5697 	 */
5698 	(*pos)++;
5699 	ptr = update_eval_map(ptr);
5700 	if (WARN_ON_ONCE(!ptr))
5701 		return NULL;
5702 
5703 	ptr++;
5704 	ptr = update_eval_map(ptr);
5705 
5706 	return ptr;
5707 }
5708 
5709 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5710 {
5711 	union trace_eval_map_item *v;
5712 	loff_t l = 0;
5713 
5714 	mutex_lock(&trace_eval_mutex);
5715 
5716 	v = trace_eval_maps;
5717 	if (v)
5718 		v++;
5719 
5720 	while (v && l < *pos) {
5721 		v = eval_map_next(m, v, &l);
5722 	}
5723 
5724 	return v;
5725 }
5726 
5727 static void eval_map_stop(struct seq_file *m, void *v)
5728 {
5729 	mutex_unlock(&trace_eval_mutex);
5730 }
5731 
5732 static int eval_map_show(struct seq_file *m, void *v)
5733 {
5734 	union trace_eval_map_item *ptr = v;
5735 
5736 	seq_printf(m, "%s %ld (%s)\n",
5737 		   ptr->map.eval_string, ptr->map.eval_value,
5738 		   ptr->map.system);
5739 
5740 	return 0;
5741 }
5742 
5743 static const struct seq_operations tracing_eval_map_seq_ops = {
5744 	.start		= eval_map_start,
5745 	.next		= eval_map_next,
5746 	.stop		= eval_map_stop,
5747 	.show		= eval_map_show,
5748 };
5749 
5750 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5751 {
5752 	int ret;
5753 
5754 	ret = tracing_check_open_get_tr(NULL);
5755 	if (ret)
5756 		return ret;
5757 
5758 	return seq_open(filp, &tracing_eval_map_seq_ops);
5759 }
5760 
5761 static const struct file_operations tracing_eval_map_fops = {
5762 	.open		= tracing_eval_map_open,
5763 	.read		= seq_read,
5764 	.llseek		= seq_lseek,
5765 	.release	= seq_release,
5766 };
5767 
5768 static inline union trace_eval_map_item *
5769 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5770 {
5771 	/* Return tail of array given the head */
5772 	return ptr + ptr->head.length + 1;
5773 }
5774 
5775 static void
5776 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5777 			   int len)
5778 {
5779 	struct trace_eval_map **stop;
5780 	struct trace_eval_map **map;
5781 	union trace_eval_map_item *map_array;
5782 	union trace_eval_map_item *ptr;
5783 
5784 	stop = start + len;
5785 
5786 	/*
5787 	 * The trace_eval_maps contains the map plus a head and tail item,
5788 	 * where the head holds the module and length of array, and the
5789 	 * tail holds a pointer to the next list.
5790 	 */
5791 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5792 	if (!map_array) {
5793 		pr_warn("Unable to allocate trace eval mapping\n");
5794 		return;
5795 	}
5796 
5797 	guard(mutex)(&trace_eval_mutex);
5798 
5799 	if (!trace_eval_maps)
5800 		trace_eval_maps = map_array;
5801 	else {
5802 		ptr = trace_eval_maps;
5803 		for (;;) {
5804 			ptr = trace_eval_jmp_to_tail(ptr);
5805 			if (!ptr->tail.next)
5806 				break;
5807 			ptr = ptr->tail.next;
5808 
5809 		}
5810 		ptr->tail.next = map_array;
5811 	}
5812 	map_array->head.mod = mod;
5813 	map_array->head.length = len;
5814 	map_array++;
5815 
5816 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5817 		map_array->map = **map;
5818 		map_array++;
5819 	}
5820 	memset(map_array, 0, sizeof(*map_array));
5821 }
5822 
5823 static void trace_create_eval_file(struct dentry *d_tracer)
5824 {
5825 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5826 			  NULL, &tracing_eval_map_fops);
5827 }
5828 
5829 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5830 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5831 static inline void trace_insert_eval_map_file(struct module *mod,
5832 			      struct trace_eval_map **start, int len) { }
5833 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5834 
5835 static void trace_insert_eval_map(struct module *mod,
5836 				  struct trace_eval_map **start, int len)
5837 {
5838 	struct trace_eval_map **map;
5839 
5840 	if (len <= 0)
5841 		return;
5842 
5843 	map = start;
5844 
5845 	trace_event_eval_update(map, len);
5846 
5847 	trace_insert_eval_map_file(mod, start, len);
5848 }
5849 
5850 static ssize_t
5851 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5852 		       size_t cnt, loff_t *ppos)
5853 {
5854 	struct trace_array *tr = filp->private_data;
5855 	char buf[MAX_TRACER_SIZE+2];
5856 	int r;
5857 
5858 	mutex_lock(&trace_types_lock);
5859 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5860 	mutex_unlock(&trace_types_lock);
5861 
5862 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5863 }
5864 
5865 int tracer_init(struct tracer *t, struct trace_array *tr)
5866 {
5867 	tracing_reset_online_cpus(&tr->array_buffer);
5868 	return t->init(tr);
5869 }
5870 
5871 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5872 {
5873 	int cpu;
5874 
5875 	for_each_tracing_cpu(cpu)
5876 		per_cpu_ptr(buf->data, cpu)->entries = val;
5877 }
5878 
5879 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5880 {
5881 	if (cpu == RING_BUFFER_ALL_CPUS) {
5882 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5883 	} else {
5884 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5885 	}
5886 }
5887 
5888 #ifdef CONFIG_TRACER_MAX_TRACE
5889 /* resize @tr's buffer to the size of @size_tr's entries */
5890 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5891 					struct array_buffer *size_buf, int cpu_id)
5892 {
5893 	int cpu, ret = 0;
5894 
5895 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5896 		for_each_tracing_cpu(cpu) {
5897 			ret = ring_buffer_resize(trace_buf->buffer,
5898 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5899 			if (ret < 0)
5900 				break;
5901 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5902 				per_cpu_ptr(size_buf->data, cpu)->entries;
5903 		}
5904 	} else {
5905 		ret = ring_buffer_resize(trace_buf->buffer,
5906 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5907 		if (ret == 0)
5908 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5909 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5910 	}
5911 
5912 	return ret;
5913 }
5914 #endif /* CONFIG_TRACER_MAX_TRACE */
5915 
5916 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5917 					unsigned long size, int cpu)
5918 {
5919 	int ret;
5920 
5921 	/*
5922 	 * If kernel or user changes the size of the ring buffer
5923 	 * we use the size that was given, and we can forget about
5924 	 * expanding it later.
5925 	 */
5926 	trace_set_ring_buffer_expanded(tr);
5927 
5928 	/* May be called before buffers are initialized */
5929 	if (!tr->array_buffer.buffer)
5930 		return 0;
5931 
5932 	/* Do not allow tracing while resizing ring buffer */
5933 	tracing_stop_tr(tr);
5934 
5935 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5936 	if (ret < 0)
5937 		goto out_start;
5938 
5939 #ifdef CONFIG_TRACER_MAX_TRACE
5940 	if (!tr->allocated_snapshot)
5941 		goto out;
5942 
5943 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5944 	if (ret < 0) {
5945 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5946 						     &tr->array_buffer, cpu);
5947 		if (r < 0) {
5948 			/*
5949 			 * AARGH! We are left with different
5950 			 * size max buffer!!!!
5951 			 * The max buffer is our "snapshot" buffer.
5952 			 * When a tracer needs a snapshot (one of the
5953 			 * latency tracers), it swaps the max buffer
5954 			 * with the saved snap shot. We succeeded to
5955 			 * update the size of the main buffer, but failed to
5956 			 * update the size of the max buffer. But when we tried
5957 			 * to reset the main buffer to the original size, we
5958 			 * failed there too. This is very unlikely to
5959 			 * happen, but if it does, warn and kill all
5960 			 * tracing.
5961 			 */
5962 			WARN_ON(1);
5963 			tracing_disabled = 1;
5964 		}
5965 		goto out_start;
5966 	}
5967 
5968 	update_buffer_entries(&tr->max_buffer, cpu);
5969 
5970  out:
5971 #endif /* CONFIG_TRACER_MAX_TRACE */
5972 
5973 	update_buffer_entries(&tr->array_buffer, cpu);
5974  out_start:
5975 	tracing_start_tr(tr);
5976 	return ret;
5977 }
5978 
5979 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5980 				  unsigned long size, int cpu_id)
5981 {
5982 	guard(mutex)(&trace_types_lock);
5983 
5984 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5985 		/* make sure, this cpu is enabled in the mask */
5986 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5987 			return -EINVAL;
5988 	}
5989 
5990 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
5991 }
5992 
5993 struct trace_mod_entry {
5994 	unsigned long	mod_addr;
5995 	char		mod_name[MODULE_NAME_LEN];
5996 };
5997 
5998 struct trace_scratch {
5999 	unsigned long		text_addr;
6000 	unsigned long		nr_entries;
6001 	struct trace_mod_entry	entries[];
6002 };
6003 
6004 static DEFINE_MUTEX(scratch_mutex);
6005 
6006 static int cmp_mod_entry(const void *key, const void *pivot)
6007 {
6008 	unsigned long addr = (unsigned long)key;
6009 	const struct trace_mod_entry *ent = pivot;
6010 
6011 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6012 		return 0;
6013 	else
6014 		return addr - ent->mod_addr;
6015 }
6016 
6017 /**
6018  * trace_adjust_address() - Adjust prev boot address to current address.
6019  * @tr: Persistent ring buffer's trace_array.
6020  * @addr: Address in @tr which is adjusted.
6021  */
6022 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6023 {
6024 	struct trace_module_delta *module_delta;
6025 	struct trace_scratch *tscratch;
6026 	struct trace_mod_entry *entry;
6027 	int idx = 0, nr_entries;
6028 
6029 	/* If we don't have last boot delta, return the address */
6030 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6031 		return addr;
6032 
6033 	/* tr->module_delta must be protected by rcu. */
6034 	guard(rcu)();
6035 	tscratch = tr->scratch;
6036 	/* if there is no tscrach, module_delta must be NULL. */
6037 	module_delta = READ_ONCE(tr->module_delta);
6038 	if (!module_delta || tscratch->entries[0].mod_addr > addr)
6039 		return addr + tr->text_delta;
6040 
6041 	/* Note that entries must be sorted. */
6042 	nr_entries = tscratch->nr_entries;
6043 	if (nr_entries == 1 ||
6044 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6045 		idx = nr_entries - 1;
6046 	else {
6047 		entry = __inline_bsearch((void *)addr,
6048 				tscratch->entries,
6049 				nr_entries - 1,
6050 				sizeof(tscratch->entries[0]),
6051 				cmp_mod_entry);
6052 		if (entry)
6053 			idx = entry - tscratch->entries;
6054 	}
6055 
6056 	return addr + module_delta->delta[idx];
6057 }
6058 
6059 #ifdef CONFIG_MODULES
6060 static int save_mod(struct module *mod, void *data)
6061 {
6062 	struct trace_array *tr = data;
6063 	struct trace_scratch *tscratch;
6064 	struct trace_mod_entry *entry;
6065 	unsigned int size;
6066 
6067 	tscratch = tr->scratch;
6068 	if (!tscratch)
6069 		return -1;
6070 	size = tr->scratch_size;
6071 
6072 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6073 		return -1;
6074 
6075 	entry = &tscratch->entries[tscratch->nr_entries];
6076 
6077 	tscratch->nr_entries++;
6078 
6079 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6080 	strscpy(entry->mod_name, mod->name);
6081 
6082 	return 0;
6083 }
6084 #else
6085 static int save_mod(struct module *mod, void *data)
6086 {
6087 	return 0;
6088 }
6089 #endif
6090 
6091 static void update_last_data(struct trace_array *tr)
6092 {
6093 	struct trace_module_delta *module_delta;
6094 	struct trace_scratch *tscratch;
6095 
6096 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6097 		return;
6098 
6099 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6100 		return;
6101 
6102 	/* Only if the buffer has previous boot data clear and update it. */
6103 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6104 
6105 	/* Reset the module list and reload them */
6106 	if (tr->scratch) {
6107 		struct trace_scratch *tscratch = tr->scratch;
6108 
6109 		memset(tscratch->entries, 0,
6110 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6111 		tscratch->nr_entries = 0;
6112 
6113 		guard(mutex)(&scratch_mutex);
6114 		module_for_each_mod(save_mod, tr);
6115 	}
6116 
6117 	/*
6118 	 * Need to clear all CPU buffers as there cannot be events
6119 	 * from the previous boot mixed with events with this boot
6120 	 * as that will cause a confusing trace. Need to clear all
6121 	 * CPU buffers, even for those that may currently be offline.
6122 	 */
6123 	tracing_reset_all_cpus(&tr->array_buffer);
6124 
6125 	/* Using current data now */
6126 	tr->text_delta = 0;
6127 
6128 	if (!tr->scratch)
6129 		return;
6130 
6131 	tscratch = tr->scratch;
6132 	module_delta = READ_ONCE(tr->module_delta);
6133 	WRITE_ONCE(tr->module_delta, NULL);
6134 	kfree_rcu(module_delta, rcu);
6135 
6136 	/* Set the persistent ring buffer meta data to this address */
6137 	tscratch->text_addr = (unsigned long)_text;
6138 }
6139 
6140 /**
6141  * tracing_update_buffers - used by tracing facility to expand ring buffers
6142  * @tr: The tracing instance
6143  *
6144  * To save on memory when the tracing is never used on a system with it
6145  * configured in. The ring buffers are set to a minimum size. But once
6146  * a user starts to use the tracing facility, then they need to grow
6147  * to their default size.
6148  *
6149  * This function is to be called when a tracer is about to be used.
6150  */
6151 int tracing_update_buffers(struct trace_array *tr)
6152 {
6153 	int ret = 0;
6154 
6155 	mutex_lock(&trace_types_lock);
6156 
6157 	update_last_data(tr);
6158 
6159 	if (!tr->ring_buffer_expanded)
6160 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6161 						RING_BUFFER_ALL_CPUS);
6162 	mutex_unlock(&trace_types_lock);
6163 
6164 	return ret;
6165 }
6166 
6167 struct trace_option_dentry;
6168 
6169 static void
6170 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6171 
6172 /*
6173  * Used to clear out the tracer before deletion of an instance.
6174  * Must have trace_types_lock held.
6175  */
6176 static void tracing_set_nop(struct trace_array *tr)
6177 {
6178 	if (tr->current_trace == &nop_trace)
6179 		return;
6180 
6181 	tr->current_trace->enabled--;
6182 
6183 	if (tr->current_trace->reset)
6184 		tr->current_trace->reset(tr);
6185 
6186 	tr->current_trace = &nop_trace;
6187 }
6188 
6189 static bool tracer_options_updated;
6190 
6191 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6192 {
6193 	/* Only enable if the directory has been created already. */
6194 	if (!tr->dir)
6195 		return;
6196 
6197 	/* Only create trace option files after update_tracer_options finish */
6198 	if (!tracer_options_updated)
6199 		return;
6200 
6201 	create_trace_option_files(tr, t);
6202 }
6203 
6204 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6205 {
6206 	struct tracer *t;
6207 #ifdef CONFIG_TRACER_MAX_TRACE
6208 	bool had_max_tr;
6209 #endif
6210 	int ret;
6211 
6212 	guard(mutex)(&trace_types_lock);
6213 
6214 	update_last_data(tr);
6215 
6216 	if (!tr->ring_buffer_expanded) {
6217 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6218 						RING_BUFFER_ALL_CPUS);
6219 		if (ret < 0)
6220 			return ret;
6221 		ret = 0;
6222 	}
6223 
6224 	for (t = trace_types; t; t = t->next) {
6225 		if (strcmp(t->name, buf) == 0)
6226 			break;
6227 	}
6228 	if (!t)
6229 		return -EINVAL;
6230 
6231 	if (t == tr->current_trace)
6232 		return 0;
6233 
6234 #ifdef CONFIG_TRACER_SNAPSHOT
6235 	if (t->use_max_tr) {
6236 		local_irq_disable();
6237 		arch_spin_lock(&tr->max_lock);
6238 		ret = tr->cond_snapshot ? -EBUSY : 0;
6239 		arch_spin_unlock(&tr->max_lock);
6240 		local_irq_enable();
6241 		if (ret)
6242 			return ret;
6243 	}
6244 #endif
6245 	/* Some tracers won't work on kernel command line */
6246 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6247 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6248 			t->name);
6249 		return -EINVAL;
6250 	}
6251 
6252 	/* Some tracers are only allowed for the top level buffer */
6253 	if (!trace_ok_for_array(t, tr))
6254 		return -EINVAL;
6255 
6256 	/* If trace pipe files are being read, we can't change the tracer */
6257 	if (tr->trace_ref)
6258 		return -EBUSY;
6259 
6260 	trace_branch_disable();
6261 
6262 	tr->current_trace->enabled--;
6263 
6264 	if (tr->current_trace->reset)
6265 		tr->current_trace->reset(tr);
6266 
6267 #ifdef CONFIG_TRACER_MAX_TRACE
6268 	had_max_tr = tr->current_trace->use_max_tr;
6269 
6270 	/* Current trace needs to be nop_trace before synchronize_rcu */
6271 	tr->current_trace = &nop_trace;
6272 
6273 	if (had_max_tr && !t->use_max_tr) {
6274 		/*
6275 		 * We need to make sure that the update_max_tr sees that
6276 		 * current_trace changed to nop_trace to keep it from
6277 		 * swapping the buffers after we resize it.
6278 		 * The update_max_tr is called from interrupts disabled
6279 		 * so a synchronized_sched() is sufficient.
6280 		 */
6281 		synchronize_rcu();
6282 		free_snapshot(tr);
6283 		tracing_disarm_snapshot(tr);
6284 	}
6285 
6286 	if (!had_max_tr && t->use_max_tr) {
6287 		ret = tracing_arm_snapshot_locked(tr);
6288 		if (ret)
6289 			return ret;
6290 	}
6291 #else
6292 	tr->current_trace = &nop_trace;
6293 #endif
6294 
6295 	if (t->init) {
6296 		ret = tracer_init(t, tr);
6297 		if (ret) {
6298 #ifdef CONFIG_TRACER_MAX_TRACE
6299 			if (t->use_max_tr)
6300 				tracing_disarm_snapshot(tr);
6301 #endif
6302 			return ret;
6303 		}
6304 	}
6305 
6306 	tr->current_trace = t;
6307 	tr->current_trace->enabled++;
6308 	trace_branch_enable(tr);
6309 
6310 	return 0;
6311 }
6312 
6313 static ssize_t
6314 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6315 			size_t cnt, loff_t *ppos)
6316 {
6317 	struct trace_array *tr = filp->private_data;
6318 	char buf[MAX_TRACER_SIZE+1];
6319 	char *name;
6320 	size_t ret;
6321 	int err;
6322 
6323 	ret = cnt;
6324 
6325 	if (cnt > MAX_TRACER_SIZE)
6326 		cnt = MAX_TRACER_SIZE;
6327 
6328 	if (copy_from_user(buf, ubuf, cnt))
6329 		return -EFAULT;
6330 
6331 	buf[cnt] = 0;
6332 
6333 	name = strim(buf);
6334 
6335 	err = tracing_set_tracer(tr, name);
6336 	if (err)
6337 		return err;
6338 
6339 	*ppos += ret;
6340 
6341 	return ret;
6342 }
6343 
6344 static ssize_t
6345 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6346 		   size_t cnt, loff_t *ppos)
6347 {
6348 	char buf[64];
6349 	int r;
6350 
6351 	r = snprintf(buf, sizeof(buf), "%ld\n",
6352 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6353 	if (r > sizeof(buf))
6354 		r = sizeof(buf);
6355 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6356 }
6357 
6358 static ssize_t
6359 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6360 		    size_t cnt, loff_t *ppos)
6361 {
6362 	unsigned long val;
6363 	int ret;
6364 
6365 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6366 	if (ret)
6367 		return ret;
6368 
6369 	*ptr = val * 1000;
6370 
6371 	return cnt;
6372 }
6373 
6374 static ssize_t
6375 tracing_thresh_read(struct file *filp, char __user *ubuf,
6376 		    size_t cnt, loff_t *ppos)
6377 {
6378 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6379 }
6380 
6381 static ssize_t
6382 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6383 		     size_t cnt, loff_t *ppos)
6384 {
6385 	struct trace_array *tr = filp->private_data;
6386 	int ret;
6387 
6388 	guard(mutex)(&trace_types_lock);
6389 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6390 	if (ret < 0)
6391 		return ret;
6392 
6393 	if (tr->current_trace->update_thresh) {
6394 		ret = tr->current_trace->update_thresh(tr);
6395 		if (ret < 0)
6396 			return ret;
6397 	}
6398 
6399 	return cnt;
6400 }
6401 
6402 #ifdef CONFIG_TRACER_MAX_TRACE
6403 
6404 static ssize_t
6405 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6406 		     size_t cnt, loff_t *ppos)
6407 {
6408 	struct trace_array *tr = filp->private_data;
6409 
6410 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6411 }
6412 
6413 static ssize_t
6414 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6415 		      size_t cnt, loff_t *ppos)
6416 {
6417 	struct trace_array *tr = filp->private_data;
6418 
6419 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6420 }
6421 
6422 #endif
6423 
6424 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6425 {
6426 	if (cpu == RING_BUFFER_ALL_CPUS) {
6427 		if (cpumask_empty(tr->pipe_cpumask)) {
6428 			cpumask_setall(tr->pipe_cpumask);
6429 			return 0;
6430 		}
6431 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6432 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6433 		return 0;
6434 	}
6435 	return -EBUSY;
6436 }
6437 
6438 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6439 {
6440 	if (cpu == RING_BUFFER_ALL_CPUS) {
6441 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6442 		cpumask_clear(tr->pipe_cpumask);
6443 	} else {
6444 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6445 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6446 	}
6447 }
6448 
6449 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6450 {
6451 	struct trace_array *tr = inode->i_private;
6452 	struct trace_iterator *iter;
6453 	int cpu;
6454 	int ret;
6455 
6456 	ret = tracing_check_open_get_tr(tr);
6457 	if (ret)
6458 		return ret;
6459 
6460 	mutex_lock(&trace_types_lock);
6461 	cpu = tracing_get_cpu(inode);
6462 	ret = open_pipe_on_cpu(tr, cpu);
6463 	if (ret)
6464 		goto fail_pipe_on_cpu;
6465 
6466 	/* create a buffer to store the information to pass to userspace */
6467 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6468 	if (!iter) {
6469 		ret = -ENOMEM;
6470 		goto fail_alloc_iter;
6471 	}
6472 
6473 	trace_seq_init(&iter->seq);
6474 	iter->trace = tr->current_trace;
6475 
6476 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6477 		ret = -ENOMEM;
6478 		goto fail;
6479 	}
6480 
6481 	/* trace pipe does not show start of buffer */
6482 	cpumask_setall(iter->started);
6483 
6484 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6485 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6486 
6487 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6488 	if (trace_clocks[tr->clock_id].in_ns)
6489 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6490 
6491 	iter->tr = tr;
6492 	iter->array_buffer = &tr->array_buffer;
6493 	iter->cpu_file = cpu;
6494 	mutex_init(&iter->mutex);
6495 	filp->private_data = iter;
6496 
6497 	if (iter->trace->pipe_open)
6498 		iter->trace->pipe_open(iter);
6499 
6500 	nonseekable_open(inode, filp);
6501 
6502 	tr->trace_ref++;
6503 
6504 	mutex_unlock(&trace_types_lock);
6505 	return ret;
6506 
6507 fail:
6508 	kfree(iter);
6509 fail_alloc_iter:
6510 	close_pipe_on_cpu(tr, cpu);
6511 fail_pipe_on_cpu:
6512 	__trace_array_put(tr);
6513 	mutex_unlock(&trace_types_lock);
6514 	return ret;
6515 }
6516 
6517 static int tracing_release_pipe(struct inode *inode, struct file *file)
6518 {
6519 	struct trace_iterator *iter = file->private_data;
6520 	struct trace_array *tr = inode->i_private;
6521 
6522 	mutex_lock(&trace_types_lock);
6523 
6524 	tr->trace_ref--;
6525 
6526 	if (iter->trace->pipe_close)
6527 		iter->trace->pipe_close(iter);
6528 	close_pipe_on_cpu(tr, iter->cpu_file);
6529 	mutex_unlock(&trace_types_lock);
6530 
6531 	free_trace_iter_content(iter);
6532 	kfree(iter);
6533 
6534 	trace_array_put(tr);
6535 
6536 	return 0;
6537 }
6538 
6539 static __poll_t
6540 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6541 {
6542 	struct trace_array *tr = iter->tr;
6543 
6544 	/* Iterators are static, they should be filled or empty */
6545 	if (trace_buffer_iter(iter, iter->cpu_file))
6546 		return EPOLLIN | EPOLLRDNORM;
6547 
6548 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6549 		/*
6550 		 * Always select as readable when in blocking mode
6551 		 */
6552 		return EPOLLIN | EPOLLRDNORM;
6553 	else
6554 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6555 					     filp, poll_table, iter->tr->buffer_percent);
6556 }
6557 
6558 static __poll_t
6559 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6560 {
6561 	struct trace_iterator *iter = filp->private_data;
6562 
6563 	return trace_poll(iter, filp, poll_table);
6564 }
6565 
6566 /* Must be called with iter->mutex held. */
6567 static int tracing_wait_pipe(struct file *filp)
6568 {
6569 	struct trace_iterator *iter = filp->private_data;
6570 	int ret;
6571 
6572 	while (trace_empty(iter)) {
6573 
6574 		if ((filp->f_flags & O_NONBLOCK)) {
6575 			return -EAGAIN;
6576 		}
6577 
6578 		/*
6579 		 * We block until we read something and tracing is disabled.
6580 		 * We still block if tracing is disabled, but we have never
6581 		 * read anything. This allows a user to cat this file, and
6582 		 * then enable tracing. But after we have read something,
6583 		 * we give an EOF when tracing is again disabled.
6584 		 *
6585 		 * iter->pos will be 0 if we haven't read anything.
6586 		 */
6587 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6588 			break;
6589 
6590 		mutex_unlock(&iter->mutex);
6591 
6592 		ret = wait_on_pipe(iter, 0);
6593 
6594 		mutex_lock(&iter->mutex);
6595 
6596 		if (ret)
6597 			return ret;
6598 	}
6599 
6600 	return 1;
6601 }
6602 
6603 /*
6604  * Consumer reader.
6605  */
6606 static ssize_t
6607 tracing_read_pipe(struct file *filp, char __user *ubuf,
6608 		  size_t cnt, loff_t *ppos)
6609 {
6610 	struct trace_iterator *iter = filp->private_data;
6611 	ssize_t sret;
6612 
6613 	/*
6614 	 * Avoid more than one consumer on a single file descriptor
6615 	 * This is just a matter of traces coherency, the ring buffer itself
6616 	 * is protected.
6617 	 */
6618 	guard(mutex)(&iter->mutex);
6619 
6620 	/* return any leftover data */
6621 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6622 	if (sret != -EBUSY)
6623 		return sret;
6624 
6625 	trace_seq_init(&iter->seq);
6626 
6627 	if (iter->trace->read) {
6628 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6629 		if (sret)
6630 			return sret;
6631 	}
6632 
6633 waitagain:
6634 	sret = tracing_wait_pipe(filp);
6635 	if (sret <= 0)
6636 		return sret;
6637 
6638 	/* stop when tracing is finished */
6639 	if (trace_empty(iter))
6640 		return 0;
6641 
6642 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6643 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6644 
6645 	/* reset all but tr, trace, and overruns */
6646 	trace_iterator_reset(iter);
6647 	cpumask_clear(iter->started);
6648 	trace_seq_init(&iter->seq);
6649 
6650 	trace_event_read_lock();
6651 	trace_access_lock(iter->cpu_file);
6652 	while (trace_find_next_entry_inc(iter) != NULL) {
6653 		enum print_line_t ret;
6654 		int save_len = iter->seq.seq.len;
6655 
6656 		ret = print_trace_line(iter);
6657 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6658 			/*
6659 			 * If one print_trace_line() fills entire trace_seq in one shot,
6660 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6661 			 * In this case, we need to consume it, otherwise, loop will peek
6662 			 * this event next time, resulting in an infinite loop.
6663 			 */
6664 			if (save_len == 0) {
6665 				iter->seq.full = 0;
6666 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6667 				trace_consume(iter);
6668 				break;
6669 			}
6670 
6671 			/* In other cases, don't print partial lines */
6672 			iter->seq.seq.len = save_len;
6673 			break;
6674 		}
6675 		if (ret != TRACE_TYPE_NO_CONSUME)
6676 			trace_consume(iter);
6677 
6678 		if (trace_seq_used(&iter->seq) >= cnt)
6679 			break;
6680 
6681 		/*
6682 		 * Setting the full flag means we reached the trace_seq buffer
6683 		 * size and we should leave by partial output condition above.
6684 		 * One of the trace_seq_* functions is not used properly.
6685 		 */
6686 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6687 			  iter->ent->type);
6688 	}
6689 	trace_access_unlock(iter->cpu_file);
6690 	trace_event_read_unlock();
6691 
6692 	/* Now copy what we have to the user */
6693 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6694 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6695 		trace_seq_init(&iter->seq);
6696 
6697 	/*
6698 	 * If there was nothing to send to user, in spite of consuming trace
6699 	 * entries, go back to wait for more entries.
6700 	 */
6701 	if (sret == -EBUSY)
6702 		goto waitagain;
6703 
6704 	return sret;
6705 }
6706 
6707 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6708 				     unsigned int idx)
6709 {
6710 	__free_page(spd->pages[idx]);
6711 }
6712 
6713 static size_t
6714 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6715 {
6716 	size_t count;
6717 	int save_len;
6718 	int ret;
6719 
6720 	/* Seq buffer is page-sized, exactly what we need. */
6721 	for (;;) {
6722 		save_len = iter->seq.seq.len;
6723 		ret = print_trace_line(iter);
6724 
6725 		if (trace_seq_has_overflowed(&iter->seq)) {
6726 			iter->seq.seq.len = save_len;
6727 			break;
6728 		}
6729 
6730 		/*
6731 		 * This should not be hit, because it should only
6732 		 * be set if the iter->seq overflowed. But check it
6733 		 * anyway to be safe.
6734 		 */
6735 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6736 			iter->seq.seq.len = save_len;
6737 			break;
6738 		}
6739 
6740 		count = trace_seq_used(&iter->seq) - save_len;
6741 		if (rem < count) {
6742 			rem = 0;
6743 			iter->seq.seq.len = save_len;
6744 			break;
6745 		}
6746 
6747 		if (ret != TRACE_TYPE_NO_CONSUME)
6748 			trace_consume(iter);
6749 		rem -= count;
6750 		if (!trace_find_next_entry_inc(iter))	{
6751 			rem = 0;
6752 			iter->ent = NULL;
6753 			break;
6754 		}
6755 	}
6756 
6757 	return rem;
6758 }
6759 
6760 static ssize_t tracing_splice_read_pipe(struct file *filp,
6761 					loff_t *ppos,
6762 					struct pipe_inode_info *pipe,
6763 					size_t len,
6764 					unsigned int flags)
6765 {
6766 	struct page *pages_def[PIPE_DEF_BUFFERS];
6767 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6768 	struct trace_iterator *iter = filp->private_data;
6769 	struct splice_pipe_desc spd = {
6770 		.pages		= pages_def,
6771 		.partial	= partial_def,
6772 		.nr_pages	= 0, /* This gets updated below. */
6773 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6774 		.ops		= &default_pipe_buf_ops,
6775 		.spd_release	= tracing_spd_release_pipe,
6776 	};
6777 	ssize_t ret;
6778 	size_t rem;
6779 	unsigned int i;
6780 
6781 	if (splice_grow_spd(pipe, &spd))
6782 		return -ENOMEM;
6783 
6784 	mutex_lock(&iter->mutex);
6785 
6786 	if (iter->trace->splice_read) {
6787 		ret = iter->trace->splice_read(iter, filp,
6788 					       ppos, pipe, len, flags);
6789 		if (ret)
6790 			goto out_err;
6791 	}
6792 
6793 	ret = tracing_wait_pipe(filp);
6794 	if (ret <= 0)
6795 		goto out_err;
6796 
6797 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6798 		ret = -EFAULT;
6799 		goto out_err;
6800 	}
6801 
6802 	trace_event_read_lock();
6803 	trace_access_lock(iter->cpu_file);
6804 
6805 	/* Fill as many pages as possible. */
6806 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6807 		spd.pages[i] = alloc_page(GFP_KERNEL);
6808 		if (!spd.pages[i])
6809 			break;
6810 
6811 		rem = tracing_fill_pipe_page(rem, iter);
6812 
6813 		/* Copy the data into the page, so we can start over. */
6814 		ret = trace_seq_to_buffer(&iter->seq,
6815 					  page_address(spd.pages[i]),
6816 					  trace_seq_used(&iter->seq));
6817 		if (ret < 0) {
6818 			__free_page(spd.pages[i]);
6819 			break;
6820 		}
6821 		spd.partial[i].offset = 0;
6822 		spd.partial[i].len = trace_seq_used(&iter->seq);
6823 
6824 		trace_seq_init(&iter->seq);
6825 	}
6826 
6827 	trace_access_unlock(iter->cpu_file);
6828 	trace_event_read_unlock();
6829 	mutex_unlock(&iter->mutex);
6830 
6831 	spd.nr_pages = i;
6832 
6833 	if (i)
6834 		ret = splice_to_pipe(pipe, &spd);
6835 	else
6836 		ret = 0;
6837 out:
6838 	splice_shrink_spd(&spd);
6839 	return ret;
6840 
6841 out_err:
6842 	mutex_unlock(&iter->mutex);
6843 	goto out;
6844 }
6845 
6846 static ssize_t
6847 tracing_entries_read(struct file *filp, char __user *ubuf,
6848 		     size_t cnt, loff_t *ppos)
6849 {
6850 	struct inode *inode = file_inode(filp);
6851 	struct trace_array *tr = inode->i_private;
6852 	int cpu = tracing_get_cpu(inode);
6853 	char buf[64];
6854 	int r = 0;
6855 	ssize_t ret;
6856 
6857 	mutex_lock(&trace_types_lock);
6858 
6859 	if (cpu == RING_BUFFER_ALL_CPUS) {
6860 		int cpu, buf_size_same;
6861 		unsigned long size;
6862 
6863 		size = 0;
6864 		buf_size_same = 1;
6865 		/* check if all cpu sizes are same */
6866 		for_each_tracing_cpu(cpu) {
6867 			/* fill in the size from first enabled cpu */
6868 			if (size == 0)
6869 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6870 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6871 				buf_size_same = 0;
6872 				break;
6873 			}
6874 		}
6875 
6876 		if (buf_size_same) {
6877 			if (!tr->ring_buffer_expanded)
6878 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6879 					    size >> 10,
6880 					    trace_buf_size >> 10);
6881 			else
6882 				r = sprintf(buf, "%lu\n", size >> 10);
6883 		} else
6884 			r = sprintf(buf, "X\n");
6885 	} else
6886 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6887 
6888 	mutex_unlock(&trace_types_lock);
6889 
6890 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6891 	return ret;
6892 }
6893 
6894 static ssize_t
6895 tracing_entries_write(struct file *filp, const char __user *ubuf,
6896 		      size_t cnt, loff_t *ppos)
6897 {
6898 	struct inode *inode = file_inode(filp);
6899 	struct trace_array *tr = inode->i_private;
6900 	unsigned long val;
6901 	int ret;
6902 
6903 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6904 	if (ret)
6905 		return ret;
6906 
6907 	/* must have at least 1 entry */
6908 	if (!val)
6909 		return -EINVAL;
6910 
6911 	/* value is in KB */
6912 	val <<= 10;
6913 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6914 	if (ret < 0)
6915 		return ret;
6916 
6917 	*ppos += cnt;
6918 
6919 	return cnt;
6920 }
6921 
6922 static ssize_t
6923 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6924 				size_t cnt, loff_t *ppos)
6925 {
6926 	struct trace_array *tr = filp->private_data;
6927 	char buf[64];
6928 	int r, cpu;
6929 	unsigned long size = 0, expanded_size = 0;
6930 
6931 	mutex_lock(&trace_types_lock);
6932 	for_each_tracing_cpu(cpu) {
6933 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6934 		if (!tr->ring_buffer_expanded)
6935 			expanded_size += trace_buf_size >> 10;
6936 	}
6937 	if (tr->ring_buffer_expanded)
6938 		r = sprintf(buf, "%lu\n", size);
6939 	else
6940 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6941 	mutex_unlock(&trace_types_lock);
6942 
6943 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6944 }
6945 
6946 #define LAST_BOOT_HEADER ((void *)1)
6947 
6948 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
6949 {
6950 	struct trace_array *tr = m->private;
6951 	struct trace_scratch *tscratch = tr->scratch;
6952 	unsigned int index = *pos;
6953 
6954 	(*pos)++;
6955 
6956 	if (*pos == 1)
6957 		return LAST_BOOT_HEADER;
6958 
6959 	/* Only show offsets of the last boot data */
6960 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6961 		return NULL;
6962 
6963 	/* *pos 0 is for the header, 1 is for the first module */
6964 	index--;
6965 
6966 	if (index >= tscratch->nr_entries)
6967 		return NULL;
6968 
6969 	return &tscratch->entries[index];
6970 }
6971 
6972 static void *l_start(struct seq_file *m, loff_t *pos)
6973 {
6974 	mutex_lock(&scratch_mutex);
6975 
6976 	return l_next(m, NULL, pos);
6977 }
6978 
6979 static void l_stop(struct seq_file *m, void *p)
6980 {
6981 	mutex_unlock(&scratch_mutex);
6982 }
6983 
6984 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
6985 {
6986 	struct trace_scratch *tscratch = tr->scratch;
6987 
6988 	/*
6989 	 * Do not leak KASLR address. This only shows the KASLR address of
6990 	 * the last boot. When the ring buffer is started, the LAST_BOOT
6991 	 * flag gets cleared, and this should only report "current".
6992 	 * Otherwise it shows the KASLR address from the previous boot which
6993 	 * should not be the same as the current boot.
6994 	 */
6995 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6996 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
6997 	else
6998 		seq_puts(m, "# Current\n");
6999 }
7000 
7001 static int l_show(struct seq_file *m, void *v)
7002 {
7003 	struct trace_array *tr = m->private;
7004 	struct trace_mod_entry *entry = v;
7005 
7006 	if (v == LAST_BOOT_HEADER) {
7007 		show_last_boot_header(m, tr);
7008 		return 0;
7009 	}
7010 
7011 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7012 	return 0;
7013 }
7014 
7015 static const struct seq_operations last_boot_seq_ops = {
7016 	.start		= l_start,
7017 	.next		= l_next,
7018 	.stop		= l_stop,
7019 	.show		= l_show,
7020 };
7021 
7022 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7023 {
7024 	struct trace_array *tr = inode->i_private;
7025 	struct seq_file *m;
7026 	int ret;
7027 
7028 	ret = tracing_check_open_get_tr(tr);
7029 	if (ret)
7030 		return ret;
7031 
7032 	ret = seq_open(file, &last_boot_seq_ops);
7033 	if (ret) {
7034 		trace_array_put(tr);
7035 		return ret;
7036 	}
7037 
7038 	m = file->private_data;
7039 	m->private = tr;
7040 
7041 	return 0;
7042 }
7043 
7044 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7045 {
7046 	struct trace_array *tr = inode->i_private;
7047 	int cpu = tracing_get_cpu(inode);
7048 	int ret;
7049 
7050 	ret = tracing_check_open_get_tr(tr);
7051 	if (ret)
7052 		return ret;
7053 
7054 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7055 	if (ret < 0)
7056 		__trace_array_put(tr);
7057 	return ret;
7058 }
7059 
7060 static ssize_t
7061 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7062 			  size_t cnt, loff_t *ppos)
7063 {
7064 	/*
7065 	 * There is no need to read what the user has written, this function
7066 	 * is just to make sure that there is no error when "echo" is used
7067 	 */
7068 
7069 	*ppos += cnt;
7070 
7071 	return cnt;
7072 }
7073 
7074 static int
7075 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7076 {
7077 	struct trace_array *tr = inode->i_private;
7078 
7079 	/* disable tracing ? */
7080 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7081 		tracer_tracing_off(tr);
7082 	/* resize the ring buffer to 0 */
7083 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7084 
7085 	trace_array_put(tr);
7086 
7087 	return 0;
7088 }
7089 
7090 #define TRACE_MARKER_MAX_SIZE		4096
7091 
7092 static ssize_t
7093 tracing_mark_write(struct file *filp, const char __user *ubuf,
7094 					size_t cnt, loff_t *fpos)
7095 {
7096 	struct trace_array *tr = filp->private_data;
7097 	struct ring_buffer_event *event;
7098 	enum event_trigger_type tt = ETT_NONE;
7099 	struct trace_buffer *buffer;
7100 	struct print_entry *entry;
7101 	int meta_size;
7102 	ssize_t written;
7103 	size_t size;
7104 	int len;
7105 
7106 /* Used in tracing_mark_raw_write() as well */
7107 #define FAULTED_STR "<faulted>"
7108 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7109 
7110 	if (tracing_disabled)
7111 		return -EINVAL;
7112 
7113 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7114 		return -EINVAL;
7115 
7116 	if ((ssize_t)cnt < 0)
7117 		return -EINVAL;
7118 
7119 	if (cnt > TRACE_MARKER_MAX_SIZE)
7120 		cnt = TRACE_MARKER_MAX_SIZE;
7121 
7122 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7123  again:
7124 	size = cnt + meta_size;
7125 
7126 	/* If less than "<faulted>", then make sure we can still add that */
7127 	if (cnt < FAULTED_SIZE)
7128 		size += FAULTED_SIZE - cnt;
7129 
7130 	buffer = tr->array_buffer.buffer;
7131 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7132 					    tracing_gen_ctx());
7133 	if (unlikely(!event)) {
7134 		/*
7135 		 * If the size was greater than what was allowed, then
7136 		 * make it smaller and try again.
7137 		 */
7138 		if (size > ring_buffer_max_event_size(buffer)) {
7139 			/* cnt < FAULTED size should never be bigger than max */
7140 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7141 				return -EBADF;
7142 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7143 			/* The above should only happen once */
7144 			if (WARN_ON_ONCE(cnt + meta_size == size))
7145 				return -EBADF;
7146 			goto again;
7147 		}
7148 
7149 		/* Ring buffer disabled, return as if not open for write */
7150 		return -EBADF;
7151 	}
7152 
7153 	entry = ring_buffer_event_data(event);
7154 	entry->ip = _THIS_IP_;
7155 
7156 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7157 	if (len) {
7158 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7159 		cnt = FAULTED_SIZE;
7160 		written = -EFAULT;
7161 	} else
7162 		written = cnt;
7163 
7164 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7165 		/* do not add \n before testing triggers, but add \0 */
7166 		entry->buf[cnt] = '\0';
7167 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7168 	}
7169 
7170 	if (entry->buf[cnt - 1] != '\n') {
7171 		entry->buf[cnt] = '\n';
7172 		entry->buf[cnt + 1] = '\0';
7173 	} else
7174 		entry->buf[cnt] = '\0';
7175 
7176 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7177 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7178 	__buffer_unlock_commit(buffer, event);
7179 
7180 	if (tt)
7181 		event_triggers_post_call(tr->trace_marker_file, tt);
7182 
7183 	return written;
7184 }
7185 
7186 static ssize_t
7187 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7188 					size_t cnt, loff_t *fpos)
7189 {
7190 	struct trace_array *tr = filp->private_data;
7191 	struct ring_buffer_event *event;
7192 	struct trace_buffer *buffer;
7193 	struct raw_data_entry *entry;
7194 	ssize_t written;
7195 	int size;
7196 	int len;
7197 
7198 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7199 
7200 	if (tracing_disabled)
7201 		return -EINVAL;
7202 
7203 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7204 		return -EINVAL;
7205 
7206 	/* The marker must at least have a tag id */
7207 	if (cnt < sizeof(unsigned int))
7208 		return -EINVAL;
7209 
7210 	size = sizeof(*entry) + cnt;
7211 	if (cnt < FAULT_SIZE_ID)
7212 		size += FAULT_SIZE_ID - cnt;
7213 
7214 	buffer = tr->array_buffer.buffer;
7215 
7216 	if (size > ring_buffer_max_event_size(buffer))
7217 		return -EINVAL;
7218 
7219 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7220 					    tracing_gen_ctx());
7221 	if (!event)
7222 		/* Ring buffer disabled, return as if not open for write */
7223 		return -EBADF;
7224 
7225 	entry = ring_buffer_event_data(event);
7226 
7227 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7228 	if (len) {
7229 		entry->id = -1;
7230 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7231 		written = -EFAULT;
7232 	} else
7233 		written = cnt;
7234 
7235 	__buffer_unlock_commit(buffer, event);
7236 
7237 	return written;
7238 }
7239 
7240 static int tracing_clock_show(struct seq_file *m, void *v)
7241 {
7242 	struct trace_array *tr = m->private;
7243 	int i;
7244 
7245 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7246 		seq_printf(m,
7247 			"%s%s%s%s", i ? " " : "",
7248 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7249 			i == tr->clock_id ? "]" : "");
7250 	seq_putc(m, '\n');
7251 
7252 	return 0;
7253 }
7254 
7255 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7256 {
7257 	int i;
7258 
7259 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7260 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7261 			break;
7262 	}
7263 	if (i == ARRAY_SIZE(trace_clocks))
7264 		return -EINVAL;
7265 
7266 	mutex_lock(&trace_types_lock);
7267 
7268 	tr->clock_id = i;
7269 
7270 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7271 
7272 	/*
7273 	 * New clock may not be consistent with the previous clock.
7274 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7275 	 */
7276 	tracing_reset_online_cpus(&tr->array_buffer);
7277 
7278 #ifdef CONFIG_TRACER_MAX_TRACE
7279 	if (tr->max_buffer.buffer)
7280 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7281 	tracing_reset_online_cpus(&tr->max_buffer);
7282 #endif
7283 
7284 	mutex_unlock(&trace_types_lock);
7285 
7286 	return 0;
7287 }
7288 
7289 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7290 				   size_t cnt, loff_t *fpos)
7291 {
7292 	struct seq_file *m = filp->private_data;
7293 	struct trace_array *tr = m->private;
7294 	char buf[64];
7295 	const char *clockstr;
7296 	int ret;
7297 
7298 	if (cnt >= sizeof(buf))
7299 		return -EINVAL;
7300 
7301 	if (copy_from_user(buf, ubuf, cnt))
7302 		return -EFAULT;
7303 
7304 	buf[cnt] = 0;
7305 
7306 	clockstr = strstrip(buf);
7307 
7308 	ret = tracing_set_clock(tr, clockstr);
7309 	if (ret)
7310 		return ret;
7311 
7312 	*fpos += cnt;
7313 
7314 	return cnt;
7315 }
7316 
7317 static int tracing_clock_open(struct inode *inode, struct file *file)
7318 {
7319 	struct trace_array *tr = inode->i_private;
7320 	int ret;
7321 
7322 	ret = tracing_check_open_get_tr(tr);
7323 	if (ret)
7324 		return ret;
7325 
7326 	ret = single_open(file, tracing_clock_show, inode->i_private);
7327 	if (ret < 0)
7328 		trace_array_put(tr);
7329 
7330 	return ret;
7331 }
7332 
7333 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7334 {
7335 	struct trace_array *tr = m->private;
7336 
7337 	mutex_lock(&trace_types_lock);
7338 
7339 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7340 		seq_puts(m, "delta [absolute]\n");
7341 	else
7342 		seq_puts(m, "[delta] absolute\n");
7343 
7344 	mutex_unlock(&trace_types_lock);
7345 
7346 	return 0;
7347 }
7348 
7349 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7350 {
7351 	struct trace_array *tr = inode->i_private;
7352 	int ret;
7353 
7354 	ret = tracing_check_open_get_tr(tr);
7355 	if (ret)
7356 		return ret;
7357 
7358 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7359 	if (ret < 0)
7360 		trace_array_put(tr);
7361 
7362 	return ret;
7363 }
7364 
7365 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7366 {
7367 	if (rbe == this_cpu_read(trace_buffered_event))
7368 		return ring_buffer_time_stamp(buffer);
7369 
7370 	return ring_buffer_event_time_stamp(buffer, rbe);
7371 }
7372 
7373 /*
7374  * Set or disable using the per CPU trace_buffer_event when possible.
7375  */
7376 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7377 {
7378 	guard(mutex)(&trace_types_lock);
7379 
7380 	if (set && tr->no_filter_buffering_ref++)
7381 		return 0;
7382 
7383 	if (!set) {
7384 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7385 			return -EINVAL;
7386 
7387 		--tr->no_filter_buffering_ref;
7388 	}
7389 
7390 	return 0;
7391 }
7392 
7393 struct ftrace_buffer_info {
7394 	struct trace_iterator	iter;
7395 	void			*spare;
7396 	unsigned int		spare_cpu;
7397 	unsigned int		spare_size;
7398 	unsigned int		read;
7399 };
7400 
7401 #ifdef CONFIG_TRACER_SNAPSHOT
7402 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7403 {
7404 	struct trace_array *tr = inode->i_private;
7405 	struct trace_iterator *iter;
7406 	struct seq_file *m;
7407 	int ret;
7408 
7409 	ret = tracing_check_open_get_tr(tr);
7410 	if (ret)
7411 		return ret;
7412 
7413 	if (file->f_mode & FMODE_READ) {
7414 		iter = __tracing_open(inode, file, true);
7415 		if (IS_ERR(iter))
7416 			ret = PTR_ERR(iter);
7417 	} else {
7418 		/* Writes still need the seq_file to hold the private data */
7419 		ret = -ENOMEM;
7420 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7421 		if (!m)
7422 			goto out;
7423 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7424 		if (!iter) {
7425 			kfree(m);
7426 			goto out;
7427 		}
7428 		ret = 0;
7429 
7430 		iter->tr = tr;
7431 		iter->array_buffer = &tr->max_buffer;
7432 		iter->cpu_file = tracing_get_cpu(inode);
7433 		m->private = iter;
7434 		file->private_data = m;
7435 	}
7436 out:
7437 	if (ret < 0)
7438 		trace_array_put(tr);
7439 
7440 	return ret;
7441 }
7442 
7443 static void tracing_swap_cpu_buffer(void *tr)
7444 {
7445 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7446 }
7447 
7448 static ssize_t
7449 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7450 		       loff_t *ppos)
7451 {
7452 	struct seq_file *m = filp->private_data;
7453 	struct trace_iterator *iter = m->private;
7454 	struct trace_array *tr = iter->tr;
7455 	unsigned long val;
7456 	int ret;
7457 
7458 	ret = tracing_update_buffers(tr);
7459 	if (ret < 0)
7460 		return ret;
7461 
7462 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7463 	if (ret)
7464 		return ret;
7465 
7466 	guard(mutex)(&trace_types_lock);
7467 
7468 	if (tr->current_trace->use_max_tr)
7469 		return -EBUSY;
7470 
7471 	local_irq_disable();
7472 	arch_spin_lock(&tr->max_lock);
7473 	if (tr->cond_snapshot)
7474 		ret = -EBUSY;
7475 	arch_spin_unlock(&tr->max_lock);
7476 	local_irq_enable();
7477 	if (ret)
7478 		return ret;
7479 
7480 	switch (val) {
7481 	case 0:
7482 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7483 			return -EINVAL;
7484 		if (tr->allocated_snapshot)
7485 			free_snapshot(tr);
7486 		break;
7487 	case 1:
7488 /* Only allow per-cpu swap if the ring buffer supports it */
7489 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7490 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7491 			return -EINVAL;
7492 #endif
7493 		if (tr->allocated_snapshot)
7494 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7495 					&tr->array_buffer, iter->cpu_file);
7496 
7497 		ret = tracing_arm_snapshot_locked(tr);
7498 		if (ret)
7499 			return ret;
7500 
7501 		/* Now, we're going to swap */
7502 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7503 			local_irq_disable();
7504 			update_max_tr(tr, current, smp_processor_id(), NULL);
7505 			local_irq_enable();
7506 		} else {
7507 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7508 						 (void *)tr, 1);
7509 		}
7510 		tracing_disarm_snapshot(tr);
7511 		break;
7512 	default:
7513 		if (tr->allocated_snapshot) {
7514 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7515 				tracing_reset_online_cpus(&tr->max_buffer);
7516 			else
7517 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7518 		}
7519 		break;
7520 	}
7521 
7522 	if (ret >= 0) {
7523 		*ppos += cnt;
7524 		ret = cnt;
7525 	}
7526 
7527 	return ret;
7528 }
7529 
7530 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7531 {
7532 	struct seq_file *m = file->private_data;
7533 	int ret;
7534 
7535 	ret = tracing_release(inode, file);
7536 
7537 	if (file->f_mode & FMODE_READ)
7538 		return ret;
7539 
7540 	/* If write only, the seq_file is just a stub */
7541 	if (m)
7542 		kfree(m->private);
7543 	kfree(m);
7544 
7545 	return 0;
7546 }
7547 
7548 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7549 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7550 				    size_t count, loff_t *ppos);
7551 static int tracing_buffers_release(struct inode *inode, struct file *file);
7552 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7553 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7554 
7555 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7556 {
7557 	struct ftrace_buffer_info *info;
7558 	int ret;
7559 
7560 	/* The following checks for tracefs lockdown */
7561 	ret = tracing_buffers_open(inode, filp);
7562 	if (ret < 0)
7563 		return ret;
7564 
7565 	info = filp->private_data;
7566 
7567 	if (info->iter.trace->use_max_tr) {
7568 		tracing_buffers_release(inode, filp);
7569 		return -EBUSY;
7570 	}
7571 
7572 	info->iter.snapshot = true;
7573 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7574 
7575 	return ret;
7576 }
7577 
7578 #endif /* CONFIG_TRACER_SNAPSHOT */
7579 
7580 
7581 static const struct file_operations tracing_thresh_fops = {
7582 	.open		= tracing_open_generic,
7583 	.read		= tracing_thresh_read,
7584 	.write		= tracing_thresh_write,
7585 	.llseek		= generic_file_llseek,
7586 };
7587 
7588 #ifdef CONFIG_TRACER_MAX_TRACE
7589 static const struct file_operations tracing_max_lat_fops = {
7590 	.open		= tracing_open_generic_tr,
7591 	.read		= tracing_max_lat_read,
7592 	.write		= tracing_max_lat_write,
7593 	.llseek		= generic_file_llseek,
7594 	.release	= tracing_release_generic_tr,
7595 };
7596 #endif
7597 
7598 static const struct file_operations set_tracer_fops = {
7599 	.open		= tracing_open_generic_tr,
7600 	.read		= tracing_set_trace_read,
7601 	.write		= tracing_set_trace_write,
7602 	.llseek		= generic_file_llseek,
7603 	.release	= tracing_release_generic_tr,
7604 };
7605 
7606 static const struct file_operations tracing_pipe_fops = {
7607 	.open		= tracing_open_pipe,
7608 	.poll		= tracing_poll_pipe,
7609 	.read		= tracing_read_pipe,
7610 	.splice_read	= tracing_splice_read_pipe,
7611 	.release	= tracing_release_pipe,
7612 };
7613 
7614 static const struct file_operations tracing_entries_fops = {
7615 	.open		= tracing_open_generic_tr,
7616 	.read		= tracing_entries_read,
7617 	.write		= tracing_entries_write,
7618 	.llseek		= generic_file_llseek,
7619 	.release	= tracing_release_generic_tr,
7620 };
7621 
7622 static const struct file_operations tracing_buffer_meta_fops = {
7623 	.open		= tracing_buffer_meta_open,
7624 	.read		= seq_read,
7625 	.llseek		= seq_lseek,
7626 	.release	= tracing_seq_release,
7627 };
7628 
7629 static const struct file_operations tracing_total_entries_fops = {
7630 	.open		= tracing_open_generic_tr,
7631 	.read		= tracing_total_entries_read,
7632 	.llseek		= generic_file_llseek,
7633 	.release	= tracing_release_generic_tr,
7634 };
7635 
7636 static const struct file_operations tracing_free_buffer_fops = {
7637 	.open		= tracing_open_generic_tr,
7638 	.write		= tracing_free_buffer_write,
7639 	.release	= tracing_free_buffer_release,
7640 };
7641 
7642 static const struct file_operations tracing_mark_fops = {
7643 	.open		= tracing_mark_open,
7644 	.write		= tracing_mark_write,
7645 	.release	= tracing_release_generic_tr,
7646 };
7647 
7648 static const struct file_operations tracing_mark_raw_fops = {
7649 	.open		= tracing_mark_open,
7650 	.write		= tracing_mark_raw_write,
7651 	.release	= tracing_release_generic_tr,
7652 };
7653 
7654 static const struct file_operations trace_clock_fops = {
7655 	.open		= tracing_clock_open,
7656 	.read		= seq_read,
7657 	.llseek		= seq_lseek,
7658 	.release	= tracing_single_release_tr,
7659 	.write		= tracing_clock_write,
7660 };
7661 
7662 static const struct file_operations trace_time_stamp_mode_fops = {
7663 	.open		= tracing_time_stamp_mode_open,
7664 	.read		= seq_read,
7665 	.llseek		= seq_lseek,
7666 	.release	= tracing_single_release_tr,
7667 };
7668 
7669 static const struct file_operations last_boot_fops = {
7670 	.open		= tracing_last_boot_open,
7671 	.read		= seq_read,
7672 	.llseek		= seq_lseek,
7673 	.release	= tracing_seq_release,
7674 };
7675 
7676 #ifdef CONFIG_TRACER_SNAPSHOT
7677 static const struct file_operations snapshot_fops = {
7678 	.open		= tracing_snapshot_open,
7679 	.read		= seq_read,
7680 	.write		= tracing_snapshot_write,
7681 	.llseek		= tracing_lseek,
7682 	.release	= tracing_snapshot_release,
7683 };
7684 
7685 static const struct file_operations snapshot_raw_fops = {
7686 	.open		= snapshot_raw_open,
7687 	.read		= tracing_buffers_read,
7688 	.release	= tracing_buffers_release,
7689 	.splice_read	= tracing_buffers_splice_read,
7690 };
7691 
7692 #endif /* CONFIG_TRACER_SNAPSHOT */
7693 
7694 /*
7695  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7696  * @filp: The active open file structure
7697  * @ubuf: The userspace provided buffer to read value into
7698  * @cnt: The maximum number of bytes to read
7699  * @ppos: The current "file" position
7700  *
7701  * This function implements the write interface for a struct trace_min_max_param.
7702  * The filp->private_data must point to a trace_min_max_param structure that
7703  * defines where to write the value, the min and the max acceptable values,
7704  * and a lock to protect the write.
7705  */
7706 static ssize_t
7707 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7708 {
7709 	struct trace_min_max_param *param = filp->private_data;
7710 	u64 val;
7711 	int err;
7712 
7713 	if (!param)
7714 		return -EFAULT;
7715 
7716 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7717 	if (err)
7718 		return err;
7719 
7720 	if (param->lock)
7721 		mutex_lock(param->lock);
7722 
7723 	if (param->min && val < *param->min)
7724 		err = -EINVAL;
7725 
7726 	if (param->max && val > *param->max)
7727 		err = -EINVAL;
7728 
7729 	if (!err)
7730 		*param->val = val;
7731 
7732 	if (param->lock)
7733 		mutex_unlock(param->lock);
7734 
7735 	if (err)
7736 		return err;
7737 
7738 	return cnt;
7739 }
7740 
7741 /*
7742  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7743  * @filp: The active open file structure
7744  * @ubuf: The userspace provided buffer to read value into
7745  * @cnt: The maximum number of bytes to read
7746  * @ppos: The current "file" position
7747  *
7748  * This function implements the read interface for a struct trace_min_max_param.
7749  * The filp->private_data must point to a trace_min_max_param struct with valid
7750  * data.
7751  */
7752 static ssize_t
7753 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7754 {
7755 	struct trace_min_max_param *param = filp->private_data;
7756 	char buf[U64_STR_SIZE];
7757 	int len;
7758 	u64 val;
7759 
7760 	if (!param)
7761 		return -EFAULT;
7762 
7763 	val = *param->val;
7764 
7765 	if (cnt > sizeof(buf))
7766 		cnt = sizeof(buf);
7767 
7768 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7769 
7770 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7771 }
7772 
7773 const struct file_operations trace_min_max_fops = {
7774 	.open		= tracing_open_generic,
7775 	.read		= trace_min_max_read,
7776 	.write		= trace_min_max_write,
7777 };
7778 
7779 #define TRACING_LOG_ERRS_MAX	8
7780 #define TRACING_LOG_LOC_MAX	128
7781 
7782 #define CMD_PREFIX "  Command: "
7783 
7784 struct err_info {
7785 	const char	**errs;	/* ptr to loc-specific array of err strings */
7786 	u8		type;	/* index into errs -> specific err string */
7787 	u16		pos;	/* caret position */
7788 	u64		ts;
7789 };
7790 
7791 struct tracing_log_err {
7792 	struct list_head	list;
7793 	struct err_info		info;
7794 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7795 	char			*cmd;                     /* what caused err */
7796 };
7797 
7798 static DEFINE_MUTEX(tracing_err_log_lock);
7799 
7800 static struct tracing_log_err *alloc_tracing_log_err(int len)
7801 {
7802 	struct tracing_log_err *err;
7803 
7804 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7805 	if (!err)
7806 		return ERR_PTR(-ENOMEM);
7807 
7808 	err->cmd = kzalloc(len, GFP_KERNEL);
7809 	if (!err->cmd) {
7810 		kfree(err);
7811 		return ERR_PTR(-ENOMEM);
7812 	}
7813 
7814 	return err;
7815 }
7816 
7817 static void free_tracing_log_err(struct tracing_log_err *err)
7818 {
7819 	kfree(err->cmd);
7820 	kfree(err);
7821 }
7822 
7823 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7824 						   int len)
7825 {
7826 	struct tracing_log_err *err;
7827 	char *cmd;
7828 
7829 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7830 		err = alloc_tracing_log_err(len);
7831 		if (PTR_ERR(err) != -ENOMEM)
7832 			tr->n_err_log_entries++;
7833 
7834 		return err;
7835 	}
7836 	cmd = kzalloc(len, GFP_KERNEL);
7837 	if (!cmd)
7838 		return ERR_PTR(-ENOMEM);
7839 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7840 	kfree(err->cmd);
7841 	err->cmd = cmd;
7842 	list_del(&err->list);
7843 
7844 	return err;
7845 }
7846 
7847 /**
7848  * err_pos - find the position of a string within a command for error careting
7849  * @cmd: The tracing command that caused the error
7850  * @str: The string to position the caret at within @cmd
7851  *
7852  * Finds the position of the first occurrence of @str within @cmd.  The
7853  * return value can be passed to tracing_log_err() for caret placement
7854  * within @cmd.
7855  *
7856  * Returns the index within @cmd of the first occurrence of @str or 0
7857  * if @str was not found.
7858  */
7859 unsigned int err_pos(char *cmd, const char *str)
7860 {
7861 	char *found;
7862 
7863 	if (WARN_ON(!strlen(cmd)))
7864 		return 0;
7865 
7866 	found = strstr(cmd, str);
7867 	if (found)
7868 		return found - cmd;
7869 
7870 	return 0;
7871 }
7872 
7873 /**
7874  * tracing_log_err - write an error to the tracing error log
7875  * @tr: The associated trace array for the error (NULL for top level array)
7876  * @loc: A string describing where the error occurred
7877  * @cmd: The tracing command that caused the error
7878  * @errs: The array of loc-specific static error strings
7879  * @type: The index into errs[], which produces the specific static err string
7880  * @pos: The position the caret should be placed in the cmd
7881  *
7882  * Writes an error into tracing/error_log of the form:
7883  *
7884  * <loc>: error: <text>
7885  *   Command: <cmd>
7886  *              ^
7887  *
7888  * tracing/error_log is a small log file containing the last
7889  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7890  * unless there has been a tracing error, and the error log can be
7891  * cleared and have its memory freed by writing the empty string in
7892  * truncation mode to it i.e. echo > tracing/error_log.
7893  *
7894  * NOTE: the @errs array along with the @type param are used to
7895  * produce a static error string - this string is not copied and saved
7896  * when the error is logged - only a pointer to it is saved.  See
7897  * existing callers for examples of how static strings are typically
7898  * defined for use with tracing_log_err().
7899  */
7900 void tracing_log_err(struct trace_array *tr,
7901 		     const char *loc, const char *cmd,
7902 		     const char **errs, u8 type, u16 pos)
7903 {
7904 	struct tracing_log_err *err;
7905 	int len = 0;
7906 
7907 	if (!tr)
7908 		tr = &global_trace;
7909 
7910 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7911 
7912 	guard(mutex)(&tracing_err_log_lock);
7913 
7914 	err = get_tracing_log_err(tr, len);
7915 	if (PTR_ERR(err) == -ENOMEM)
7916 		return;
7917 
7918 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7919 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7920 
7921 	err->info.errs = errs;
7922 	err->info.type = type;
7923 	err->info.pos = pos;
7924 	err->info.ts = local_clock();
7925 
7926 	list_add_tail(&err->list, &tr->err_log);
7927 }
7928 
7929 static void clear_tracing_err_log(struct trace_array *tr)
7930 {
7931 	struct tracing_log_err *err, *next;
7932 
7933 	mutex_lock(&tracing_err_log_lock);
7934 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7935 		list_del(&err->list);
7936 		free_tracing_log_err(err);
7937 	}
7938 
7939 	tr->n_err_log_entries = 0;
7940 	mutex_unlock(&tracing_err_log_lock);
7941 }
7942 
7943 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7944 {
7945 	struct trace_array *tr = m->private;
7946 
7947 	mutex_lock(&tracing_err_log_lock);
7948 
7949 	return seq_list_start(&tr->err_log, *pos);
7950 }
7951 
7952 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7953 {
7954 	struct trace_array *tr = m->private;
7955 
7956 	return seq_list_next(v, &tr->err_log, pos);
7957 }
7958 
7959 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7960 {
7961 	mutex_unlock(&tracing_err_log_lock);
7962 }
7963 
7964 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7965 {
7966 	u16 i;
7967 
7968 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7969 		seq_putc(m, ' ');
7970 	for (i = 0; i < pos; i++)
7971 		seq_putc(m, ' ');
7972 	seq_puts(m, "^\n");
7973 }
7974 
7975 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7976 {
7977 	struct tracing_log_err *err = v;
7978 
7979 	if (err) {
7980 		const char *err_text = err->info.errs[err->info.type];
7981 		u64 sec = err->info.ts;
7982 		u32 nsec;
7983 
7984 		nsec = do_div(sec, NSEC_PER_SEC);
7985 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7986 			   err->loc, err_text);
7987 		seq_printf(m, "%s", err->cmd);
7988 		tracing_err_log_show_pos(m, err->info.pos);
7989 	}
7990 
7991 	return 0;
7992 }
7993 
7994 static const struct seq_operations tracing_err_log_seq_ops = {
7995 	.start  = tracing_err_log_seq_start,
7996 	.next   = tracing_err_log_seq_next,
7997 	.stop   = tracing_err_log_seq_stop,
7998 	.show   = tracing_err_log_seq_show
7999 };
8000 
8001 static int tracing_err_log_open(struct inode *inode, struct file *file)
8002 {
8003 	struct trace_array *tr = inode->i_private;
8004 	int ret = 0;
8005 
8006 	ret = tracing_check_open_get_tr(tr);
8007 	if (ret)
8008 		return ret;
8009 
8010 	/* If this file was opened for write, then erase contents */
8011 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8012 		clear_tracing_err_log(tr);
8013 
8014 	if (file->f_mode & FMODE_READ) {
8015 		ret = seq_open(file, &tracing_err_log_seq_ops);
8016 		if (!ret) {
8017 			struct seq_file *m = file->private_data;
8018 			m->private = tr;
8019 		} else {
8020 			trace_array_put(tr);
8021 		}
8022 	}
8023 	return ret;
8024 }
8025 
8026 static ssize_t tracing_err_log_write(struct file *file,
8027 				     const char __user *buffer,
8028 				     size_t count, loff_t *ppos)
8029 {
8030 	return count;
8031 }
8032 
8033 static int tracing_err_log_release(struct inode *inode, struct file *file)
8034 {
8035 	struct trace_array *tr = inode->i_private;
8036 
8037 	trace_array_put(tr);
8038 
8039 	if (file->f_mode & FMODE_READ)
8040 		seq_release(inode, file);
8041 
8042 	return 0;
8043 }
8044 
8045 static const struct file_operations tracing_err_log_fops = {
8046 	.open           = tracing_err_log_open,
8047 	.write		= tracing_err_log_write,
8048 	.read           = seq_read,
8049 	.llseek         = tracing_lseek,
8050 	.release        = tracing_err_log_release,
8051 };
8052 
8053 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8054 {
8055 	struct trace_array *tr = inode->i_private;
8056 	struct ftrace_buffer_info *info;
8057 	int ret;
8058 
8059 	ret = tracing_check_open_get_tr(tr);
8060 	if (ret)
8061 		return ret;
8062 
8063 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8064 	if (!info) {
8065 		trace_array_put(tr);
8066 		return -ENOMEM;
8067 	}
8068 
8069 	mutex_lock(&trace_types_lock);
8070 
8071 	info->iter.tr		= tr;
8072 	info->iter.cpu_file	= tracing_get_cpu(inode);
8073 	info->iter.trace	= tr->current_trace;
8074 	info->iter.array_buffer = &tr->array_buffer;
8075 	info->spare		= NULL;
8076 	/* Force reading ring buffer for first read */
8077 	info->read		= (unsigned int)-1;
8078 
8079 	filp->private_data = info;
8080 
8081 	tr->trace_ref++;
8082 
8083 	mutex_unlock(&trace_types_lock);
8084 
8085 	ret = nonseekable_open(inode, filp);
8086 	if (ret < 0)
8087 		trace_array_put(tr);
8088 
8089 	return ret;
8090 }
8091 
8092 static __poll_t
8093 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8094 {
8095 	struct ftrace_buffer_info *info = filp->private_data;
8096 	struct trace_iterator *iter = &info->iter;
8097 
8098 	return trace_poll(iter, filp, poll_table);
8099 }
8100 
8101 static ssize_t
8102 tracing_buffers_read(struct file *filp, char __user *ubuf,
8103 		     size_t count, loff_t *ppos)
8104 {
8105 	struct ftrace_buffer_info *info = filp->private_data;
8106 	struct trace_iterator *iter = &info->iter;
8107 	void *trace_data;
8108 	int page_size;
8109 	ssize_t ret = 0;
8110 	ssize_t size;
8111 
8112 	if (!count)
8113 		return 0;
8114 
8115 #ifdef CONFIG_TRACER_MAX_TRACE
8116 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8117 		return -EBUSY;
8118 #endif
8119 
8120 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8121 
8122 	/* Make sure the spare matches the current sub buffer size */
8123 	if (info->spare) {
8124 		if (page_size != info->spare_size) {
8125 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8126 						   info->spare_cpu, info->spare);
8127 			info->spare = NULL;
8128 		}
8129 	}
8130 
8131 	if (!info->spare) {
8132 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8133 							  iter->cpu_file);
8134 		if (IS_ERR(info->spare)) {
8135 			ret = PTR_ERR(info->spare);
8136 			info->spare = NULL;
8137 		} else {
8138 			info->spare_cpu = iter->cpu_file;
8139 			info->spare_size = page_size;
8140 		}
8141 	}
8142 	if (!info->spare)
8143 		return ret;
8144 
8145 	/* Do we have previous read data to read? */
8146 	if (info->read < page_size)
8147 		goto read;
8148 
8149  again:
8150 	trace_access_lock(iter->cpu_file);
8151 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8152 				    info->spare,
8153 				    count,
8154 				    iter->cpu_file, 0);
8155 	trace_access_unlock(iter->cpu_file);
8156 
8157 	if (ret < 0) {
8158 		if (trace_empty(iter) && !iter->closed) {
8159 			if ((filp->f_flags & O_NONBLOCK))
8160 				return -EAGAIN;
8161 
8162 			ret = wait_on_pipe(iter, 0);
8163 			if (ret)
8164 				return ret;
8165 
8166 			goto again;
8167 		}
8168 		return 0;
8169 	}
8170 
8171 	info->read = 0;
8172  read:
8173 	size = page_size - info->read;
8174 	if (size > count)
8175 		size = count;
8176 	trace_data = ring_buffer_read_page_data(info->spare);
8177 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8178 	if (ret == size)
8179 		return -EFAULT;
8180 
8181 	size -= ret;
8182 
8183 	*ppos += size;
8184 	info->read += size;
8185 
8186 	return size;
8187 }
8188 
8189 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8190 {
8191 	struct ftrace_buffer_info *info = file->private_data;
8192 	struct trace_iterator *iter = &info->iter;
8193 
8194 	iter->closed = true;
8195 	/* Make sure the waiters see the new wait_index */
8196 	(void)atomic_fetch_inc_release(&iter->wait_index);
8197 
8198 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8199 
8200 	return 0;
8201 }
8202 
8203 static int tracing_buffers_release(struct inode *inode, struct file *file)
8204 {
8205 	struct ftrace_buffer_info *info = file->private_data;
8206 	struct trace_iterator *iter = &info->iter;
8207 
8208 	mutex_lock(&trace_types_lock);
8209 
8210 	iter->tr->trace_ref--;
8211 
8212 	__trace_array_put(iter->tr);
8213 
8214 	if (info->spare)
8215 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8216 					   info->spare_cpu, info->spare);
8217 	kvfree(info);
8218 
8219 	mutex_unlock(&trace_types_lock);
8220 
8221 	return 0;
8222 }
8223 
8224 struct buffer_ref {
8225 	struct trace_buffer	*buffer;
8226 	void			*page;
8227 	int			cpu;
8228 	refcount_t		refcount;
8229 };
8230 
8231 static void buffer_ref_release(struct buffer_ref *ref)
8232 {
8233 	if (!refcount_dec_and_test(&ref->refcount))
8234 		return;
8235 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8236 	kfree(ref);
8237 }
8238 
8239 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8240 				    struct pipe_buffer *buf)
8241 {
8242 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8243 
8244 	buffer_ref_release(ref);
8245 	buf->private = 0;
8246 }
8247 
8248 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8249 				struct pipe_buffer *buf)
8250 {
8251 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8252 
8253 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8254 		return false;
8255 
8256 	refcount_inc(&ref->refcount);
8257 	return true;
8258 }
8259 
8260 /* Pipe buffer operations for a buffer. */
8261 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8262 	.release		= buffer_pipe_buf_release,
8263 	.get			= buffer_pipe_buf_get,
8264 };
8265 
8266 /*
8267  * Callback from splice_to_pipe(), if we need to release some pages
8268  * at the end of the spd in case we error'ed out in filling the pipe.
8269  */
8270 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8271 {
8272 	struct buffer_ref *ref =
8273 		(struct buffer_ref *)spd->partial[i].private;
8274 
8275 	buffer_ref_release(ref);
8276 	spd->partial[i].private = 0;
8277 }
8278 
8279 static ssize_t
8280 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8281 			    struct pipe_inode_info *pipe, size_t len,
8282 			    unsigned int flags)
8283 {
8284 	struct ftrace_buffer_info *info = file->private_data;
8285 	struct trace_iterator *iter = &info->iter;
8286 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8287 	struct page *pages_def[PIPE_DEF_BUFFERS];
8288 	struct splice_pipe_desc spd = {
8289 		.pages		= pages_def,
8290 		.partial	= partial_def,
8291 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8292 		.ops		= &buffer_pipe_buf_ops,
8293 		.spd_release	= buffer_spd_release,
8294 	};
8295 	struct buffer_ref *ref;
8296 	bool woken = false;
8297 	int page_size;
8298 	int entries, i;
8299 	ssize_t ret = 0;
8300 
8301 #ifdef CONFIG_TRACER_MAX_TRACE
8302 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8303 		return -EBUSY;
8304 #endif
8305 
8306 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8307 	if (*ppos & (page_size - 1))
8308 		return -EINVAL;
8309 
8310 	if (len & (page_size - 1)) {
8311 		if (len < page_size)
8312 			return -EINVAL;
8313 		len &= (~(page_size - 1));
8314 	}
8315 
8316 	if (splice_grow_spd(pipe, &spd))
8317 		return -ENOMEM;
8318 
8319  again:
8320 	trace_access_lock(iter->cpu_file);
8321 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8322 
8323 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8324 		struct page *page;
8325 		int r;
8326 
8327 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8328 		if (!ref) {
8329 			ret = -ENOMEM;
8330 			break;
8331 		}
8332 
8333 		refcount_set(&ref->refcount, 1);
8334 		ref->buffer = iter->array_buffer->buffer;
8335 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8336 		if (IS_ERR(ref->page)) {
8337 			ret = PTR_ERR(ref->page);
8338 			ref->page = NULL;
8339 			kfree(ref);
8340 			break;
8341 		}
8342 		ref->cpu = iter->cpu_file;
8343 
8344 		r = ring_buffer_read_page(ref->buffer, ref->page,
8345 					  len, iter->cpu_file, 1);
8346 		if (r < 0) {
8347 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8348 						   ref->page);
8349 			kfree(ref);
8350 			break;
8351 		}
8352 
8353 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8354 
8355 		spd.pages[i] = page;
8356 		spd.partial[i].len = page_size;
8357 		spd.partial[i].offset = 0;
8358 		spd.partial[i].private = (unsigned long)ref;
8359 		spd.nr_pages++;
8360 		*ppos += page_size;
8361 
8362 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8363 	}
8364 
8365 	trace_access_unlock(iter->cpu_file);
8366 	spd.nr_pages = i;
8367 
8368 	/* did we read anything? */
8369 	if (!spd.nr_pages) {
8370 
8371 		if (ret)
8372 			goto out;
8373 
8374 		if (woken)
8375 			goto out;
8376 
8377 		ret = -EAGAIN;
8378 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8379 			goto out;
8380 
8381 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8382 		if (ret)
8383 			goto out;
8384 
8385 		/* No need to wait after waking up when tracing is off */
8386 		if (!tracer_tracing_is_on(iter->tr))
8387 			goto out;
8388 
8389 		/* Iterate one more time to collect any new data then exit */
8390 		woken = true;
8391 
8392 		goto again;
8393 	}
8394 
8395 	ret = splice_to_pipe(pipe, &spd);
8396 out:
8397 	splice_shrink_spd(&spd);
8398 
8399 	return ret;
8400 }
8401 
8402 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8403 {
8404 	struct ftrace_buffer_info *info = file->private_data;
8405 	struct trace_iterator *iter = &info->iter;
8406 	int err;
8407 
8408 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8409 		if (!(file->f_flags & O_NONBLOCK)) {
8410 			err = ring_buffer_wait(iter->array_buffer->buffer,
8411 					       iter->cpu_file,
8412 					       iter->tr->buffer_percent,
8413 					       NULL, NULL);
8414 			if (err)
8415 				return err;
8416 		}
8417 
8418 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8419 						  iter->cpu_file);
8420 	} else if (cmd) {
8421 		return -ENOTTY;
8422 	}
8423 
8424 	/*
8425 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8426 	 * waiters
8427 	 */
8428 	mutex_lock(&trace_types_lock);
8429 
8430 	/* Make sure the waiters see the new wait_index */
8431 	(void)atomic_fetch_inc_release(&iter->wait_index);
8432 
8433 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8434 
8435 	mutex_unlock(&trace_types_lock);
8436 	return 0;
8437 }
8438 
8439 #ifdef CONFIG_TRACER_MAX_TRACE
8440 static int get_snapshot_map(struct trace_array *tr)
8441 {
8442 	int err = 0;
8443 
8444 	/*
8445 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8446 	 * take trace_types_lock. Instead use the specific
8447 	 * snapshot_trigger_lock.
8448 	 */
8449 	spin_lock(&tr->snapshot_trigger_lock);
8450 
8451 	if (tr->snapshot || tr->mapped == UINT_MAX)
8452 		err = -EBUSY;
8453 	else
8454 		tr->mapped++;
8455 
8456 	spin_unlock(&tr->snapshot_trigger_lock);
8457 
8458 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8459 	if (tr->mapped == 1)
8460 		synchronize_rcu();
8461 
8462 	return err;
8463 
8464 }
8465 static void put_snapshot_map(struct trace_array *tr)
8466 {
8467 	spin_lock(&tr->snapshot_trigger_lock);
8468 	if (!WARN_ON(!tr->mapped))
8469 		tr->mapped--;
8470 	spin_unlock(&tr->snapshot_trigger_lock);
8471 }
8472 #else
8473 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8474 static inline void put_snapshot_map(struct trace_array *tr) { }
8475 #endif
8476 
8477 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8478 {
8479 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8480 	struct trace_iterator *iter = &info->iter;
8481 
8482 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8483 	put_snapshot_map(iter->tr);
8484 }
8485 
8486 static const struct vm_operations_struct tracing_buffers_vmops = {
8487 	.close		= tracing_buffers_mmap_close,
8488 };
8489 
8490 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8491 {
8492 	struct ftrace_buffer_info *info = filp->private_data;
8493 	struct trace_iterator *iter = &info->iter;
8494 	int ret = 0;
8495 
8496 	/* A memmap'ed buffer is not supported for user space mmap */
8497 	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8498 		return -ENODEV;
8499 
8500 	/* Currently the boot mapped buffer is not supported for mmap */
8501 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8502 		return -ENODEV;
8503 
8504 	ret = get_snapshot_map(iter->tr);
8505 	if (ret)
8506 		return ret;
8507 
8508 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8509 	if (ret)
8510 		put_snapshot_map(iter->tr);
8511 
8512 	vma->vm_ops = &tracing_buffers_vmops;
8513 
8514 	return ret;
8515 }
8516 
8517 static const struct file_operations tracing_buffers_fops = {
8518 	.open		= tracing_buffers_open,
8519 	.read		= tracing_buffers_read,
8520 	.poll		= tracing_buffers_poll,
8521 	.release	= tracing_buffers_release,
8522 	.flush		= tracing_buffers_flush,
8523 	.splice_read	= tracing_buffers_splice_read,
8524 	.unlocked_ioctl = tracing_buffers_ioctl,
8525 	.mmap		= tracing_buffers_mmap,
8526 };
8527 
8528 static ssize_t
8529 tracing_stats_read(struct file *filp, char __user *ubuf,
8530 		   size_t count, loff_t *ppos)
8531 {
8532 	struct inode *inode = file_inode(filp);
8533 	struct trace_array *tr = inode->i_private;
8534 	struct array_buffer *trace_buf = &tr->array_buffer;
8535 	int cpu = tracing_get_cpu(inode);
8536 	struct trace_seq *s;
8537 	unsigned long cnt;
8538 	unsigned long long t;
8539 	unsigned long usec_rem;
8540 
8541 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8542 	if (!s)
8543 		return -ENOMEM;
8544 
8545 	trace_seq_init(s);
8546 
8547 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8548 	trace_seq_printf(s, "entries: %ld\n", cnt);
8549 
8550 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8551 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8552 
8553 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8554 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8555 
8556 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8557 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8558 
8559 	if (trace_clocks[tr->clock_id].in_ns) {
8560 		/* local or global for trace_clock */
8561 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8562 		usec_rem = do_div(t, USEC_PER_SEC);
8563 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8564 								t, usec_rem);
8565 
8566 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8567 		usec_rem = do_div(t, USEC_PER_SEC);
8568 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8569 	} else {
8570 		/* counter or tsc mode for trace_clock */
8571 		trace_seq_printf(s, "oldest event ts: %llu\n",
8572 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8573 
8574 		trace_seq_printf(s, "now ts: %llu\n",
8575 				ring_buffer_time_stamp(trace_buf->buffer));
8576 	}
8577 
8578 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8579 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8580 
8581 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8582 	trace_seq_printf(s, "read events: %ld\n", cnt);
8583 
8584 	count = simple_read_from_buffer(ubuf, count, ppos,
8585 					s->buffer, trace_seq_used(s));
8586 
8587 	kfree(s);
8588 
8589 	return count;
8590 }
8591 
8592 static const struct file_operations tracing_stats_fops = {
8593 	.open		= tracing_open_generic_tr,
8594 	.read		= tracing_stats_read,
8595 	.llseek		= generic_file_llseek,
8596 	.release	= tracing_release_generic_tr,
8597 };
8598 
8599 #ifdef CONFIG_DYNAMIC_FTRACE
8600 
8601 static ssize_t
8602 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8603 		  size_t cnt, loff_t *ppos)
8604 {
8605 	ssize_t ret;
8606 	char *buf;
8607 	int r;
8608 
8609 	/* 512 should be plenty to hold the amount needed */
8610 #define DYN_INFO_BUF_SIZE	512
8611 
8612 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8613 	if (!buf)
8614 		return -ENOMEM;
8615 
8616 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8617 		      "%ld pages:%ld groups: %ld\n"
8618 		      "ftrace boot update time = %llu (ns)\n"
8619 		      "ftrace module total update time = %llu (ns)\n",
8620 		      ftrace_update_tot_cnt,
8621 		      ftrace_number_of_pages,
8622 		      ftrace_number_of_groups,
8623 		      ftrace_update_time,
8624 		      ftrace_total_mod_time);
8625 
8626 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8627 	kfree(buf);
8628 	return ret;
8629 }
8630 
8631 static const struct file_operations tracing_dyn_info_fops = {
8632 	.open		= tracing_open_generic,
8633 	.read		= tracing_read_dyn_info,
8634 	.llseek		= generic_file_llseek,
8635 };
8636 #endif /* CONFIG_DYNAMIC_FTRACE */
8637 
8638 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8639 static void
8640 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8641 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8642 		void *data)
8643 {
8644 	tracing_snapshot_instance(tr);
8645 }
8646 
8647 static void
8648 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8649 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8650 		      void *data)
8651 {
8652 	struct ftrace_func_mapper *mapper = data;
8653 	long *count = NULL;
8654 
8655 	if (mapper)
8656 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8657 
8658 	if (count) {
8659 
8660 		if (*count <= 0)
8661 			return;
8662 
8663 		(*count)--;
8664 	}
8665 
8666 	tracing_snapshot_instance(tr);
8667 }
8668 
8669 static int
8670 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8671 		      struct ftrace_probe_ops *ops, void *data)
8672 {
8673 	struct ftrace_func_mapper *mapper = data;
8674 	long *count = NULL;
8675 
8676 	seq_printf(m, "%ps:", (void *)ip);
8677 
8678 	seq_puts(m, "snapshot");
8679 
8680 	if (mapper)
8681 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8682 
8683 	if (count)
8684 		seq_printf(m, ":count=%ld\n", *count);
8685 	else
8686 		seq_puts(m, ":unlimited\n");
8687 
8688 	return 0;
8689 }
8690 
8691 static int
8692 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8693 		     unsigned long ip, void *init_data, void **data)
8694 {
8695 	struct ftrace_func_mapper *mapper = *data;
8696 
8697 	if (!mapper) {
8698 		mapper = allocate_ftrace_func_mapper();
8699 		if (!mapper)
8700 			return -ENOMEM;
8701 		*data = mapper;
8702 	}
8703 
8704 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8705 }
8706 
8707 static void
8708 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8709 		     unsigned long ip, void *data)
8710 {
8711 	struct ftrace_func_mapper *mapper = data;
8712 
8713 	if (!ip) {
8714 		if (!mapper)
8715 			return;
8716 		free_ftrace_func_mapper(mapper, NULL);
8717 		return;
8718 	}
8719 
8720 	ftrace_func_mapper_remove_ip(mapper, ip);
8721 }
8722 
8723 static struct ftrace_probe_ops snapshot_probe_ops = {
8724 	.func			= ftrace_snapshot,
8725 	.print			= ftrace_snapshot_print,
8726 };
8727 
8728 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8729 	.func			= ftrace_count_snapshot,
8730 	.print			= ftrace_snapshot_print,
8731 	.init			= ftrace_snapshot_init,
8732 	.free			= ftrace_snapshot_free,
8733 };
8734 
8735 static int
8736 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8737 			       char *glob, char *cmd, char *param, int enable)
8738 {
8739 	struct ftrace_probe_ops *ops;
8740 	void *count = (void *)-1;
8741 	char *number;
8742 	int ret;
8743 
8744 	if (!tr)
8745 		return -ENODEV;
8746 
8747 	/* hash funcs only work with set_ftrace_filter */
8748 	if (!enable)
8749 		return -EINVAL;
8750 
8751 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8752 
8753 	if (glob[0] == '!') {
8754 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8755 		if (!ret)
8756 			tracing_disarm_snapshot(tr);
8757 
8758 		return ret;
8759 	}
8760 
8761 	if (!param)
8762 		goto out_reg;
8763 
8764 	number = strsep(&param, ":");
8765 
8766 	if (!strlen(number))
8767 		goto out_reg;
8768 
8769 	/*
8770 	 * We use the callback data field (which is a pointer)
8771 	 * as our counter.
8772 	 */
8773 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8774 	if (ret)
8775 		return ret;
8776 
8777  out_reg:
8778 	ret = tracing_arm_snapshot(tr);
8779 	if (ret < 0)
8780 		goto out;
8781 
8782 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8783 	if (ret < 0)
8784 		tracing_disarm_snapshot(tr);
8785  out:
8786 	return ret < 0 ? ret : 0;
8787 }
8788 
8789 static struct ftrace_func_command ftrace_snapshot_cmd = {
8790 	.name			= "snapshot",
8791 	.func			= ftrace_trace_snapshot_callback,
8792 };
8793 
8794 static __init int register_snapshot_cmd(void)
8795 {
8796 	return register_ftrace_command(&ftrace_snapshot_cmd);
8797 }
8798 #else
8799 static inline __init int register_snapshot_cmd(void) { return 0; }
8800 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8801 
8802 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8803 {
8804 	if (WARN_ON(!tr->dir))
8805 		return ERR_PTR(-ENODEV);
8806 
8807 	/* Top directory uses NULL as the parent */
8808 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8809 		return NULL;
8810 
8811 	/* All sub buffers have a descriptor */
8812 	return tr->dir;
8813 }
8814 
8815 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8816 {
8817 	struct dentry *d_tracer;
8818 
8819 	if (tr->percpu_dir)
8820 		return tr->percpu_dir;
8821 
8822 	d_tracer = tracing_get_dentry(tr);
8823 	if (IS_ERR(d_tracer))
8824 		return NULL;
8825 
8826 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8827 
8828 	MEM_FAIL(!tr->percpu_dir,
8829 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8830 
8831 	return tr->percpu_dir;
8832 }
8833 
8834 static struct dentry *
8835 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8836 		      void *data, long cpu, const struct file_operations *fops)
8837 {
8838 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8839 
8840 	if (ret) /* See tracing_get_cpu() */
8841 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8842 	return ret;
8843 }
8844 
8845 static void
8846 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8847 {
8848 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8849 	struct dentry *d_cpu;
8850 	char cpu_dir[30]; /* 30 characters should be more than enough */
8851 
8852 	if (!d_percpu)
8853 		return;
8854 
8855 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8856 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8857 	if (!d_cpu) {
8858 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8859 		return;
8860 	}
8861 
8862 	/* per cpu trace_pipe */
8863 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8864 				tr, cpu, &tracing_pipe_fops);
8865 
8866 	/* per cpu trace */
8867 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8868 				tr, cpu, &tracing_fops);
8869 
8870 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8871 				tr, cpu, &tracing_buffers_fops);
8872 
8873 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8874 				tr, cpu, &tracing_stats_fops);
8875 
8876 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8877 				tr, cpu, &tracing_entries_fops);
8878 
8879 	if (tr->range_addr_start)
8880 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8881 				      tr, cpu, &tracing_buffer_meta_fops);
8882 #ifdef CONFIG_TRACER_SNAPSHOT
8883 	if (!tr->range_addr_start) {
8884 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8885 				      tr, cpu, &snapshot_fops);
8886 
8887 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8888 				      tr, cpu, &snapshot_raw_fops);
8889 	}
8890 #endif
8891 }
8892 
8893 #ifdef CONFIG_FTRACE_SELFTEST
8894 /* Let selftest have access to static functions in this file */
8895 #include "trace_selftest.c"
8896 #endif
8897 
8898 static ssize_t
8899 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8900 			loff_t *ppos)
8901 {
8902 	struct trace_option_dentry *topt = filp->private_data;
8903 	char *buf;
8904 
8905 	if (topt->flags->val & topt->opt->bit)
8906 		buf = "1\n";
8907 	else
8908 		buf = "0\n";
8909 
8910 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8911 }
8912 
8913 static ssize_t
8914 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8915 			 loff_t *ppos)
8916 {
8917 	struct trace_option_dentry *topt = filp->private_data;
8918 	unsigned long val;
8919 	int ret;
8920 
8921 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8922 	if (ret)
8923 		return ret;
8924 
8925 	if (val != 0 && val != 1)
8926 		return -EINVAL;
8927 
8928 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8929 		mutex_lock(&trace_types_lock);
8930 		ret = __set_tracer_option(topt->tr, topt->flags,
8931 					  topt->opt, !val);
8932 		mutex_unlock(&trace_types_lock);
8933 		if (ret)
8934 			return ret;
8935 	}
8936 
8937 	*ppos += cnt;
8938 
8939 	return cnt;
8940 }
8941 
8942 static int tracing_open_options(struct inode *inode, struct file *filp)
8943 {
8944 	struct trace_option_dentry *topt = inode->i_private;
8945 	int ret;
8946 
8947 	ret = tracing_check_open_get_tr(topt->tr);
8948 	if (ret)
8949 		return ret;
8950 
8951 	filp->private_data = inode->i_private;
8952 	return 0;
8953 }
8954 
8955 static int tracing_release_options(struct inode *inode, struct file *file)
8956 {
8957 	struct trace_option_dentry *topt = file->private_data;
8958 
8959 	trace_array_put(topt->tr);
8960 	return 0;
8961 }
8962 
8963 static const struct file_operations trace_options_fops = {
8964 	.open = tracing_open_options,
8965 	.read = trace_options_read,
8966 	.write = trace_options_write,
8967 	.llseek	= generic_file_llseek,
8968 	.release = tracing_release_options,
8969 };
8970 
8971 /*
8972  * In order to pass in both the trace_array descriptor as well as the index
8973  * to the flag that the trace option file represents, the trace_array
8974  * has a character array of trace_flags_index[], which holds the index
8975  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8976  * The address of this character array is passed to the flag option file
8977  * read/write callbacks.
8978  *
8979  * In order to extract both the index and the trace_array descriptor,
8980  * get_tr_index() uses the following algorithm.
8981  *
8982  *   idx = *ptr;
8983  *
8984  * As the pointer itself contains the address of the index (remember
8985  * index[1] == 1).
8986  *
8987  * Then to get the trace_array descriptor, by subtracting that index
8988  * from the ptr, we get to the start of the index itself.
8989  *
8990  *   ptr - idx == &index[0]
8991  *
8992  * Then a simple container_of() from that pointer gets us to the
8993  * trace_array descriptor.
8994  */
8995 static void get_tr_index(void *data, struct trace_array **ptr,
8996 			 unsigned int *pindex)
8997 {
8998 	*pindex = *(unsigned char *)data;
8999 
9000 	*ptr = container_of(data - *pindex, struct trace_array,
9001 			    trace_flags_index);
9002 }
9003 
9004 static ssize_t
9005 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9006 			loff_t *ppos)
9007 {
9008 	void *tr_index = filp->private_data;
9009 	struct trace_array *tr;
9010 	unsigned int index;
9011 	char *buf;
9012 
9013 	get_tr_index(tr_index, &tr, &index);
9014 
9015 	if (tr->trace_flags & (1 << index))
9016 		buf = "1\n";
9017 	else
9018 		buf = "0\n";
9019 
9020 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9021 }
9022 
9023 static ssize_t
9024 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9025 			 loff_t *ppos)
9026 {
9027 	void *tr_index = filp->private_data;
9028 	struct trace_array *tr;
9029 	unsigned int index;
9030 	unsigned long val;
9031 	int ret;
9032 
9033 	get_tr_index(tr_index, &tr, &index);
9034 
9035 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9036 	if (ret)
9037 		return ret;
9038 
9039 	if (val != 0 && val != 1)
9040 		return -EINVAL;
9041 
9042 	mutex_lock(&event_mutex);
9043 	mutex_lock(&trace_types_lock);
9044 	ret = set_tracer_flag(tr, 1 << index, val);
9045 	mutex_unlock(&trace_types_lock);
9046 	mutex_unlock(&event_mutex);
9047 
9048 	if (ret < 0)
9049 		return ret;
9050 
9051 	*ppos += cnt;
9052 
9053 	return cnt;
9054 }
9055 
9056 static const struct file_operations trace_options_core_fops = {
9057 	.open = tracing_open_generic,
9058 	.read = trace_options_core_read,
9059 	.write = trace_options_core_write,
9060 	.llseek = generic_file_llseek,
9061 };
9062 
9063 struct dentry *trace_create_file(const char *name,
9064 				 umode_t mode,
9065 				 struct dentry *parent,
9066 				 void *data,
9067 				 const struct file_operations *fops)
9068 {
9069 	struct dentry *ret;
9070 
9071 	ret = tracefs_create_file(name, mode, parent, data, fops);
9072 	if (!ret)
9073 		pr_warn("Could not create tracefs '%s' entry\n", name);
9074 
9075 	return ret;
9076 }
9077 
9078 
9079 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9080 {
9081 	struct dentry *d_tracer;
9082 
9083 	if (tr->options)
9084 		return tr->options;
9085 
9086 	d_tracer = tracing_get_dentry(tr);
9087 	if (IS_ERR(d_tracer))
9088 		return NULL;
9089 
9090 	tr->options = tracefs_create_dir("options", d_tracer);
9091 	if (!tr->options) {
9092 		pr_warn("Could not create tracefs directory 'options'\n");
9093 		return NULL;
9094 	}
9095 
9096 	return tr->options;
9097 }
9098 
9099 static void
9100 create_trace_option_file(struct trace_array *tr,
9101 			 struct trace_option_dentry *topt,
9102 			 struct tracer_flags *flags,
9103 			 struct tracer_opt *opt)
9104 {
9105 	struct dentry *t_options;
9106 
9107 	t_options = trace_options_init_dentry(tr);
9108 	if (!t_options)
9109 		return;
9110 
9111 	topt->flags = flags;
9112 	topt->opt = opt;
9113 	topt->tr = tr;
9114 
9115 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9116 					t_options, topt, &trace_options_fops);
9117 
9118 }
9119 
9120 static void
9121 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9122 {
9123 	struct trace_option_dentry *topts;
9124 	struct trace_options *tr_topts;
9125 	struct tracer_flags *flags;
9126 	struct tracer_opt *opts;
9127 	int cnt;
9128 	int i;
9129 
9130 	if (!tracer)
9131 		return;
9132 
9133 	flags = tracer->flags;
9134 
9135 	if (!flags || !flags->opts)
9136 		return;
9137 
9138 	/*
9139 	 * If this is an instance, only create flags for tracers
9140 	 * the instance may have.
9141 	 */
9142 	if (!trace_ok_for_array(tracer, tr))
9143 		return;
9144 
9145 	for (i = 0; i < tr->nr_topts; i++) {
9146 		/* Make sure there's no duplicate flags. */
9147 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9148 			return;
9149 	}
9150 
9151 	opts = flags->opts;
9152 
9153 	for (cnt = 0; opts[cnt].name; cnt++)
9154 		;
9155 
9156 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9157 	if (!topts)
9158 		return;
9159 
9160 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9161 			    GFP_KERNEL);
9162 	if (!tr_topts) {
9163 		kfree(topts);
9164 		return;
9165 	}
9166 
9167 	tr->topts = tr_topts;
9168 	tr->topts[tr->nr_topts].tracer = tracer;
9169 	tr->topts[tr->nr_topts].topts = topts;
9170 	tr->nr_topts++;
9171 
9172 	for (cnt = 0; opts[cnt].name; cnt++) {
9173 		create_trace_option_file(tr, &topts[cnt], flags,
9174 					 &opts[cnt]);
9175 		MEM_FAIL(topts[cnt].entry == NULL,
9176 			  "Failed to create trace option: %s",
9177 			  opts[cnt].name);
9178 	}
9179 }
9180 
9181 static struct dentry *
9182 create_trace_option_core_file(struct trace_array *tr,
9183 			      const char *option, long index)
9184 {
9185 	struct dentry *t_options;
9186 
9187 	t_options = trace_options_init_dentry(tr);
9188 	if (!t_options)
9189 		return NULL;
9190 
9191 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9192 				 (void *)&tr->trace_flags_index[index],
9193 				 &trace_options_core_fops);
9194 }
9195 
9196 static void create_trace_options_dir(struct trace_array *tr)
9197 {
9198 	struct dentry *t_options;
9199 	bool top_level = tr == &global_trace;
9200 	int i;
9201 
9202 	t_options = trace_options_init_dentry(tr);
9203 	if (!t_options)
9204 		return;
9205 
9206 	for (i = 0; trace_options[i]; i++) {
9207 		if (top_level ||
9208 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9209 			create_trace_option_core_file(tr, trace_options[i], i);
9210 	}
9211 }
9212 
9213 static ssize_t
9214 rb_simple_read(struct file *filp, char __user *ubuf,
9215 	       size_t cnt, loff_t *ppos)
9216 {
9217 	struct trace_array *tr = filp->private_data;
9218 	char buf[64];
9219 	int r;
9220 
9221 	r = tracer_tracing_is_on(tr);
9222 	r = sprintf(buf, "%d\n", r);
9223 
9224 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9225 }
9226 
9227 static ssize_t
9228 rb_simple_write(struct file *filp, const char __user *ubuf,
9229 		size_t cnt, loff_t *ppos)
9230 {
9231 	struct trace_array *tr = filp->private_data;
9232 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9233 	unsigned long val;
9234 	int ret;
9235 
9236 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9237 	if (ret)
9238 		return ret;
9239 
9240 	if (buffer) {
9241 		mutex_lock(&trace_types_lock);
9242 		if (!!val == tracer_tracing_is_on(tr)) {
9243 			val = 0; /* do nothing */
9244 		} else if (val) {
9245 			tracer_tracing_on(tr);
9246 			if (tr->current_trace->start)
9247 				tr->current_trace->start(tr);
9248 		} else {
9249 			tracer_tracing_off(tr);
9250 			if (tr->current_trace->stop)
9251 				tr->current_trace->stop(tr);
9252 			/* Wake up any waiters */
9253 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9254 		}
9255 		mutex_unlock(&trace_types_lock);
9256 	}
9257 
9258 	(*ppos)++;
9259 
9260 	return cnt;
9261 }
9262 
9263 static const struct file_operations rb_simple_fops = {
9264 	.open		= tracing_open_generic_tr,
9265 	.read		= rb_simple_read,
9266 	.write		= rb_simple_write,
9267 	.release	= tracing_release_generic_tr,
9268 	.llseek		= default_llseek,
9269 };
9270 
9271 static ssize_t
9272 buffer_percent_read(struct file *filp, char __user *ubuf,
9273 		    size_t cnt, loff_t *ppos)
9274 {
9275 	struct trace_array *tr = filp->private_data;
9276 	char buf[64];
9277 	int r;
9278 
9279 	r = tr->buffer_percent;
9280 	r = sprintf(buf, "%d\n", r);
9281 
9282 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9283 }
9284 
9285 static ssize_t
9286 buffer_percent_write(struct file *filp, const char __user *ubuf,
9287 		     size_t cnt, loff_t *ppos)
9288 {
9289 	struct trace_array *tr = filp->private_data;
9290 	unsigned long val;
9291 	int ret;
9292 
9293 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9294 	if (ret)
9295 		return ret;
9296 
9297 	if (val > 100)
9298 		return -EINVAL;
9299 
9300 	tr->buffer_percent = val;
9301 
9302 	(*ppos)++;
9303 
9304 	return cnt;
9305 }
9306 
9307 static const struct file_operations buffer_percent_fops = {
9308 	.open		= tracing_open_generic_tr,
9309 	.read		= buffer_percent_read,
9310 	.write		= buffer_percent_write,
9311 	.release	= tracing_release_generic_tr,
9312 	.llseek		= default_llseek,
9313 };
9314 
9315 static ssize_t
9316 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9317 {
9318 	struct trace_array *tr = filp->private_data;
9319 	size_t size;
9320 	char buf[64];
9321 	int order;
9322 	int r;
9323 
9324 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9325 	size = (PAGE_SIZE << order) / 1024;
9326 
9327 	r = sprintf(buf, "%zd\n", size);
9328 
9329 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9330 }
9331 
9332 static ssize_t
9333 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9334 			 size_t cnt, loff_t *ppos)
9335 {
9336 	struct trace_array *tr = filp->private_data;
9337 	unsigned long val;
9338 	int old_order;
9339 	int order;
9340 	int pages;
9341 	int ret;
9342 
9343 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9344 	if (ret)
9345 		return ret;
9346 
9347 	val *= 1024; /* value passed in is in KB */
9348 
9349 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9350 	order = fls(pages - 1);
9351 
9352 	/* limit between 1 and 128 system pages */
9353 	if (order < 0 || order > 7)
9354 		return -EINVAL;
9355 
9356 	/* Do not allow tracing while changing the order of the ring buffer */
9357 	tracing_stop_tr(tr);
9358 
9359 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9360 	if (old_order == order)
9361 		goto out;
9362 
9363 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9364 	if (ret)
9365 		goto out;
9366 
9367 #ifdef CONFIG_TRACER_MAX_TRACE
9368 
9369 	if (!tr->allocated_snapshot)
9370 		goto out_max;
9371 
9372 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9373 	if (ret) {
9374 		/* Put back the old order */
9375 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9376 		if (WARN_ON_ONCE(cnt)) {
9377 			/*
9378 			 * AARGH! We are left with different orders!
9379 			 * The max buffer is our "snapshot" buffer.
9380 			 * When a tracer needs a snapshot (one of the
9381 			 * latency tracers), it swaps the max buffer
9382 			 * with the saved snap shot. We succeeded to
9383 			 * update the order of the main buffer, but failed to
9384 			 * update the order of the max buffer. But when we tried
9385 			 * to reset the main buffer to the original size, we
9386 			 * failed there too. This is very unlikely to
9387 			 * happen, but if it does, warn and kill all
9388 			 * tracing.
9389 			 */
9390 			tracing_disabled = 1;
9391 		}
9392 		goto out;
9393 	}
9394  out_max:
9395 #endif
9396 	(*ppos)++;
9397  out:
9398 	if (ret)
9399 		cnt = ret;
9400 	tracing_start_tr(tr);
9401 	return cnt;
9402 }
9403 
9404 static const struct file_operations buffer_subbuf_size_fops = {
9405 	.open		= tracing_open_generic_tr,
9406 	.read		= buffer_subbuf_size_read,
9407 	.write		= buffer_subbuf_size_write,
9408 	.release	= tracing_release_generic_tr,
9409 	.llseek		= default_llseek,
9410 };
9411 
9412 static struct dentry *trace_instance_dir;
9413 
9414 static void
9415 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9416 
9417 #ifdef CONFIG_MODULES
9418 static int make_mod_delta(struct module *mod, void *data)
9419 {
9420 	struct trace_module_delta *module_delta;
9421 	struct trace_scratch *tscratch;
9422 	struct trace_mod_entry *entry;
9423 	struct trace_array *tr = data;
9424 	int i;
9425 
9426 	tscratch = tr->scratch;
9427 	module_delta = READ_ONCE(tr->module_delta);
9428 	for (i = 0; i < tscratch->nr_entries; i++) {
9429 		entry = &tscratch->entries[i];
9430 		if (strcmp(mod->name, entry->mod_name))
9431 			continue;
9432 		if (mod->state == MODULE_STATE_GOING)
9433 			module_delta->delta[i] = 0;
9434 		else
9435 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9436 						 - entry->mod_addr;
9437 		break;
9438 	}
9439 	return 0;
9440 }
9441 #else
9442 static int make_mod_delta(struct module *mod, void *data)
9443 {
9444 	return 0;
9445 }
9446 #endif
9447 
9448 static int mod_addr_comp(const void *a, const void *b, const void *data)
9449 {
9450 	const struct trace_mod_entry *e1 = a;
9451 	const struct trace_mod_entry *e2 = b;
9452 
9453 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9454 }
9455 
9456 static void setup_trace_scratch(struct trace_array *tr,
9457 				struct trace_scratch *tscratch, unsigned int size)
9458 {
9459 	struct trace_module_delta *module_delta;
9460 	struct trace_mod_entry *entry;
9461 	int i, nr_entries;
9462 
9463 	if (!tscratch)
9464 		return;
9465 
9466 	tr->scratch = tscratch;
9467 	tr->scratch_size = size;
9468 
9469 	if (tscratch->text_addr)
9470 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9471 
9472 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9473 		goto reset;
9474 
9475 	/* Check if each module name is a valid string */
9476 	for (i = 0; i < tscratch->nr_entries; i++) {
9477 		int n;
9478 
9479 		entry = &tscratch->entries[i];
9480 
9481 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9482 			if (entry->mod_name[n] == '\0')
9483 				break;
9484 			if (!isprint(entry->mod_name[n]))
9485 				goto reset;
9486 		}
9487 		if (n == MODULE_NAME_LEN)
9488 			goto reset;
9489 	}
9490 
9491 	/* Sort the entries so that we can find appropriate module from address. */
9492 	nr_entries = tscratch->nr_entries;
9493 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9494 	       mod_addr_comp, NULL, NULL);
9495 
9496 	if (IS_ENABLED(CONFIG_MODULES)) {
9497 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9498 		if (!module_delta) {
9499 			pr_info("module_delta allocation failed. Not able to decode module address.");
9500 			goto reset;
9501 		}
9502 		init_rcu_head(&module_delta->rcu);
9503 	} else
9504 		module_delta = NULL;
9505 	WRITE_ONCE(tr->module_delta, module_delta);
9506 
9507 	/* Scan modules to make text delta for modules. */
9508 	module_for_each_mod(make_mod_delta, tr);
9509 	return;
9510  reset:
9511 	/* Invalid trace modules */
9512 	memset(tscratch, 0, size);
9513 }
9514 
9515 static int
9516 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9517 {
9518 	enum ring_buffer_flags rb_flags;
9519 	struct trace_scratch *tscratch;
9520 	unsigned int scratch_size = 0;
9521 
9522 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9523 
9524 	buf->tr = tr;
9525 
9526 	if (tr->range_addr_start && tr->range_addr_size) {
9527 		/* Add scratch buffer to handle 128 modules */
9528 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9529 						      tr->range_addr_start,
9530 						      tr->range_addr_size,
9531 						      struct_size(tscratch, entries, 128));
9532 
9533 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9534 		setup_trace_scratch(tr, tscratch, scratch_size);
9535 
9536 		/*
9537 		 * This is basically the same as a mapped buffer,
9538 		 * with the same restrictions.
9539 		 */
9540 		tr->mapped++;
9541 	} else {
9542 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9543 	}
9544 	if (!buf->buffer)
9545 		return -ENOMEM;
9546 
9547 	buf->data = alloc_percpu(struct trace_array_cpu);
9548 	if (!buf->data) {
9549 		ring_buffer_free(buf->buffer);
9550 		buf->buffer = NULL;
9551 		return -ENOMEM;
9552 	}
9553 
9554 	/* Allocate the first page for all buffers */
9555 	set_buffer_entries(&tr->array_buffer,
9556 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9557 
9558 	return 0;
9559 }
9560 
9561 static void free_trace_buffer(struct array_buffer *buf)
9562 {
9563 	if (buf->buffer) {
9564 		ring_buffer_free(buf->buffer);
9565 		buf->buffer = NULL;
9566 		free_percpu(buf->data);
9567 		buf->data = NULL;
9568 	}
9569 }
9570 
9571 static int allocate_trace_buffers(struct trace_array *tr, int size)
9572 {
9573 	int ret;
9574 
9575 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9576 	if (ret)
9577 		return ret;
9578 
9579 #ifdef CONFIG_TRACER_MAX_TRACE
9580 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9581 	if (tr->range_addr_start)
9582 		return 0;
9583 
9584 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9585 				    allocate_snapshot ? size : 1);
9586 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9587 		free_trace_buffer(&tr->array_buffer);
9588 		return -ENOMEM;
9589 	}
9590 	tr->allocated_snapshot = allocate_snapshot;
9591 
9592 	allocate_snapshot = false;
9593 #endif
9594 
9595 	return 0;
9596 }
9597 
9598 static void free_trace_buffers(struct trace_array *tr)
9599 {
9600 	if (!tr)
9601 		return;
9602 
9603 	free_trace_buffer(&tr->array_buffer);
9604 
9605 #ifdef CONFIG_TRACER_MAX_TRACE
9606 	free_trace_buffer(&tr->max_buffer);
9607 #endif
9608 }
9609 
9610 static void init_trace_flags_index(struct trace_array *tr)
9611 {
9612 	int i;
9613 
9614 	/* Used by the trace options files */
9615 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9616 		tr->trace_flags_index[i] = i;
9617 }
9618 
9619 static void __update_tracer_options(struct trace_array *tr)
9620 {
9621 	struct tracer *t;
9622 
9623 	for (t = trace_types; t; t = t->next)
9624 		add_tracer_options(tr, t);
9625 }
9626 
9627 static void update_tracer_options(struct trace_array *tr)
9628 {
9629 	mutex_lock(&trace_types_lock);
9630 	tracer_options_updated = true;
9631 	__update_tracer_options(tr);
9632 	mutex_unlock(&trace_types_lock);
9633 }
9634 
9635 /* Must have trace_types_lock held */
9636 struct trace_array *trace_array_find(const char *instance)
9637 {
9638 	struct trace_array *tr, *found = NULL;
9639 
9640 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9641 		if (tr->name && strcmp(tr->name, instance) == 0) {
9642 			found = tr;
9643 			break;
9644 		}
9645 	}
9646 
9647 	return found;
9648 }
9649 
9650 struct trace_array *trace_array_find_get(const char *instance)
9651 {
9652 	struct trace_array *tr;
9653 
9654 	mutex_lock(&trace_types_lock);
9655 	tr = trace_array_find(instance);
9656 	if (tr)
9657 		tr->ref++;
9658 	mutex_unlock(&trace_types_lock);
9659 
9660 	return tr;
9661 }
9662 
9663 static int trace_array_create_dir(struct trace_array *tr)
9664 {
9665 	int ret;
9666 
9667 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9668 	if (!tr->dir)
9669 		return -EINVAL;
9670 
9671 	ret = event_trace_add_tracer(tr->dir, tr);
9672 	if (ret) {
9673 		tracefs_remove(tr->dir);
9674 		return ret;
9675 	}
9676 
9677 	init_tracer_tracefs(tr, tr->dir);
9678 	__update_tracer_options(tr);
9679 
9680 	return ret;
9681 }
9682 
9683 static struct trace_array *
9684 trace_array_create_systems(const char *name, const char *systems,
9685 			   unsigned long range_addr_start,
9686 			   unsigned long range_addr_size)
9687 {
9688 	struct trace_array *tr;
9689 	int ret;
9690 
9691 	ret = -ENOMEM;
9692 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9693 	if (!tr)
9694 		return ERR_PTR(ret);
9695 
9696 	tr->name = kstrdup(name, GFP_KERNEL);
9697 	if (!tr->name)
9698 		goto out_free_tr;
9699 
9700 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9701 		goto out_free_tr;
9702 
9703 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9704 		goto out_free_tr;
9705 
9706 	if (systems) {
9707 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9708 		if (!tr->system_names)
9709 			goto out_free_tr;
9710 	}
9711 
9712 	/* Only for boot up memory mapped ring buffers */
9713 	tr->range_addr_start = range_addr_start;
9714 	tr->range_addr_size = range_addr_size;
9715 
9716 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9717 
9718 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9719 
9720 	raw_spin_lock_init(&tr->start_lock);
9721 
9722 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9723 #ifdef CONFIG_TRACER_MAX_TRACE
9724 	spin_lock_init(&tr->snapshot_trigger_lock);
9725 #endif
9726 	tr->current_trace = &nop_trace;
9727 
9728 	INIT_LIST_HEAD(&tr->systems);
9729 	INIT_LIST_HEAD(&tr->events);
9730 	INIT_LIST_HEAD(&tr->hist_vars);
9731 	INIT_LIST_HEAD(&tr->err_log);
9732 
9733 #ifdef CONFIG_MODULES
9734 	INIT_LIST_HEAD(&tr->mod_events);
9735 #endif
9736 
9737 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9738 		goto out_free_tr;
9739 
9740 	/* The ring buffer is defaultly expanded */
9741 	trace_set_ring_buffer_expanded(tr);
9742 
9743 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9744 		goto out_free_tr;
9745 
9746 	ftrace_init_trace_array(tr);
9747 
9748 	init_trace_flags_index(tr);
9749 
9750 	if (trace_instance_dir) {
9751 		ret = trace_array_create_dir(tr);
9752 		if (ret)
9753 			goto out_free_tr;
9754 	} else
9755 		__trace_early_add_events(tr);
9756 
9757 	list_add(&tr->list, &ftrace_trace_arrays);
9758 
9759 	tr->ref++;
9760 
9761 	return tr;
9762 
9763  out_free_tr:
9764 	ftrace_free_ftrace_ops(tr);
9765 	free_trace_buffers(tr);
9766 	free_cpumask_var(tr->pipe_cpumask);
9767 	free_cpumask_var(tr->tracing_cpumask);
9768 	kfree_const(tr->system_names);
9769 	kfree(tr->range_name);
9770 	kfree(tr->name);
9771 	kfree(tr);
9772 
9773 	return ERR_PTR(ret);
9774 }
9775 
9776 static struct trace_array *trace_array_create(const char *name)
9777 {
9778 	return trace_array_create_systems(name, NULL, 0, 0);
9779 }
9780 
9781 static int instance_mkdir(const char *name)
9782 {
9783 	struct trace_array *tr;
9784 	int ret;
9785 
9786 	guard(mutex)(&event_mutex);
9787 	guard(mutex)(&trace_types_lock);
9788 
9789 	ret = -EEXIST;
9790 	if (trace_array_find(name))
9791 		return -EEXIST;
9792 
9793 	tr = trace_array_create(name);
9794 
9795 	ret = PTR_ERR_OR_ZERO(tr);
9796 
9797 	return ret;
9798 }
9799 
9800 static u64 map_pages(unsigned long start, unsigned long size)
9801 {
9802 	unsigned long vmap_start, vmap_end;
9803 	struct vm_struct *area;
9804 	int ret;
9805 
9806 	area = get_vm_area(size, VM_IOREMAP);
9807 	if (!area)
9808 		return 0;
9809 
9810 	vmap_start = (unsigned long) area->addr;
9811 	vmap_end = vmap_start + size;
9812 
9813 	ret = vmap_page_range(vmap_start, vmap_end,
9814 			      start, pgprot_nx(PAGE_KERNEL));
9815 	if (ret < 0) {
9816 		free_vm_area(area);
9817 		return 0;
9818 	}
9819 
9820 	return (u64)vmap_start;
9821 }
9822 
9823 /**
9824  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9825  * @name: The name of the trace array to be looked up/created.
9826  * @systems: A list of systems to create event directories for (NULL for all)
9827  *
9828  * Returns pointer to trace array with given name.
9829  * NULL, if it cannot be created.
9830  *
9831  * NOTE: This function increments the reference counter associated with the
9832  * trace array returned. This makes sure it cannot be freed while in use.
9833  * Use trace_array_put() once the trace array is no longer needed.
9834  * If the trace_array is to be freed, trace_array_destroy() needs to
9835  * be called after the trace_array_put(), or simply let user space delete
9836  * it from the tracefs instances directory. But until the
9837  * trace_array_put() is called, user space can not delete it.
9838  *
9839  */
9840 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9841 {
9842 	struct trace_array *tr;
9843 
9844 	guard(mutex)(&event_mutex);
9845 	guard(mutex)(&trace_types_lock);
9846 
9847 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9848 		if (tr->name && strcmp(tr->name, name) == 0) {
9849 			tr->ref++;
9850 			return tr;
9851 		}
9852 	}
9853 
9854 	tr = trace_array_create_systems(name, systems, 0, 0);
9855 
9856 	if (IS_ERR(tr))
9857 		tr = NULL;
9858 	else
9859 		tr->ref++;
9860 
9861 	return tr;
9862 }
9863 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9864 
9865 static int __remove_instance(struct trace_array *tr)
9866 {
9867 	int i;
9868 
9869 	/* Reference counter for a newly created trace array = 1. */
9870 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9871 		return -EBUSY;
9872 
9873 	list_del(&tr->list);
9874 
9875 	/* Disable all the flags that were enabled coming in */
9876 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9877 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9878 			set_tracer_flag(tr, 1 << i, 0);
9879 	}
9880 
9881 	if (printk_trace == tr)
9882 		update_printk_trace(&global_trace);
9883 
9884 	tracing_set_nop(tr);
9885 	clear_ftrace_function_probes(tr);
9886 	event_trace_del_tracer(tr);
9887 	ftrace_clear_pids(tr);
9888 	ftrace_destroy_function_files(tr);
9889 	tracefs_remove(tr->dir);
9890 	free_percpu(tr->last_func_repeats);
9891 	free_trace_buffers(tr);
9892 	clear_tracing_err_log(tr);
9893 
9894 	if (tr->range_name) {
9895 		reserve_mem_release_by_name(tr->range_name);
9896 		kfree(tr->range_name);
9897 	}
9898 
9899 	for (i = 0; i < tr->nr_topts; i++) {
9900 		kfree(tr->topts[i].topts);
9901 	}
9902 	kfree(tr->topts);
9903 
9904 	free_cpumask_var(tr->pipe_cpumask);
9905 	free_cpumask_var(tr->tracing_cpumask);
9906 	kfree_const(tr->system_names);
9907 	kfree(tr->name);
9908 	kfree(tr);
9909 
9910 	return 0;
9911 }
9912 
9913 int trace_array_destroy(struct trace_array *this_tr)
9914 {
9915 	struct trace_array *tr;
9916 
9917 	if (!this_tr)
9918 		return -EINVAL;
9919 
9920 	guard(mutex)(&event_mutex);
9921 	guard(mutex)(&trace_types_lock);
9922 
9923 
9924 	/* Making sure trace array exists before destroying it. */
9925 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9926 		if (tr == this_tr)
9927 			return __remove_instance(tr);
9928 	}
9929 
9930 	return -ENODEV;
9931 }
9932 EXPORT_SYMBOL_GPL(trace_array_destroy);
9933 
9934 static int instance_rmdir(const char *name)
9935 {
9936 	struct trace_array *tr;
9937 
9938 	guard(mutex)(&event_mutex);
9939 	guard(mutex)(&trace_types_lock);
9940 
9941 	tr = trace_array_find(name);
9942 	if (!tr)
9943 		return -ENODEV;
9944 
9945 	return __remove_instance(tr);
9946 }
9947 
9948 static __init void create_trace_instances(struct dentry *d_tracer)
9949 {
9950 	struct trace_array *tr;
9951 
9952 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9953 							 instance_mkdir,
9954 							 instance_rmdir);
9955 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9956 		return;
9957 
9958 	guard(mutex)(&event_mutex);
9959 	guard(mutex)(&trace_types_lock);
9960 
9961 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9962 		if (!tr->name)
9963 			continue;
9964 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9965 			     "Failed to create instance directory\n"))
9966 			return;
9967 	}
9968 }
9969 
9970 static void
9971 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9972 {
9973 	int cpu;
9974 
9975 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9976 			tr, &show_traces_fops);
9977 
9978 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9979 			tr, &set_tracer_fops);
9980 
9981 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9982 			  tr, &tracing_cpumask_fops);
9983 
9984 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9985 			  tr, &tracing_iter_fops);
9986 
9987 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9988 			  tr, &tracing_fops);
9989 
9990 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9991 			  tr, &tracing_pipe_fops);
9992 
9993 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9994 			  tr, &tracing_entries_fops);
9995 
9996 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9997 			  tr, &tracing_total_entries_fops);
9998 
9999 	trace_create_file("free_buffer", 0200, d_tracer,
10000 			  tr, &tracing_free_buffer_fops);
10001 
10002 	trace_create_file("trace_marker", 0220, d_tracer,
10003 			  tr, &tracing_mark_fops);
10004 
10005 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10006 
10007 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10008 			  tr, &tracing_mark_raw_fops);
10009 
10010 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10011 			  &trace_clock_fops);
10012 
10013 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10014 			  tr, &rb_simple_fops);
10015 
10016 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10017 			  &trace_time_stamp_mode_fops);
10018 
10019 	tr->buffer_percent = 50;
10020 
10021 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10022 			tr, &buffer_percent_fops);
10023 
10024 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10025 			  tr, &buffer_subbuf_size_fops);
10026 
10027 	create_trace_options_dir(tr);
10028 
10029 #ifdef CONFIG_TRACER_MAX_TRACE
10030 	trace_create_maxlat_file(tr, d_tracer);
10031 #endif
10032 
10033 	if (ftrace_create_function_files(tr, d_tracer))
10034 		MEM_FAIL(1, "Could not allocate function filter files");
10035 
10036 	if (tr->range_addr_start) {
10037 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10038 				  tr, &last_boot_fops);
10039 #ifdef CONFIG_TRACER_SNAPSHOT
10040 	} else {
10041 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10042 				  tr, &snapshot_fops);
10043 #endif
10044 	}
10045 
10046 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10047 			  tr, &tracing_err_log_fops);
10048 
10049 	for_each_tracing_cpu(cpu)
10050 		tracing_init_tracefs_percpu(tr, cpu);
10051 
10052 	ftrace_init_tracefs(tr, d_tracer);
10053 }
10054 
10055 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10056 {
10057 	struct vfsmount *mnt;
10058 	struct file_system_type *type;
10059 
10060 	/*
10061 	 * To maintain backward compatibility for tools that mount
10062 	 * debugfs to get to the tracing facility, tracefs is automatically
10063 	 * mounted to the debugfs/tracing directory.
10064 	 */
10065 	type = get_fs_type("tracefs");
10066 	if (!type)
10067 		return NULL;
10068 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10069 	put_filesystem(type);
10070 	if (IS_ERR(mnt))
10071 		return NULL;
10072 	mntget(mnt);
10073 
10074 	return mnt;
10075 }
10076 
10077 /**
10078  * tracing_init_dentry - initialize top level trace array
10079  *
10080  * This is called when creating files or directories in the tracing
10081  * directory. It is called via fs_initcall() by any of the boot up code
10082  * and expects to return the dentry of the top level tracing directory.
10083  */
10084 int tracing_init_dentry(void)
10085 {
10086 	struct trace_array *tr = &global_trace;
10087 
10088 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10089 		pr_warn("Tracing disabled due to lockdown\n");
10090 		return -EPERM;
10091 	}
10092 
10093 	/* The top level trace array uses  NULL as parent */
10094 	if (tr->dir)
10095 		return 0;
10096 
10097 	if (WARN_ON(!tracefs_initialized()))
10098 		return -ENODEV;
10099 
10100 	/*
10101 	 * As there may still be users that expect the tracing
10102 	 * files to exist in debugfs/tracing, we must automount
10103 	 * the tracefs file system there, so older tools still
10104 	 * work with the newer kernel.
10105 	 */
10106 	tr->dir = debugfs_create_automount("tracing", NULL,
10107 					   trace_automount, NULL);
10108 
10109 	return 0;
10110 }
10111 
10112 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10113 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10114 
10115 static struct workqueue_struct *eval_map_wq __initdata;
10116 static struct work_struct eval_map_work __initdata;
10117 static struct work_struct tracerfs_init_work __initdata;
10118 
10119 static void __init eval_map_work_func(struct work_struct *work)
10120 {
10121 	int len;
10122 
10123 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10124 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10125 }
10126 
10127 static int __init trace_eval_init(void)
10128 {
10129 	INIT_WORK(&eval_map_work, eval_map_work_func);
10130 
10131 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10132 	if (!eval_map_wq) {
10133 		pr_err("Unable to allocate eval_map_wq\n");
10134 		/* Do work here */
10135 		eval_map_work_func(&eval_map_work);
10136 		return -ENOMEM;
10137 	}
10138 
10139 	queue_work(eval_map_wq, &eval_map_work);
10140 	return 0;
10141 }
10142 
10143 subsys_initcall(trace_eval_init);
10144 
10145 static int __init trace_eval_sync(void)
10146 {
10147 	/* Make sure the eval map updates are finished */
10148 	if (eval_map_wq)
10149 		destroy_workqueue(eval_map_wq);
10150 	return 0;
10151 }
10152 
10153 late_initcall_sync(trace_eval_sync);
10154 
10155 
10156 #ifdef CONFIG_MODULES
10157 
10158 bool module_exists(const char *module)
10159 {
10160 	/* All modules have the symbol __this_module */
10161 	static const char this_mod[] = "__this_module";
10162 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
10163 	unsigned long val;
10164 	int n;
10165 
10166 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10167 
10168 	if (n > sizeof(modname) - 1)
10169 		return false;
10170 
10171 	val = module_kallsyms_lookup_name(modname);
10172 	return val != 0;
10173 }
10174 
10175 static void trace_module_add_evals(struct module *mod)
10176 {
10177 	if (!mod->num_trace_evals)
10178 		return;
10179 
10180 	/*
10181 	 * Modules with bad taint do not have events created, do
10182 	 * not bother with enums either.
10183 	 */
10184 	if (trace_module_has_bad_taint(mod))
10185 		return;
10186 
10187 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10188 }
10189 
10190 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10191 static void trace_module_remove_evals(struct module *mod)
10192 {
10193 	union trace_eval_map_item *map;
10194 	union trace_eval_map_item **last = &trace_eval_maps;
10195 
10196 	if (!mod->num_trace_evals)
10197 		return;
10198 
10199 	guard(mutex)(&trace_eval_mutex);
10200 
10201 	map = trace_eval_maps;
10202 
10203 	while (map) {
10204 		if (map->head.mod == mod)
10205 			break;
10206 		map = trace_eval_jmp_to_tail(map);
10207 		last = &map->tail.next;
10208 		map = map->tail.next;
10209 	}
10210 	if (!map)
10211 		return;
10212 
10213 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10214 	kfree(map);
10215 }
10216 #else
10217 static inline void trace_module_remove_evals(struct module *mod) { }
10218 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10219 
10220 static void trace_module_record(struct module *mod, bool add)
10221 {
10222 	struct trace_array *tr;
10223 	unsigned long flags;
10224 
10225 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10226 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10227 		/* Update any persistent trace array that has already been started */
10228 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10229 			guard(mutex)(&scratch_mutex);
10230 			save_mod(mod, tr);
10231 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10232 			/* Update delta if the module loaded in previous boot */
10233 			make_mod_delta(mod, tr);
10234 		}
10235 	}
10236 }
10237 
10238 static int trace_module_notify(struct notifier_block *self,
10239 			       unsigned long val, void *data)
10240 {
10241 	struct module *mod = data;
10242 
10243 	switch (val) {
10244 	case MODULE_STATE_COMING:
10245 		trace_module_add_evals(mod);
10246 		trace_module_record(mod, true);
10247 		break;
10248 	case MODULE_STATE_GOING:
10249 		trace_module_remove_evals(mod);
10250 		trace_module_record(mod, false);
10251 		break;
10252 	}
10253 
10254 	return NOTIFY_OK;
10255 }
10256 
10257 static struct notifier_block trace_module_nb = {
10258 	.notifier_call = trace_module_notify,
10259 	.priority = 0,
10260 };
10261 #endif /* CONFIG_MODULES */
10262 
10263 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10264 {
10265 
10266 	event_trace_init();
10267 
10268 	init_tracer_tracefs(&global_trace, NULL);
10269 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10270 
10271 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10272 			&global_trace, &tracing_thresh_fops);
10273 
10274 	trace_create_file("README", TRACE_MODE_READ, NULL,
10275 			NULL, &tracing_readme_fops);
10276 
10277 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10278 			NULL, &tracing_saved_cmdlines_fops);
10279 
10280 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10281 			  NULL, &tracing_saved_cmdlines_size_fops);
10282 
10283 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10284 			NULL, &tracing_saved_tgids_fops);
10285 
10286 	trace_create_eval_file(NULL);
10287 
10288 #ifdef CONFIG_MODULES
10289 	register_module_notifier(&trace_module_nb);
10290 #endif
10291 
10292 #ifdef CONFIG_DYNAMIC_FTRACE
10293 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10294 			NULL, &tracing_dyn_info_fops);
10295 #endif
10296 
10297 	create_trace_instances(NULL);
10298 
10299 	update_tracer_options(&global_trace);
10300 }
10301 
10302 static __init int tracer_init_tracefs(void)
10303 {
10304 	int ret;
10305 
10306 	trace_access_lock_init();
10307 
10308 	ret = tracing_init_dentry();
10309 	if (ret)
10310 		return 0;
10311 
10312 	if (eval_map_wq) {
10313 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10314 		queue_work(eval_map_wq, &tracerfs_init_work);
10315 	} else {
10316 		tracer_init_tracefs_work_func(NULL);
10317 	}
10318 
10319 	rv_init_interface();
10320 
10321 	return 0;
10322 }
10323 
10324 fs_initcall(tracer_init_tracefs);
10325 
10326 static int trace_die_panic_handler(struct notifier_block *self,
10327 				unsigned long ev, void *unused);
10328 
10329 static struct notifier_block trace_panic_notifier = {
10330 	.notifier_call = trace_die_panic_handler,
10331 	.priority = INT_MAX - 1,
10332 };
10333 
10334 static struct notifier_block trace_die_notifier = {
10335 	.notifier_call = trace_die_panic_handler,
10336 	.priority = INT_MAX - 1,
10337 };
10338 
10339 /*
10340  * The idea is to execute the following die/panic callback early, in order
10341  * to avoid showing irrelevant information in the trace (like other panic
10342  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10343  * warnings get disabled (to prevent potential log flooding).
10344  */
10345 static int trace_die_panic_handler(struct notifier_block *self,
10346 				unsigned long ev, void *unused)
10347 {
10348 	if (!ftrace_dump_on_oops_enabled())
10349 		return NOTIFY_DONE;
10350 
10351 	/* The die notifier requires DIE_OOPS to trigger */
10352 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10353 		return NOTIFY_DONE;
10354 
10355 	ftrace_dump(DUMP_PARAM);
10356 
10357 	return NOTIFY_DONE;
10358 }
10359 
10360 /*
10361  * printk is set to max of 1024, we really don't need it that big.
10362  * Nothing should be printing 1000 characters anyway.
10363  */
10364 #define TRACE_MAX_PRINT		1000
10365 
10366 /*
10367  * Define here KERN_TRACE so that we have one place to modify
10368  * it if we decide to change what log level the ftrace dump
10369  * should be at.
10370  */
10371 #define KERN_TRACE		KERN_EMERG
10372 
10373 void
10374 trace_printk_seq(struct trace_seq *s)
10375 {
10376 	/* Probably should print a warning here. */
10377 	if (s->seq.len >= TRACE_MAX_PRINT)
10378 		s->seq.len = TRACE_MAX_PRINT;
10379 
10380 	/*
10381 	 * More paranoid code. Although the buffer size is set to
10382 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10383 	 * an extra layer of protection.
10384 	 */
10385 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10386 		s->seq.len = s->seq.size - 1;
10387 
10388 	/* should be zero ended, but we are paranoid. */
10389 	s->buffer[s->seq.len] = 0;
10390 
10391 	printk(KERN_TRACE "%s", s->buffer);
10392 
10393 	trace_seq_init(s);
10394 }
10395 
10396 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10397 {
10398 	iter->tr = tr;
10399 	iter->trace = iter->tr->current_trace;
10400 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10401 	iter->array_buffer = &tr->array_buffer;
10402 
10403 	if (iter->trace && iter->trace->open)
10404 		iter->trace->open(iter);
10405 
10406 	/* Annotate start of buffers if we had overruns */
10407 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10408 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10409 
10410 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10411 	if (trace_clocks[iter->tr->clock_id].in_ns)
10412 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10413 
10414 	/* Can not use kmalloc for iter.temp and iter.fmt */
10415 	iter->temp = static_temp_buf;
10416 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10417 	iter->fmt = static_fmt_buf;
10418 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10419 }
10420 
10421 void trace_init_global_iter(struct trace_iterator *iter)
10422 {
10423 	trace_init_iter(iter, &global_trace);
10424 }
10425 
10426 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10427 {
10428 	/* use static because iter can be a bit big for the stack */
10429 	static struct trace_iterator iter;
10430 	unsigned int old_userobj;
10431 	unsigned long flags;
10432 	int cnt = 0, cpu;
10433 
10434 	/*
10435 	 * Always turn off tracing when we dump.
10436 	 * We don't need to show trace output of what happens
10437 	 * between multiple crashes.
10438 	 *
10439 	 * If the user does a sysrq-z, then they can re-enable
10440 	 * tracing with echo 1 > tracing_on.
10441 	 */
10442 	tracer_tracing_off(tr);
10443 
10444 	local_irq_save(flags);
10445 
10446 	/* Simulate the iterator */
10447 	trace_init_iter(&iter, tr);
10448 
10449 	for_each_tracing_cpu(cpu) {
10450 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10451 	}
10452 
10453 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10454 
10455 	/* don't look at user memory in panic mode */
10456 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10457 
10458 	if (dump_mode == DUMP_ORIG)
10459 		iter.cpu_file = raw_smp_processor_id();
10460 	else
10461 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10462 
10463 	if (tr == &global_trace)
10464 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10465 	else
10466 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10467 
10468 	/* Did function tracer already get disabled? */
10469 	if (ftrace_is_dead()) {
10470 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10471 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10472 	}
10473 
10474 	/*
10475 	 * We need to stop all tracing on all CPUS to read
10476 	 * the next buffer. This is a bit expensive, but is
10477 	 * not done often. We fill all what we can read,
10478 	 * and then release the locks again.
10479 	 */
10480 
10481 	while (!trace_empty(&iter)) {
10482 
10483 		if (!cnt)
10484 			printk(KERN_TRACE "---------------------------------\n");
10485 
10486 		cnt++;
10487 
10488 		trace_iterator_reset(&iter);
10489 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10490 
10491 		if (trace_find_next_entry_inc(&iter) != NULL) {
10492 			int ret;
10493 
10494 			ret = print_trace_line(&iter);
10495 			if (ret != TRACE_TYPE_NO_CONSUME)
10496 				trace_consume(&iter);
10497 		}
10498 		touch_nmi_watchdog();
10499 
10500 		trace_printk_seq(&iter.seq);
10501 	}
10502 
10503 	if (!cnt)
10504 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10505 	else
10506 		printk(KERN_TRACE "---------------------------------\n");
10507 
10508 	tr->trace_flags |= old_userobj;
10509 
10510 	for_each_tracing_cpu(cpu) {
10511 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10512 	}
10513 	local_irq_restore(flags);
10514 }
10515 
10516 static void ftrace_dump_by_param(void)
10517 {
10518 	bool first_param = true;
10519 	char dump_param[MAX_TRACER_SIZE];
10520 	char *buf, *token, *inst_name;
10521 	struct trace_array *tr;
10522 
10523 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10524 	buf = dump_param;
10525 
10526 	while ((token = strsep(&buf, ",")) != NULL) {
10527 		if (first_param) {
10528 			first_param = false;
10529 			if (!strcmp("0", token))
10530 				continue;
10531 			else if (!strcmp("1", token)) {
10532 				ftrace_dump_one(&global_trace, DUMP_ALL);
10533 				continue;
10534 			}
10535 			else if (!strcmp("2", token) ||
10536 			  !strcmp("orig_cpu", token)) {
10537 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10538 				continue;
10539 			}
10540 		}
10541 
10542 		inst_name = strsep(&token, "=");
10543 		tr = trace_array_find(inst_name);
10544 		if (!tr) {
10545 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10546 			continue;
10547 		}
10548 
10549 		if (token && (!strcmp("2", token) ||
10550 			  !strcmp("orig_cpu", token)))
10551 			ftrace_dump_one(tr, DUMP_ORIG);
10552 		else
10553 			ftrace_dump_one(tr, DUMP_ALL);
10554 	}
10555 }
10556 
10557 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10558 {
10559 	static atomic_t dump_running;
10560 
10561 	/* Only allow one dump user at a time. */
10562 	if (atomic_inc_return(&dump_running) != 1) {
10563 		atomic_dec(&dump_running);
10564 		return;
10565 	}
10566 
10567 	switch (oops_dump_mode) {
10568 	case DUMP_ALL:
10569 		ftrace_dump_one(&global_trace, DUMP_ALL);
10570 		break;
10571 	case DUMP_ORIG:
10572 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10573 		break;
10574 	case DUMP_PARAM:
10575 		ftrace_dump_by_param();
10576 		break;
10577 	case DUMP_NONE:
10578 		break;
10579 	default:
10580 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10581 		ftrace_dump_one(&global_trace, DUMP_ALL);
10582 	}
10583 
10584 	atomic_dec(&dump_running);
10585 }
10586 EXPORT_SYMBOL_GPL(ftrace_dump);
10587 
10588 #define WRITE_BUFSIZE  4096
10589 
10590 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10591 				size_t count, loff_t *ppos,
10592 				int (*createfn)(const char *))
10593 {
10594 	char *kbuf, *buf, *tmp;
10595 	int ret = 0;
10596 	size_t done = 0;
10597 	size_t size;
10598 
10599 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10600 	if (!kbuf)
10601 		return -ENOMEM;
10602 
10603 	while (done < count) {
10604 		size = count - done;
10605 
10606 		if (size >= WRITE_BUFSIZE)
10607 			size = WRITE_BUFSIZE - 1;
10608 
10609 		if (copy_from_user(kbuf, buffer + done, size)) {
10610 			ret = -EFAULT;
10611 			goto out;
10612 		}
10613 		kbuf[size] = '\0';
10614 		buf = kbuf;
10615 		do {
10616 			tmp = strchr(buf, '\n');
10617 			if (tmp) {
10618 				*tmp = '\0';
10619 				size = tmp - buf + 1;
10620 			} else {
10621 				size = strlen(buf);
10622 				if (done + size < count) {
10623 					if (buf != kbuf)
10624 						break;
10625 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10626 					pr_warn("Line length is too long: Should be less than %d\n",
10627 						WRITE_BUFSIZE - 2);
10628 					ret = -EINVAL;
10629 					goto out;
10630 				}
10631 			}
10632 			done += size;
10633 
10634 			/* Remove comments */
10635 			tmp = strchr(buf, '#');
10636 
10637 			if (tmp)
10638 				*tmp = '\0';
10639 
10640 			ret = createfn(buf);
10641 			if (ret)
10642 				goto out;
10643 			buf += size;
10644 
10645 		} while (done < count);
10646 	}
10647 	ret = done;
10648 
10649 out:
10650 	kfree(kbuf);
10651 
10652 	return ret;
10653 }
10654 
10655 #ifdef CONFIG_TRACER_MAX_TRACE
10656 __init static bool tr_needs_alloc_snapshot(const char *name)
10657 {
10658 	char *test;
10659 	int len = strlen(name);
10660 	bool ret;
10661 
10662 	if (!boot_snapshot_index)
10663 		return false;
10664 
10665 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10666 	    boot_snapshot_info[len] == '\t')
10667 		return true;
10668 
10669 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10670 	if (!test)
10671 		return false;
10672 
10673 	sprintf(test, "\t%s\t", name);
10674 	ret = strstr(boot_snapshot_info, test) == NULL;
10675 	kfree(test);
10676 	return ret;
10677 }
10678 
10679 __init static void do_allocate_snapshot(const char *name)
10680 {
10681 	if (!tr_needs_alloc_snapshot(name))
10682 		return;
10683 
10684 	/*
10685 	 * When allocate_snapshot is set, the next call to
10686 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10687 	 * will allocate the snapshot buffer. That will alse clear
10688 	 * this flag.
10689 	 */
10690 	allocate_snapshot = true;
10691 }
10692 #else
10693 static inline void do_allocate_snapshot(const char *name) { }
10694 #endif
10695 
10696 __init static void enable_instances(void)
10697 {
10698 	struct trace_array *tr;
10699 	bool memmap_area = false;
10700 	char *curr_str;
10701 	char *name;
10702 	char *str;
10703 	char *tok;
10704 
10705 	/* A tab is always appended */
10706 	boot_instance_info[boot_instance_index - 1] = '\0';
10707 	str = boot_instance_info;
10708 
10709 	while ((curr_str = strsep(&str, "\t"))) {
10710 		phys_addr_t start = 0;
10711 		phys_addr_t size = 0;
10712 		unsigned long addr = 0;
10713 		bool traceprintk = false;
10714 		bool traceoff = false;
10715 		char *flag_delim;
10716 		char *addr_delim;
10717 		char *rname __free(kfree) = NULL;
10718 
10719 		tok = strsep(&curr_str, ",");
10720 
10721 		flag_delim = strchr(tok, '^');
10722 		addr_delim = strchr(tok, '@');
10723 
10724 		if (addr_delim)
10725 			*addr_delim++ = '\0';
10726 
10727 		if (flag_delim)
10728 			*flag_delim++ = '\0';
10729 
10730 		name = tok;
10731 
10732 		if (flag_delim) {
10733 			char *flag;
10734 
10735 			while ((flag = strsep(&flag_delim, "^"))) {
10736 				if (strcmp(flag, "traceoff") == 0) {
10737 					traceoff = true;
10738 				} else if ((strcmp(flag, "printk") == 0) ||
10739 					   (strcmp(flag, "traceprintk") == 0) ||
10740 					   (strcmp(flag, "trace_printk") == 0)) {
10741 					traceprintk = true;
10742 				} else {
10743 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10744 						flag, name);
10745 				}
10746 			}
10747 		}
10748 
10749 		tok = addr_delim;
10750 		if (tok && isdigit(*tok)) {
10751 			start = memparse(tok, &tok);
10752 			if (!start) {
10753 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10754 					name);
10755 				continue;
10756 			}
10757 			if (*tok != ':') {
10758 				pr_warn("Tracing: No size specified for instance %s\n", name);
10759 				continue;
10760 			}
10761 			tok++;
10762 			size = memparse(tok, &tok);
10763 			if (!size) {
10764 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10765 					name);
10766 				continue;
10767 			}
10768 			memmap_area = true;
10769 		} else if (tok) {
10770 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10771 				start = 0;
10772 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10773 				continue;
10774 			}
10775 			rname = kstrdup(tok, GFP_KERNEL);
10776 		}
10777 
10778 		if (start) {
10779 			/* Start and size must be page aligned */
10780 			if (start & ~PAGE_MASK) {
10781 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10782 				continue;
10783 			}
10784 			if (size & ~PAGE_MASK) {
10785 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10786 				continue;
10787 			}
10788 
10789 			if (memmap_area)
10790 				addr = map_pages(start, size);
10791 			else
10792 				addr = (unsigned long)phys_to_virt(start);
10793 			if (addr) {
10794 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10795 					name, &start, (unsigned long)size);
10796 			} else {
10797 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10798 				continue;
10799 			}
10800 		} else {
10801 			/* Only non mapped buffers have snapshot buffers */
10802 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10803 				do_allocate_snapshot(name);
10804 		}
10805 
10806 		tr = trace_array_create_systems(name, NULL, addr, size);
10807 		if (IS_ERR(tr)) {
10808 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10809 			continue;
10810 		}
10811 
10812 		if (traceoff)
10813 			tracer_tracing_off(tr);
10814 
10815 		if (traceprintk)
10816 			update_printk_trace(tr);
10817 
10818 		/*
10819 		 * memmap'd buffers can not be freed.
10820 		 */
10821 		if (memmap_area) {
10822 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10823 			tr->ref++;
10824 		}
10825 
10826 		if (start) {
10827 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10828 			tr->range_name = no_free_ptr(rname);
10829 		}
10830 
10831 		while ((tok = strsep(&curr_str, ","))) {
10832 			early_enable_events(tr, tok, true);
10833 		}
10834 	}
10835 }
10836 
10837 __init static int tracer_alloc_buffers(void)
10838 {
10839 	int ring_buf_size;
10840 	int ret = -ENOMEM;
10841 
10842 
10843 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10844 		pr_warn("Tracing disabled due to lockdown\n");
10845 		return -EPERM;
10846 	}
10847 
10848 	/*
10849 	 * Make sure we don't accidentally add more trace options
10850 	 * than we have bits for.
10851 	 */
10852 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10853 
10854 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10855 		goto out;
10856 
10857 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10858 		goto out_free_buffer_mask;
10859 
10860 	/* Only allocate trace_printk buffers if a trace_printk exists */
10861 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10862 		/* Must be called before global_trace.buffer is allocated */
10863 		trace_printk_init_buffers();
10864 
10865 	/* To save memory, keep the ring buffer size to its minimum */
10866 	if (global_trace.ring_buffer_expanded)
10867 		ring_buf_size = trace_buf_size;
10868 	else
10869 		ring_buf_size = 1;
10870 
10871 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10872 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10873 
10874 	raw_spin_lock_init(&global_trace.start_lock);
10875 
10876 	/*
10877 	 * The prepare callbacks allocates some memory for the ring buffer. We
10878 	 * don't free the buffer if the CPU goes down. If we were to free
10879 	 * the buffer, then the user would lose any trace that was in the
10880 	 * buffer. The memory will be removed once the "instance" is removed.
10881 	 */
10882 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10883 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10884 				      NULL);
10885 	if (ret < 0)
10886 		goto out_free_cpumask;
10887 	/* Used for event triggers */
10888 	ret = -ENOMEM;
10889 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10890 	if (!temp_buffer)
10891 		goto out_rm_hp_state;
10892 
10893 	if (trace_create_savedcmd() < 0)
10894 		goto out_free_temp_buffer;
10895 
10896 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10897 		goto out_free_savedcmd;
10898 
10899 	/* TODO: make the number of buffers hot pluggable with CPUS */
10900 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10901 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10902 		goto out_free_pipe_cpumask;
10903 	}
10904 	if (global_trace.buffer_disabled)
10905 		tracing_off();
10906 
10907 	if (trace_boot_clock) {
10908 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10909 		if (ret < 0)
10910 			pr_warn("Trace clock %s not defined, going back to default\n",
10911 				trace_boot_clock);
10912 	}
10913 
10914 	/*
10915 	 * register_tracer() might reference current_trace, so it
10916 	 * needs to be set before we register anything. This is
10917 	 * just a bootstrap of current_trace anyway.
10918 	 */
10919 	global_trace.current_trace = &nop_trace;
10920 
10921 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10922 #ifdef CONFIG_TRACER_MAX_TRACE
10923 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10924 #endif
10925 	ftrace_init_global_array_ops(&global_trace);
10926 
10927 #ifdef CONFIG_MODULES
10928 	INIT_LIST_HEAD(&global_trace.mod_events);
10929 #endif
10930 
10931 	init_trace_flags_index(&global_trace);
10932 
10933 	register_tracer(&nop_trace);
10934 
10935 	/* Function tracing may start here (via kernel command line) */
10936 	init_function_trace();
10937 
10938 	/* All seems OK, enable tracing */
10939 	tracing_disabled = 0;
10940 
10941 	atomic_notifier_chain_register(&panic_notifier_list,
10942 				       &trace_panic_notifier);
10943 
10944 	register_die_notifier(&trace_die_notifier);
10945 
10946 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10947 
10948 	INIT_LIST_HEAD(&global_trace.systems);
10949 	INIT_LIST_HEAD(&global_trace.events);
10950 	INIT_LIST_HEAD(&global_trace.hist_vars);
10951 	INIT_LIST_HEAD(&global_trace.err_log);
10952 	list_add(&global_trace.list, &ftrace_trace_arrays);
10953 
10954 	apply_trace_boot_options();
10955 
10956 	register_snapshot_cmd();
10957 
10958 	return 0;
10959 
10960 out_free_pipe_cpumask:
10961 	free_cpumask_var(global_trace.pipe_cpumask);
10962 out_free_savedcmd:
10963 	trace_free_saved_cmdlines_buffer();
10964 out_free_temp_buffer:
10965 	ring_buffer_free(temp_buffer);
10966 out_rm_hp_state:
10967 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10968 out_free_cpumask:
10969 	free_cpumask_var(global_trace.tracing_cpumask);
10970 out_free_buffer_mask:
10971 	free_cpumask_var(tracing_buffer_mask);
10972 out:
10973 	return ret;
10974 }
10975 
10976 #ifdef CONFIG_FUNCTION_TRACER
10977 /* Used to set module cached ftrace filtering at boot up */
10978 __init struct trace_array *trace_get_global_array(void)
10979 {
10980 	return &global_trace;
10981 }
10982 #endif
10983 
10984 void __init ftrace_boot_snapshot(void)
10985 {
10986 #ifdef CONFIG_TRACER_MAX_TRACE
10987 	struct trace_array *tr;
10988 
10989 	if (!snapshot_at_boot)
10990 		return;
10991 
10992 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10993 		if (!tr->allocated_snapshot)
10994 			continue;
10995 
10996 		tracing_snapshot_instance(tr);
10997 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10998 	}
10999 #endif
11000 }
11001 
11002 void __init early_trace_init(void)
11003 {
11004 	if (tracepoint_printk) {
11005 		tracepoint_print_iter =
11006 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11007 		if (MEM_FAIL(!tracepoint_print_iter,
11008 			     "Failed to allocate trace iterator\n"))
11009 			tracepoint_printk = 0;
11010 		else
11011 			static_key_enable(&tracepoint_printk_key.key);
11012 	}
11013 	tracer_alloc_buffers();
11014 
11015 	init_events();
11016 }
11017 
11018 void __init trace_init(void)
11019 {
11020 	trace_event_init();
11021 
11022 	if (boot_instance_index)
11023 		enable_instances();
11024 }
11025 
11026 __init static void clear_boot_tracer(void)
11027 {
11028 	/*
11029 	 * The default tracer at boot buffer is an init section.
11030 	 * This function is called in lateinit. If we did not
11031 	 * find the boot tracer, then clear it out, to prevent
11032 	 * later registration from accessing the buffer that is
11033 	 * about to be freed.
11034 	 */
11035 	if (!default_bootup_tracer)
11036 		return;
11037 
11038 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11039 	       default_bootup_tracer);
11040 	default_bootup_tracer = NULL;
11041 }
11042 
11043 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11044 __init static void tracing_set_default_clock(void)
11045 {
11046 	/* sched_clock_stable() is determined in late_initcall */
11047 	if (!trace_boot_clock && !sched_clock_stable()) {
11048 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11049 			pr_warn("Can not set tracing clock due to lockdown\n");
11050 			return;
11051 		}
11052 
11053 		printk(KERN_WARNING
11054 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11055 		       "If you want to keep using the local clock, then add:\n"
11056 		       "  \"trace_clock=local\"\n"
11057 		       "on the kernel command line\n");
11058 		tracing_set_clock(&global_trace, "global");
11059 	}
11060 }
11061 #else
11062 static inline void tracing_set_default_clock(void) { }
11063 #endif
11064 
11065 __init static int late_trace_init(void)
11066 {
11067 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11068 		static_key_disable(&tracepoint_printk_key.key);
11069 		tracepoint_printk = 0;
11070 	}
11071 
11072 	tracing_set_default_clock();
11073 	clear_boot_tracer();
11074 	return 0;
11075 }
11076 
11077 late_initcall_sync(late_trace_init);
11078