xref: /linux-6.15/kernel/trace/trace.c (revision 25742aeb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
6  * Copyright (C) 2008 Ingo Molnar <[email protected]>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <[email protected]>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring-buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring-buffer, such as trace_printk, could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66 
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72 
73 void __init disable_tracing_selftest(const char *reason)
74 {
75 	if (!tracing_selftest_disabled) {
76 		tracing_selftest_disabled = true;
77 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
78 	}
79 }
80 #else
81 #define tracing_selftest_running	0
82 #define tracing_selftest_disabled	0
83 #endif
84 
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set to 1 if you want to dump the buffers of all CPUs
132  * Set to 2 if you want to dump the buffer of the CPU that triggered the oops
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" is first and points to NULL as it must be different
152 	 * than "mod" or "eval_string"
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
172 
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned int trace_ctx);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 static bool snapshot_at_boot;
187 
188 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
189 static int boot_instance_index;
190 
191 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_snapshot_index;
193 
194 static int __init set_cmdline_ftrace(char *str)
195 {
196 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197 	default_bootup_tracer = bootup_tracer_buf;
198 	/* We are using ftrace early, expand it */
199 	trace_set_ring_buffer_expanded(NULL);
200 	return 1;
201 }
202 __setup("ftrace=", set_cmdline_ftrace);
203 
204 static int __init set_ftrace_dump_on_oops(char *str)
205 {
206 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
207 		ftrace_dump_on_oops = DUMP_ALL;
208 		return 1;
209 	}
210 
211 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212 		ftrace_dump_on_oops = DUMP_ORIG;
213 		return 1;
214 	}
215 
216 	return 0;
217 }
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
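/*
 * Editor's note (not in the original source): given the parsing in
 * set_ftrace_dump_on_oops() above, typical kernel command lines look like:
 *
 *	ftrace_dump_on_oops		(dump the buffers of all CPUs)
 *	ftrace_dump_on_oops=orig_cpu	(dump only the CPU that triggered the oops)
 *	ftrace_dump_on_oops=2		(same as orig_cpu)
 */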
219 
220 static int __init stop_trace_on_warning(char *str)
221 {
222 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223 		__disable_trace_on_warning = 1;
224 	return 1;
225 }
226 __setup("traceoff_on_warning", stop_trace_on_warning);
227 
228 static int __init boot_alloc_snapshot(char *str)
229 {
230 	char *slot = boot_snapshot_info + boot_snapshot_index;
231 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
232 	int ret;
233 
234 	if (str[0] == '=') {
235 		str++;
236 		if (strlen(str) >= left)
237 			return -1;
238 
239 		ret = snprintf(slot, left, "%s\t", str);
240 		boot_snapshot_index += ret;
241 	} else {
242 		allocate_snapshot = true;
243 		/* We also need the main ring buffer expanded */
244 		trace_set_ring_buffer_expanded(NULL);
245 	}
246 	return 1;
247 }
248 __setup("alloc_snapshot", boot_alloc_snapshot);
249 
250 
251 static int __init boot_snapshot(char *str)
252 {
253 	snapshot_at_boot = true;
254 	boot_alloc_snapshot(str);
255 	return 1;
256 }
257 __setup("ftrace_boot_snapshot", boot_snapshot);
258 
259 
260 static int __init boot_instance(char *str)
261 {
262 	char *slot = boot_instance_info + boot_instance_index;
263 	int left = sizeof(boot_instance_info) - boot_instance_index;
264 	int ret;
265 
266 	if (strlen(str) >= left)
267 		return -1;
268 
269 	ret = snprintf(slot, left, "%s\t", str);
270 	boot_instance_index += ret;
271 
272 	return 1;
273 }
274 __setup("trace_instance=", boot_instance);
275 
276 
277 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
278 
279 static int __init set_trace_boot_options(char *str)
280 {
281 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
282 	return 1;
283 }
284 __setup("trace_options=", set_trace_boot_options);
285 
286 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
287 static char *trace_boot_clock __initdata;
288 
289 static int __init set_trace_boot_clock(char *str)
290 {
291 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
292 	trace_boot_clock = trace_boot_clock_buf;
293 	return 1;
294 }
295 __setup("trace_clock=", set_trace_boot_clock);
296 
297 static int __init set_tracepoint_printk(char *str)
298 {
299 	/* Ignore the "tp_printk_stop_on_boot" param */
300 	if (*str == '_')
301 		return 0;
302 
303 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
304 		tracepoint_printk = 1;
305 	return 1;
306 }
307 __setup("tp_printk", set_tracepoint_printk);
308 
309 static int __init set_tracepoint_printk_stop(char *str)
310 {
311 	tracepoint_printk_stop_on_boot = true;
312 	return 1;
313 }
314 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
315 
316 unsigned long long ns2usecs(u64 nsec)
317 {
318 	nsec += 500;
319 	do_div(nsec, 1000);
320 	return nsec;
321 }
322 
323 static void
324 trace_process_export(struct trace_export *export,
325 	       struct ring_buffer_event *event, int flag)
326 {
327 	struct trace_entry *entry;
328 	unsigned int size = 0;
329 
330 	if (export->flags & flag) {
331 		entry = ring_buffer_event_data(event);
332 		size = ring_buffer_event_length(event);
333 		export->write(export, entry, size);
334 	}
335 }
336 
337 static DEFINE_MUTEX(ftrace_export_lock);
338 
339 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
340 
341 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
342 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
344 
345 static inline void ftrace_exports_enable(struct trace_export *export)
346 {
347 	if (export->flags & TRACE_EXPORT_FUNCTION)
348 		static_branch_inc(&trace_function_exports_enabled);
349 
350 	if (export->flags & TRACE_EXPORT_EVENT)
351 		static_branch_inc(&trace_event_exports_enabled);
352 
353 	if (export->flags & TRACE_EXPORT_MARKER)
354 		static_branch_inc(&trace_marker_exports_enabled);
355 }
356 
357 static inline void ftrace_exports_disable(struct trace_export *export)
358 {
359 	if (export->flags & TRACE_EXPORT_FUNCTION)
360 		static_branch_dec(&trace_function_exports_enabled);
361 
362 	if (export->flags & TRACE_EXPORT_EVENT)
363 		static_branch_dec(&trace_event_exports_enabled);
364 
365 	if (export->flags & TRACE_EXPORT_MARKER)
366 		static_branch_dec(&trace_marker_exports_enabled);
367 }
368 
369 static void ftrace_exports(struct ring_buffer_event *event, int flag)
370 {
371 	struct trace_export *export;
372 
373 	preempt_disable_notrace();
374 
375 	export = rcu_dereference_raw_check(ftrace_exports_list);
376 	while (export) {
377 		trace_process_export(export, event, flag);
378 		export = rcu_dereference_raw_check(export->next);
379 	}
380 
381 	preempt_enable_notrace();
382 }
383 
384 static inline void
385 add_trace_export(struct trace_export **list, struct trace_export *export)
386 {
387 	rcu_assign_pointer(export->next, *list);
388 	/*
389 	 * We are entering export into the list but another
390 	 * CPU might be walking that list. We need to make sure
391 	 * the export->next pointer is valid before another CPU sees
392 	 * the export pointer included into the list.
393 	 */
394 	rcu_assign_pointer(*list, export);
395 }
396 
397 static inline int
398 rm_trace_export(struct trace_export **list, struct trace_export *export)
399 {
400 	struct trace_export **p;
401 
402 	for (p = list; *p != NULL; p = &(*p)->next)
403 		if (*p == export)
404 			break;
405 
406 	if (*p != export)
407 		return -1;
408 
409 	rcu_assign_pointer(*p, (*p)->next);
410 
411 	return 0;
412 }
413 
414 static inline void
415 add_ftrace_export(struct trace_export **list, struct trace_export *export)
416 {
417 	ftrace_exports_enable(export);
418 
419 	add_trace_export(list, export);
420 }
421 
422 static inline int
423 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
424 {
425 	int ret;
426 
427 	ret = rm_trace_export(list, export);
428 	ftrace_exports_disable(export);
429 
430 	return ret;
431 }
432 
433 int register_ftrace_export(struct trace_export *export)
434 {
435 	if (WARN_ON_ONCE(!export->write))
436 		return -1;
437 
438 	mutex_lock(&ftrace_export_lock);
439 
440 	add_ftrace_export(&ftrace_exports_list, export);
441 
442 	mutex_unlock(&ftrace_export_lock);
443 
444 	return 0;
445 }
446 EXPORT_SYMBOL_GPL(register_ftrace_export);
447 
448 int unregister_ftrace_export(struct trace_export *export)
449 {
450 	int ret;
451 
452 	mutex_lock(&ftrace_export_lock);
453 
454 	ret = rm_ftrace_export(&ftrace_exports_list, export);
455 
456 	mutex_unlock(&ftrace_export_lock);
457 
458 	return ret;
459 }
460 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
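/*
 * Editor's sketch (not part of the original file): how an external module
 * might hook into the export API above. The callback body and all
 * "example_*" names are hypothetical; only register_ftrace_export(),
 * unregister_ftrace_export() and the TRACE_EXPORT_* flags come from the
 * tracing code itself.
 */
#if 0	/* illustrative only, never built */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int len)
{
	/* forward the raw binary trace entry to some external sink */
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT | TRACE_EXPORT_MARKER,
};

/* on load:   register_ftrace_export(&example_export);   */
/* on unload: unregister_ftrace_export(&example_export); */
#endif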
461 
462 /* trace_flags holds trace_options default values */
463 #define TRACE_DEFAULT_FLAGS						\
464 	(FUNCTION_DEFAULT_FLAGS |					\
465 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
466 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
467 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
468 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
469 	 TRACE_ITER_HASH_PTR)
470 
471 /* trace_options that are only supported by global_trace */
472 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
473 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
474 
475 /* trace_flags that are default zero for instances */
476 #define ZEROED_TRACE_FLAGS \
477 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
478 
479 /*
480  * The global_trace is the descriptor that holds the top-level tracing
481  * buffers for the live tracing.
482  */
483 static struct trace_array global_trace = {
484 	.trace_flags = TRACE_DEFAULT_FLAGS,
485 };
486 
487 void trace_set_ring_buffer_expanded(struct trace_array *tr)
488 {
489 	if (!tr)
490 		tr = &global_trace;
491 	tr->ring_buffer_expanded = true;
492 }
493 
494 LIST_HEAD(ftrace_trace_arrays);
495 
496 int trace_array_get(struct trace_array *this_tr)
497 {
498 	struct trace_array *tr;
499 	int ret = -ENODEV;
500 
501 	mutex_lock(&trace_types_lock);
502 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 		if (tr == this_tr) {
504 			tr->ref++;
505 			ret = 0;
506 			break;
507 		}
508 	}
509 	mutex_unlock(&trace_types_lock);
510 
511 	return ret;
512 }
513 
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516 	WARN_ON(!this_tr->ref);
517 	this_tr->ref--;
518 }
519 
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr : pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531 	if (!this_tr)
532 		return;
533 
534 	mutex_lock(&trace_types_lock);
535 	__trace_array_put(this_tr);
536 	mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
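/*
 * Editor's sketch (not part of the original file): the intended get/put
 * pairing around a trace array reference, assuming the caller already has
 * a struct trace_array pointer (for example one returned earlier by
 * trace_array_get_by_name()).
 */
#if 0	/* illustrative only */
	if (trace_array_get(tr) < 0)
		return -ENODEV;		/* instance is gone or not registered */

	/* ... safely use tr: write events, flip options, etc. ... */

	trace_array_put(tr);		/* allow the instance to be destroyed again */
#endif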
539 
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542 	int ret;
543 
544 	ret = security_locked_down(LOCKDOWN_TRACEFS);
545 	if (ret)
546 		return ret;
547 
548 	if (tracing_disabled)
549 		return -ENODEV;
550 
551 	if (tr && trace_array_get(tr) < 0)
552 		return -ENODEV;
553 
554 	return 0;
555 }
556 
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 			      struct trace_buffer *buffer,
559 			      struct ring_buffer_event *event)
560 {
561 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562 	    !filter_match_preds(call->filter, rec)) {
563 		__trace_event_discard_commit(buffer, event);
564 		return 1;
565 	}
566 
567 	return 0;
568 }
569 
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580 	return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582 
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 		       struct trace_pid_list *filtered_no_pids,
596 		       struct task_struct *task)
597 {
598 	/*
599 	 * If filtered_no_pids is not empty, and the task's pid is listed
600 	 * in filtered_no_pids, then return true.
601 	 * Otherwise, if filtered_pids is empty, that means we can
602 	 * trace all tasks. If it has content, then only trace pids
603 	 * within filtered_pids.
604 	 */
605 
606 	return (filtered_pids &&
607 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 		(filtered_no_pids &&
609 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611 
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * If adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 				  struct task_struct *self,
626 				  struct task_struct *task)
627 {
628 	if (!pid_list)
629 		return;
630 
631 	/* For forks, we only add if the forking task is listed */
632 	if (self) {
633 		if (!trace_find_filtered_pid(pid_list, self->pid))
634 			return;
635 	}
636 
637 	/* "self" is set for forks, and NULL for exits */
638 	if (self)
639 		trace_pid_list_set(pid_list, task->pid);
640 	else
641 		trace_pid_list_clear(pid_list, task->pid);
642 }
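/*
 * Editor's sketch (not part of the original file): how the helper above is
 * meant to be driven from fork/exit tracepoint probes so that a filtered
 * pid list follows the lifetime of tasks. The probe names are hypothetical.
 */
#if 0	/* illustrative only */
static void example_fork_probe(void *data, struct task_struct *self,
			       struct task_struct *task)
{
	struct trace_pid_list *pid_list = data;

	/* the child is added only if the forking parent is listed */
	trace_filter_add_remove_task(pid_list, self, task);
}

static void example_exit_probe(void *data, struct task_struct *task)
{
	struct trace_pid_list *pid_list = data;

	/* self == NULL means "remove task->pid from the list" */
	trace_filter_add_remove_task(pid_list, NULL, task);
}
#endif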
643 
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658 	long pid = (unsigned long)v;
659 	unsigned int next;
660 
661 	(*pos)++;
662 
663 	/* pid already is +1 of the actual previous bit */
664 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 		return NULL;
666 
667 	pid = next;
668 
669 	/* Return pid + 1 to allow zero to be represented */
670 	return (void *)(pid + 1);
671 }
672 
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686 	unsigned long pid;
687 	unsigned int first;
688 	loff_t l = 0;
689 
690 	if (trace_pid_list_first(pid_list, &first) < 0)
691 		return NULL;
692 
693 	pid = first;
694 
695 	/* Return pid + 1 so that zero can be the exit value */
696 	for (pid++; pid && l < *pos;
697 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 		;
699 	return (void *)pid;
700 }
701 
702 /**
703  * trace_pid_show - show the current pid in seq_file processing
704  * @m: The seq_file structure to write into
705  * @v: A void pointer of the pid (+1) value to display
706  *
707  * Can be directly used by seq_file operations to display the current
708  * pid value.
709  */
710 int trace_pid_show(struct seq_file *m, void *v)
711 {
712 	unsigned long pid = (unsigned long)v - 1;
713 
714 	seq_printf(m, "%lu\n", pid);
715 	return 0;
716 }
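/*
 * Editor's sketch (not part of the original file): the three helpers above
 * slot into a seq_file interface roughly like this. The start/next wrappers,
 * the missing stop handler and the "example_*" names are hypothetical.
 */
#if 0	/* illustrative only */
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static const struct seq_operations example_pid_sops = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,		/* typically just an unlock */
	.show	= trace_pid_show,
};
#endif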
717 
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE		127
720 
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722 		    struct trace_pid_list **new_pid_list,
723 		    const char __user *ubuf, size_t cnt)
724 {
725 	struct trace_pid_list *pid_list;
726 	struct trace_parser parser;
727 	unsigned long val;
728 	int nr_pids = 0;
729 	ssize_t read = 0;
730 	ssize_t ret;
731 	loff_t pos;
732 	pid_t pid;
733 
734 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735 		return -ENOMEM;
736 
737 	/*
738 	 * Always create a new array. The write is an all or nothing
739 	 * operation: when the user adds new pids, they are built into a
740 	 * new array. If the operation fails, then the current list is
741 	 * not modified.
742 	 */
743 	pid_list = trace_pid_list_alloc();
744 	if (!pid_list) {
745 		trace_parser_put(&parser);
746 		return -ENOMEM;
747 	}
748 
749 	if (filtered_pids) {
750 		/* copy the current bits to the new max */
751 		ret = trace_pid_list_first(filtered_pids, &pid);
752 		while (!ret) {
753 			trace_pid_list_set(pid_list, pid);
754 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755 			nr_pids++;
756 		}
757 	}
758 
759 	ret = 0;
760 	while (cnt > 0) {
761 
762 		pos = 0;
763 
764 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
765 		if (ret < 0)
766 			break;
767 
768 		read += ret;
769 		ubuf += ret;
770 		cnt -= ret;
771 
772 		if (!trace_parser_loaded(&parser))
773 			break;
774 
775 		ret = -EINVAL;
776 		if (kstrtoul(parser.buffer, 0, &val))
777 			break;
778 
779 		pid = (pid_t)val;
780 
781 		if (trace_pid_list_set(pid_list, pid) < 0) {
782 			ret = -1;
783 			break;
784 		}
785 		nr_pids++;
786 
787 		trace_parser_clear(&parser);
788 		ret = 0;
789 	}
790 	trace_parser_put(&parser);
791 
792 	if (ret < 0) {
793 		trace_pid_list_free(pid_list);
794 		return ret;
795 	}
796 
797 	if (!nr_pids) {
798 		/* Cleared the list of pids */
799 		trace_pid_list_free(pid_list);
800 		pid_list = NULL;
801 	}
802 
803 	*new_pid_list = pid_list;
804 
805 	return read;
806 }
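/*
 * Editor's sketch (not part of the original file): the usual calling pattern
 * for trace_pid_write() from a tracefs ->write() handler. A brand new list
 * comes back and the caller publishes it with RCU before freeing the old
 * one; locking and error paths are simplified here.
 */
#if 0	/* illustrative only */
	struct trace_pid_list *filtered_pids, *pid_list;
	ssize_t ret;

	filtered_pids = rcu_dereference_protected(tr->filtered_pids,
						  lockdep_is_held(&event_mutex));

	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
	if (ret < 0)
		return ret;

	rcu_assign_pointer(tr->filtered_pids, pid_list);
	synchronize_rcu();
	trace_pid_list_free(filtered_pids);
#endif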
807 
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810 	u64 ts;
811 
812 	/* Early boot up does not have a buffer yet */
813 	if (!buf->buffer)
814 		return trace_clock_local();
815 
816 	ts = ring_buffer_time_stamp(buf->buffer);
817 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818 
819 	return ts;
820 }
821 
822 u64 ftrace_now(int cpu)
823 {
824 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826 
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled" to be used in fast paths such as for
832  * the irqsoff tracer. But it may be inaccurate due to races. If you
833  * need to know the accurate state, use tracing_is_on() which is a little
834  * slower, but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838 	/*
839 	 * For quick access (irqsoff uses this in fast path), just
840 	 * return the mirror variable of the state of the ring buffer.
841 	 * It's a little racy, but we don't really care.
842 	 */
843 	smp_rmb();
844 	return !global_trace.buffer_disabled;
845 }
846 
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low number of 16384.
853  * If the dump on oops happens, it will be much appreciated
854  * to not have to wait for all that output. Anyway, this can be
855  * configured at boot time and at run time.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
858 
859 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860 
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer		*trace_types __read_mostly;
863 
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868 
869 /*
870  * serialize the access of the ring buffer
871  *
872  * ring buffer serializes readers, but it is low level protection.
873  * The validity of the events (which returns by ring_buffer_peek() ..etc)
874  * are not protected by ring buffer.
875  *
876  * The content of events may become garbage if we allow other process consumes
877  * these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not reader page) in ring buffer, and this page will be rewritten
880  *      by events producer.
881  *   B) The page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to system.
883  *
884  * These primitives allow multi process access to different cpu ring buffer
885  * concurrently.
886  *
887  * These primitives don't distinguish read-only and read-consume access.
888  * Multi read-only access are also serialized.
889  */
890 
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894 
895 static inline void trace_access_lock(int cpu)
896 {
897 	if (cpu == RING_BUFFER_ALL_CPUS) {
898 		/* gain it for accessing the whole ring buffer. */
899 		down_write(&all_cpu_access_lock);
900 	} else {
901 		/* gain it for accessing a cpu ring buffer. */
902 
903 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904 		down_read(&all_cpu_access_lock);
905 
906 		/* Secondly block other access to this @cpu ring buffer. */
907 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 	}
909 }
910 
911 static inline void trace_access_unlock(int cpu)
912 {
913 	if (cpu == RING_BUFFER_ALL_CPUS) {
914 		up_write(&all_cpu_access_lock);
915 	} else {
916 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917 		up_read(&all_cpu_access_lock);
918 	}
919 }
920 
921 static inline void trace_access_lock_init(void)
922 {
923 	int cpu;
924 
925 	for_each_possible_cpu(cpu)
926 		mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928 
929 #else
930 
931 static DEFINE_MUTEX(access_lock);
932 
933 static inline void trace_access_lock(int cpu)
934 {
935 	(void)cpu;
936 	mutex_lock(&access_lock);
937 }
938 
939 static inline void trace_access_unlock(int cpu)
940 {
941 	(void)cpu;
942 	mutex_unlock(&access_lock);
943 }
944 
945 static inline void trace_access_lock_init(void)
946 {
947 }
948 
949 #endif
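/*
 * Editor's sketch (not part of the original file): the usage pattern the
 * primitives above are designed for. A reader consuming a single CPU buffer
 * and a reader that needs every CPU buffer exclude each other correctly:
 */
#if 0	/* illustrative only */
	/* consume (or splice) one CPU's buffer */
	trace_access_lock(cpu);
	/* ... read events from that cpu's ring buffer ... */
	trace_access_unlock(cpu);

	/* or take exclusive access across all CPU buffers */
	trace_access_lock(RING_BUFFER_ALL_CPUS);
	/* ... iterate every cpu's ring buffer ... */
	trace_access_unlock(RING_BUFFER_ALL_CPUS);
#endif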
950 
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 				 unsigned int trace_ctx,
954 				 int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956 				      struct trace_buffer *buffer,
957 				      unsigned int trace_ctx,
958 				      int skip, struct pt_regs *regs);
959 
960 #else
961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962 					unsigned int trace_ctx,
963 					int skip, struct pt_regs *regs)
964 {
965 }
966 static inline void ftrace_trace_stack(struct trace_array *tr,
967 				      struct trace_buffer *buffer,
968 				      unsigned long trace_ctx,
969 				      int skip, struct pt_regs *regs)
970 {
971 }
972 
973 #endif
974 
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977 		  int type, unsigned int trace_ctx)
978 {
979 	struct trace_entry *ent = ring_buffer_event_data(event);
980 
981 	tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983 
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 			  int type,
987 			  unsigned long len,
988 			  unsigned int trace_ctx)
989 {
990 	struct ring_buffer_event *event;
991 
992 	event = ring_buffer_lock_reserve(buffer, len);
993 	if (event != NULL)
994 		trace_event_setup(event, type, trace_ctx);
995 
996 	return event;
997 }
998 
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001 	if (tr->array_buffer.buffer)
1002 		ring_buffer_record_on(tr->array_buffer.buffer);
1003 	/*
1004 	 * This flag is looked at when buffers haven't been allocated
1005 	 * yet, or by some tracers (like irqsoff), that just want to
1006 	 * know if the ring buffer has been disabled, but it can handle
1007 	 * races of where it gets disabled but we still do a record.
1008 	 * races where it gets disabled but we still do a record.
1009 	 * important to be fast than accurate.
1010 	 */
1011 	tr->buffer_disabled = 0;
1012 	/* Make the flag seen by readers */
1013 	smp_wmb();
1014 }
1015 
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024 	tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027 
1028 
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032 	__this_cpu_write(trace_taskinfo_save, true);
1033 
1034 	/* If this is the temp buffer, we need to commit fully */
1035 	if (this_cpu_read(trace_buffered_event) == event) {
1036 		/* Length is in event->array[0] */
1037 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 		/* Release the temp buffer */
1039 		this_cpu_dec(trace_buffered_event_cnt);
1040 		/* ring_buffer_unlock_commit() enables preemption */
1041 		preempt_enable_notrace();
1042 	} else
1043 		ring_buffer_unlock_commit(buffer);
1044 }
1045 
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 		       const char *str, int size)
1048 {
1049 	struct ring_buffer_event *event;
1050 	struct trace_buffer *buffer;
1051 	struct print_entry *entry;
1052 	unsigned int trace_ctx;
1053 	int alloc;
1054 
1055 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 		return 0;
1057 
1058 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 		return 0;
1060 
1061 	if (unlikely(tracing_disabled))
1062 		return 0;
1063 
1064 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065 
1066 	trace_ctx = tracing_gen_ctx();
1067 	buffer = tr->array_buffer.buffer;
1068 	ring_buffer_nest_start(buffer);
1069 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070 					    trace_ctx);
1071 	if (!event) {
1072 		size = 0;
1073 		goto out;
1074 	}
1075 
1076 	entry = ring_buffer_event_data(event);
1077 	entry->ip = ip;
1078 
1079 	memcpy(&entry->buf, str, size);
1080 
1081 	/* Add a newline if necessary */
1082 	if (entry->buf[size - 1] != '\n') {
1083 		entry->buf[size] = '\n';
1084 		entry->buf[size + 1] = '\0';
1085 	} else
1086 		entry->buf[size] = '\0';
1087 
1088 	__buffer_unlock_commit(buffer, event);
1089 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091 	ring_buffer_nest_end(buffer);
1092 	return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095 
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:	   The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104 	return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
1107 
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:	   The address of the caller
1111  * @str:   The constant string whose pointer is written into the buffer
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115 	struct ring_buffer_event *event;
1116 	struct trace_buffer *buffer;
1117 	struct bputs_entry *entry;
1118 	unsigned int trace_ctx;
1119 	int size = sizeof(struct bputs_entry);
1120 	int ret = 0;
1121 
1122 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 		return 0;
1124 
1125 	if (unlikely(tracing_selftest_running || tracing_disabled))
1126 		return 0;
1127 
1128 	trace_ctx = tracing_gen_ctx();
1129 	buffer = global_trace.array_buffer.buffer;
1130 
1131 	ring_buffer_nest_start(buffer);
1132 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 					    trace_ctx);
1134 	if (!event)
1135 		goto out;
1136 
1137 	entry = ring_buffer_event_data(event);
1138 	entry->ip			= ip;
1139 	entry->str			= str;
1140 
1141 	__buffer_unlock_commit(buffer, event);
1142 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143 
1144 	ret = 1;
1145  out:
1146 	ring_buffer_nest_end(buffer);
1147 	return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
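/*
 * Editor's note (not in the original source): callers normally reach
 * __trace_puts()/__trace_bputs() through the trace_puts() macro, e.g.
 *
 *	trace_puts("hit the fast path\n");
 *
 * which selects the bputs variant for build-time constant strings so that
 * only the string's address is stored in the ring buffer.
 */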
1150 
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 					   void *cond_data)
1154 {
1155 	struct tracer *tracer = tr->current_trace;
1156 	unsigned long flags;
1157 
1158 	if (in_nmi()) {
1159 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161 		return;
1162 	}
1163 
1164 	if (!tr->allocated_snapshot) {
1165 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167 		tracer_tracing_off(tr);
1168 		return;
1169 	}
1170 
1171 	/* Note, snapshot can not be used when the tracer uses it */
1172 	if (tracer->use_max_tr) {
1173 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 		return;
1176 	}
1177 
1178 	local_irq_save(flags);
1179 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 	local_irq_restore(flags);
1181 }
1182 
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185 	tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187 
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot with either
1196  * a tracing_snapshot_alloc(), or by doing it manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, it will stop tracing.
1200  * Basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204 	struct trace_array *tr = &global_trace;
1205 
1206 	tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209 
1210 /**
1211  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212  * @tr:		The tracing instance to snapshot
1213  * @cond_data:	The data to be tested conditionally, and possibly saved
1214  *
1215  * This is the same as tracing_snapshot() except that the snapshot is
1216  * conditional - the snapshot will only happen if the
1217  * cond_snapshot.update() implementation receiving the cond_data
1218  * returns true, which means that the trace array's cond_snapshot
1219  * update() operation used the cond_data to determine whether the
1220  * snapshot should be taken, and if it was, presumably saved it along
1221  * with the snapshot.
1222  */
1223 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225 	tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228 
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:		The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already done.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245 	void *cond_data = NULL;
1246 
1247 	local_irq_disable();
1248 	arch_spin_lock(&tr->max_lock);
1249 
1250 	if (tr->cond_snapshot)
1251 		cond_data = tr->cond_snapshot->cond_data;
1252 
1253 	arch_spin_unlock(&tr->max_lock);
1254 	local_irq_enable();
1255 
1256 	return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259 
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 					struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263 
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266 	int order;
1267 	int ret;
1268 
1269 	if (!tr->allocated_snapshot) {
1270 
1271 		/* Make the snapshot buffer have the same order as main buffer */
1272 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1273 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1274 		if (ret < 0)
1275 			return ret;
1276 
1277 		/* allocate spare buffer */
1278 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1279 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1280 		if (ret < 0)
1281 			return ret;
1282 
1283 		tr->allocated_snapshot = true;
1284 	}
1285 
1286 	return 0;
1287 }
1288 
1289 static void free_snapshot(struct trace_array *tr)
1290 {
1291 	/*
1292 	 * We don't free the ring buffer. Instead, we resize it because
1293 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1294 	 * we want to preserve it.
1295 	 */
1296 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1297 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1298 	set_buffer_entries(&tr->max_buffer, 1);
1299 	tracing_reset_online_cpus(&tr->max_buffer);
1300 	tr->allocated_snapshot = false;
1301 }
1302 
1303 /**
1304  * tracing_alloc_snapshot - allocate snapshot buffer.
1305  *
1306  * This only allocates the snapshot buffer if it isn't already
1307  * allocated - it doesn't also take a snapshot.
1308  *
1309  * This is meant to be used in cases where the snapshot buffer needs
1310  * to be set up for events that can't sleep but need to be able to
1311  * trigger a snapshot.
1312  */
1313 int tracing_alloc_snapshot(void)
1314 {
1315 	struct trace_array *tr = &global_trace;
1316 	int ret;
1317 
1318 	ret = tracing_alloc_snapshot_instance(tr);
1319 	WARN_ON(ret < 0);
1320 
1321 	return ret;
1322 }
1323 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1324 
1325 /**
1326  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1327  *
1328  * This is similar to tracing_snapshot(), but it will allocate the
1329  * snapshot buffer if it isn't already allocated. Use this only
1330  * where it is safe to sleep, as the allocation may sleep.
1331  *
1332  * This causes a swap between the snapshot buffer and the current live
1333  * tracing buffer. You can use this to take snapshots of the live
1334  * trace when some condition is triggered, but continue to trace.
1335  */
1336 void tracing_snapshot_alloc(void)
1337 {
1338 	int ret;
1339 
1340 	ret = tracing_alloc_snapshot();
1341 	if (ret < 0)
1342 		return;
1343 
1344 	tracing_snapshot();
1345 }
1346 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
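/*
 * Editor's sketch (not part of the original file): typical in-kernel
 * snapshot usage. Allocate the spare buffer once from sleepable context,
 * then trigger the swap wherever the interesting condition fires (the
 * condition check is hypothetical):
 */
#if 0	/* illustrative only */
	/* early, sleepable setup */
	if (tracing_alloc_snapshot() < 0)
		return;

	/* later, in the code path under investigation */
	if (unlikely(example_condition_hit()))
		tracing_snapshot();
#endif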
1347 
1348 /**
1349  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1350  * @tr:		The tracing instance
1351  * @cond_data:	User data to associate with the snapshot
1352  * @update:	Implementation of the cond_snapshot update function
1353  *
1354  * Check whether the conditional snapshot for the given instance has
1355  * already been enabled, or if the current tracer is already using a
1356  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1357  * save the cond_data and update function inside.
1358  *
1359  * Returns 0 if successful, error otherwise.
1360  */
1361 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1362 				 cond_update_fn_t update)
1363 {
1364 	struct cond_snapshot *cond_snapshot;
1365 	int ret = 0;
1366 
1367 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1368 	if (!cond_snapshot)
1369 		return -ENOMEM;
1370 
1371 	cond_snapshot->cond_data = cond_data;
1372 	cond_snapshot->update = update;
1373 
1374 	mutex_lock(&trace_types_lock);
1375 
1376 	ret = tracing_alloc_snapshot_instance(tr);
1377 	if (ret)
1378 		goto fail_unlock;
1379 
1380 	if (tr->current_trace->use_max_tr) {
1381 		ret = -EBUSY;
1382 		goto fail_unlock;
1383 	}
1384 
1385 	/*
1386 	 * The cond_snapshot can only change to NULL without the
1387 	 * trace_types_lock. We don't care if we race with it going
1388 	 * to NULL, but we want to make sure that it's not set to
1389 	 * something other than NULL when we get here, which we can
1390 	 * do safely with only holding the trace_types_lock and not
1391 	 * having to take the max_lock.
1392 	 */
1393 	if (tr->cond_snapshot) {
1394 		ret = -EBUSY;
1395 		goto fail_unlock;
1396 	}
1397 
1398 	local_irq_disable();
1399 	arch_spin_lock(&tr->max_lock);
1400 	tr->cond_snapshot = cond_snapshot;
1401 	arch_spin_unlock(&tr->max_lock);
1402 	local_irq_enable();
1403 
1404 	mutex_unlock(&trace_types_lock);
1405 
1406 	return ret;
1407 
1408  fail_unlock:
1409 	mutex_unlock(&trace_types_lock);
1410 	kfree(cond_snapshot);
1411 	return ret;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1414 
1415 /**
1416  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1417  * @tr:		The tracing instance
1418  *
1419  * Check whether the conditional snapshot for the given instance is
1420  * enabled; if so, free the cond_snapshot associated with it,
1421  * otherwise return -EINVAL.
1422  *
1423  * Returns 0 if successful, error otherwise.
1424  */
1425 int tracing_snapshot_cond_disable(struct trace_array *tr)
1426 {
1427 	int ret = 0;
1428 
1429 	local_irq_disable();
1430 	arch_spin_lock(&tr->max_lock);
1431 
1432 	if (!tr->cond_snapshot)
1433 		ret = -EINVAL;
1434 	else {
1435 		kfree(tr->cond_snapshot);
1436 		tr->cond_snapshot = NULL;
1437 	}
1438 
1439 	arch_spin_unlock(&tr->max_lock);
1440 	local_irq_enable();
1441 
1442 	return ret;
1443 }
1444 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
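/*
 * Editor's sketch (not part of the original file): the conditional snapshot
 * API above with a caller-supplied update() callback that decides, per
 * call, whether the swap really happens. The "example_*" names and the
 * state structure are hypothetical.
 */
#if 0	/* illustrative only */
struct example_snap_state {
	unsigned long hits;
	unsigned long threshold;
};

static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	struct example_snap_state *state = cond_data;

	/* only snapshot once the private threshold is exceeded */
	return ++state->hits >= state->threshold;
}

static int example_enable(struct trace_array *tr, struct example_snap_state *state)
{
	/* sleepable context; allocates the snapshot buffer if needed */
	return tracing_snapshot_cond_enable(tr, state, example_cond_update);
}

static void example_hot_path(struct trace_array *tr, struct example_snap_state *state)
{
	/* may be called from the traced code path itself */
	tracing_snapshot_cond(tr, state);
}

/* teardown: tracing_snapshot_cond_disable(tr); */
#endif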
1445 #else
1446 void tracing_snapshot(void)
1447 {
1448 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1449 }
1450 EXPORT_SYMBOL_GPL(tracing_snapshot);
1451 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1452 {
1453 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1456 int tracing_alloc_snapshot(void)
1457 {
1458 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1459 	return -ENODEV;
1460 }
1461 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1462 void tracing_snapshot_alloc(void)
1463 {
1464 	/* Give warning */
1465 	tracing_snapshot();
1466 }
1467 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1468 void *tracing_cond_snapshot_data(struct trace_array *tr)
1469 {
1470 	return NULL;
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1473 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1474 {
1475 	return -ENODEV;
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1478 int tracing_snapshot_cond_disable(struct trace_array *tr)
1479 {
1480 	return false;
1481 }
1482 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1483 #define free_snapshot(tr)	do { } while (0)
1484 #endif /* CONFIG_TRACER_SNAPSHOT */
1485 
1486 void tracer_tracing_off(struct trace_array *tr)
1487 {
1488 	if (tr->array_buffer.buffer)
1489 		ring_buffer_record_off(tr->array_buffer.buffer);
1490 	/*
1491 	 * This flag is looked at when buffers haven't been allocated
1492 	 * yet, or by some tracers (like irqsoff), that just want to
1493 	 * know if the ring buffer has been disabled, but it can handle
1494 	 * races of where it gets disabled but we still do a record.
1495 	 * races where it gets disabled but we still do a record.
1496 	 * important to be fast than accurate.
1497 	 */
1498 	tr->buffer_disabled = 1;
1499 	/* Make the flag seen by readers */
1500 	smp_wmb();
1501 }
1502 
1503 /**
1504  * tracing_off - turn off tracing buffers
1505  *
1506  * This function stops the tracing buffers from recording data.
1507  * It does not disable any overhead the tracers themselves may
1508  * be causing. This function simply causes all recording to
1509  * the ring buffers to fail.
1510  */
1511 void tracing_off(void)
1512 {
1513 	tracer_tracing_off(&global_trace);
1514 }
1515 EXPORT_SYMBOL_GPL(tracing_off);
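/*
 * Editor's note (not in the original source): a common debugging pattern is
 * to stop the ring buffer the moment a problem is detected, so the events
 * leading up to it are preserved for a later read of the trace file:
 *
 *	if (example_state_looks_corrupted(obj))		// hypothetical check
 *		tracing_off();
 */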
1516 
1517 void disable_trace_on_warning(void)
1518 {
1519 	if (__disable_trace_on_warning) {
1520 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1521 			"Disabling tracing due to warning\n");
1522 		tracing_off();
1523 	}
1524 }
1525 
1526 /**
1527  * tracer_tracing_is_on - show real state of ring buffer enabled
1528  * @tr : the trace array to know if ring buffer is enabled
1529  *
1530  * Shows real state of the ring buffer if it is enabled or not.
1531  */
1532 bool tracer_tracing_is_on(struct trace_array *tr)
1533 {
1534 	if (tr->array_buffer.buffer)
1535 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1536 	return !tr->buffer_disabled;
1537 }
1538 
1539 /**
1540  * tracing_is_on - show state of ring buffers enabled
1541  */
1542 int tracing_is_on(void)
1543 {
1544 	return tracer_tracing_is_on(&global_trace);
1545 }
1546 EXPORT_SYMBOL_GPL(tracing_is_on);
1547 
1548 static int __init set_buf_size(char *str)
1549 {
1550 	unsigned long buf_size;
1551 
1552 	if (!str)
1553 		return 0;
1554 	buf_size = memparse(str, &str);
1555 	/*
1556 	 * nr_entries can not be zero and the startup
1557 	 * tests require some buffer space. Therefore
1558 	 * ensure we have at least 4096 bytes of buffer.
1559 	 */
1560 	trace_buf_size = max(4096UL, buf_size);
1561 	return 1;
1562 }
1563 __setup("trace_buf_size=", set_buf_size);
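/*
 * Editor's note (not in the original source): because set_buf_size() parses
 * the value with memparse(), the boot parameter accepts size suffixes:
 *
 *	trace_buf_size=1441792
 *	trace_buf_size=8M
 *
 * The value is the buffer size in bytes and is later rounded to page size,
 * as described above.
 */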
1564 
1565 static int __init set_tracing_thresh(char *str)
1566 {
1567 	unsigned long threshold;
1568 	int ret;
1569 
1570 	if (!str)
1571 		return 0;
1572 	ret = kstrtoul(str, 0, &threshold);
1573 	if (ret < 0)
1574 		return 0;
1575 	tracing_thresh = threshold * 1000;
1576 	return 1;
1577 }
1578 __setup("tracing_thresh=", set_tracing_thresh);
1579 
1580 unsigned long nsecs_to_usecs(unsigned long nsecs)
1581 {
1582 	return nsecs / 1000;
1583 }
1584 
1585 /*
1586  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1587  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1588  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1589  * of strings in the order that the evals (enum) were defined.
1590  */
1591 #undef C
1592 #define C(a, b) b
1593 
1594 /* These must match the bit positions in trace_iterator_flags */
1595 static const char *trace_options[] = {
1596 	TRACE_FLAGS
1597 	NULL
1598 };
1599 
1600 static struct {
1601 	u64 (*func)(void);
1602 	const char *name;
1603 	int in_ns;		/* is this clock in nanoseconds? */
1604 } trace_clocks[] = {
1605 	{ trace_clock_local,		"local",	1 },
1606 	{ trace_clock_global,		"global",	1 },
1607 	{ trace_clock_counter,		"counter",	0 },
1608 	{ trace_clock_jiffies,		"uptime",	0 },
1609 	{ trace_clock,			"perf",		1 },
1610 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1611 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1612 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1613 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1614 	ARCH_TRACE_CLOCKS
1615 };
1616 
1617 bool trace_clock_in_ns(struct trace_array *tr)
1618 {
1619 	if (trace_clocks[tr->clock_id].in_ns)
1620 		return true;
1621 
1622 	return false;
1623 }
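/*
 * Editor's note (not in the original source): the entries in the
 * trace_clocks[] table above are what user space selects through the
 * trace_clock file, e.g.
 *
 *	# echo mono > /sys/kernel/tracing/trace_clock
 *
 * and reading that file lists all available clocks with the current one
 * shown in brackets.
 */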
1624 
1625 /*
1626  * trace_parser_get_init - gets the buffer for trace parser
1627  */
1628 int trace_parser_get_init(struct trace_parser *parser, int size)
1629 {
1630 	memset(parser, 0, sizeof(*parser));
1631 
1632 	parser->buffer = kmalloc(size, GFP_KERNEL);
1633 	if (!parser->buffer)
1634 		return 1;
1635 
1636 	parser->size = size;
1637 	return 0;
1638 }
1639 
1640 /*
1641  * trace_parser_put - frees the buffer for trace parser
1642  */
1643 void trace_parser_put(struct trace_parser *parser)
1644 {
1645 	kfree(parser->buffer);
1646 	parser->buffer = NULL;
1647 }
1648 
1649 /*
1650  * trace_get_user - reads the user input string separated by space
1651  * (matched by isspace(ch))
1652  *
1653  * For each string found the 'struct trace_parser' is updated,
1654  * and the function returns.
1655  *
1656  * Returns number of bytes read.
1657  *
1658  * See kernel/trace/trace.h for 'struct trace_parser' details.
1659  */
1660 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1661 	size_t cnt, loff_t *ppos)
1662 {
1663 	char ch;
1664 	size_t read = 0;
1665 	ssize_t ret;
1666 
1667 	if (!*ppos)
1668 		trace_parser_clear(parser);
1669 
1670 	ret = get_user(ch, ubuf++);
1671 	if (ret)
1672 		goto out;
1673 
1674 	read++;
1675 	cnt--;
1676 
1677 	/*
1678 	 * The parser is not finished with the last write,
1679 	 * continue reading the user input without skipping spaces.
1680 	 */
1681 	if (!parser->cont) {
1682 		/* skip white space */
1683 		while (cnt && isspace(ch)) {
1684 			ret = get_user(ch, ubuf++);
1685 			if (ret)
1686 				goto out;
1687 			read++;
1688 			cnt--;
1689 		}
1690 
1691 		parser->idx = 0;
1692 
1693 		/* only spaces were written */
1694 		if (isspace(ch) || !ch) {
1695 			*ppos += read;
1696 			ret = read;
1697 			goto out;
1698 		}
1699 	}
1700 
1701 	/* read the non-space input */
1702 	while (cnt && !isspace(ch) && ch) {
1703 		if (parser->idx < parser->size - 1)
1704 			parser->buffer[parser->idx++] = ch;
1705 		else {
1706 			ret = -EINVAL;
1707 			goto out;
1708 		}
1709 		ret = get_user(ch, ubuf++);
1710 		if (ret)
1711 			goto out;
1712 		read++;
1713 		cnt--;
1714 	}
1715 
1716 	/* We either got finished input or we have to wait for another call. */
1717 	if (isspace(ch) || !ch) {
1718 		parser->buffer[parser->idx] = 0;
1719 		parser->cont = false;
1720 	} else if (parser->idx < parser->size - 1) {
1721 		parser->cont = true;
1722 		parser->buffer[parser->idx++] = ch;
1723 		/* Make sure the parsed string always terminates with '\0'. */
1724 		parser->buffer[parser->idx] = 0;
1725 	} else {
1726 		ret = -EINVAL;
1727 		goto out;
1728 	}
1729 
1730 	*ppos += read;
1731 	ret = read;
1732 
1733 out:
1734 	return ret;
1735 }
1736 
1737 /* TODO add a seq_buf_to_buffer() */
1738 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1739 {
1740 	int len;
1741 
1742 	if (trace_seq_used(s) <= s->readpos)
1743 		return -EBUSY;
1744 
1745 	len = trace_seq_used(s) - s->readpos;
1746 	if (cnt > len)
1747 		cnt = len;
1748 	memcpy(buf, s->buffer + s->readpos, cnt);
1749 
1750 	s->readpos += cnt;
1751 	return cnt;
1752 }
1753 
1754 unsigned long __read_mostly	tracing_thresh;
1755 
1756 #ifdef CONFIG_TRACER_MAX_TRACE
1757 static const struct file_operations tracing_max_lat_fops;
1758 
1759 #ifdef LATENCY_FS_NOTIFY
1760 
1761 static struct workqueue_struct *fsnotify_wq;
1762 
1763 static void latency_fsnotify_workfn(struct work_struct *work)
1764 {
1765 	struct trace_array *tr = container_of(work, struct trace_array,
1766 					      fsnotify_work);
1767 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1768 }
1769 
1770 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1771 {
1772 	struct trace_array *tr = container_of(iwork, struct trace_array,
1773 					      fsnotify_irqwork);
1774 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1775 }
1776 
1777 static void trace_create_maxlat_file(struct trace_array *tr,
1778 				     struct dentry *d_tracer)
1779 {
1780 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1781 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1782 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1783 					      TRACE_MODE_WRITE,
1784 					      d_tracer, tr,
1785 					      &tracing_max_lat_fops);
1786 }
1787 
1788 __init static int latency_fsnotify_init(void)
1789 {
1790 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1791 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1792 	if (!fsnotify_wq) {
1793 		pr_err("Unable to allocate tr_max_lat_wq\n");
1794 		return -ENOMEM;
1795 	}
1796 	return 0;
1797 }
1798 
1799 late_initcall_sync(latency_fsnotify_init);
1800 
1801 void latency_fsnotify(struct trace_array *tr)
1802 {
1803 	if (!fsnotify_wq)
1804 		return;
1805 	/*
1806 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1807 	 * possible that we are called from __schedule() or do_idle(), which
1808 	 * could cause a deadlock.
1809 	 */
1810 	irq_work_queue(&tr->fsnotify_irqwork);
1811 }
1812 
1813 #else /* !LATENCY_FS_NOTIFY */
1814 
1815 #define trace_create_maxlat_file(tr, d_tracer)				\
1816 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1817 			  d_tracer, tr, &tracing_max_lat_fops)
1818 
1819 #endif
1820 
1821 /*
1822  * Copy the new maximum trace into the separate maximum-trace
1823  * structure. (this way the maximum trace is permanently saved,
1824  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1825  */
1826 static void
1827 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1828 {
1829 	struct array_buffer *trace_buf = &tr->array_buffer;
1830 	struct array_buffer *max_buf = &tr->max_buffer;
1831 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1832 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1833 
1834 	max_buf->cpu = cpu;
1835 	max_buf->time_start = data->preempt_timestamp;
1836 
1837 	max_data->saved_latency = tr->max_latency;
1838 	max_data->critical_start = data->critical_start;
1839 	max_data->critical_end = data->critical_end;
1840 
1841 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1842 	max_data->pid = tsk->pid;
1843 	/*
1844 	 * If tsk == current, then use current_uid(), as that does not use
1845 	 * RCU. The irq tracer can be called out of RCU scope.
1846 	 */
1847 	if (tsk == current)
1848 		max_data->uid = current_uid();
1849 	else
1850 		max_data->uid = task_uid(tsk);
1851 
1852 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1853 	max_data->policy = tsk->policy;
1854 	max_data->rt_priority = tsk->rt_priority;
1855 
1856 	/* record this tasks comm */
1857 	tracing_record_cmdline(tsk);
1858 	latency_fsnotify(tr);
1859 }
1860 
1861 /**
1862  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1863  * @tr: tracer
1864  * @tsk: the task with the latency
1865  * @cpu: The cpu that initiated the trace.
1866  * @cond_data: User data associated with a conditional snapshot
1867  *
1868  * Flip the buffers between the @tr and the max_tr and record information
1869  * about which task was the cause of this latency.
1870  */
1871 void
1872 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1873 	      void *cond_data)
1874 {
1875 	if (tr->stop_count)
1876 		return;
1877 
1878 	WARN_ON_ONCE(!irqs_disabled());
1879 
1880 	if (!tr->allocated_snapshot) {
1881 		/* Only the nop tracer should hit this when disabling */
1882 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1883 		return;
1884 	}
1885 
1886 	arch_spin_lock(&tr->max_lock);
1887 
1888 	/* Inherit the recordable setting from array_buffer */
1889 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1890 		ring_buffer_record_on(tr->max_buffer.buffer);
1891 	else
1892 		ring_buffer_record_off(tr->max_buffer.buffer);
1893 
1894 #ifdef CONFIG_TRACER_SNAPSHOT
1895 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1896 		arch_spin_unlock(&tr->max_lock);
1897 		return;
1898 	}
1899 #endif
1900 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1901 
1902 	__update_max_tr(tr, tsk, cpu);
1903 
1904 	arch_spin_unlock(&tr->max_lock);
1905 }
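/*
 * Illustrative sketch (hypothetical caller, modelled on the latency
 * tracers): when a new worst case is measured, the tracer updates
 * tr->max_latency and lets update_max_tr() preserve the buffers of
 * that worst case. Interrupts must already be disabled here, as
 * update_max_tr() asserts above.
 */
static void example_report_latency(struct trace_array *tr, unsigned long delta)
{
	if (delta <= tr->max_latency || is_tracing_stopped())
		return;

	tr->max_latency = delta;
	update_max_tr(tr, current, smp_processor_id(), NULL);
}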
1906 
1907 /**
1908  * update_max_tr_single - only copy one trace over, and reset the rest
1909  * @tr: tracer
1910  * @tsk: task with the latency
1911  * @cpu: the cpu of the buffer to copy.
1912  *
1913  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1914  */
1915 void
1916 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1917 {
1918 	int ret;
1919 
1920 	if (tr->stop_count)
1921 		return;
1922 
1923 	WARN_ON_ONCE(!irqs_disabled());
1924 	if (!tr->allocated_snapshot) {
1925 		/* Only the nop tracer should hit this when disabling */
1926 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1927 		return;
1928 	}
1929 
1930 	arch_spin_lock(&tr->max_lock);
1931 
1932 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1933 
1934 	if (ret == -EBUSY) {
1935 		/*
1936 		 * We failed to swap the buffer due to a commit taking
1937 		 * place on this CPU. We fail to record, but we reset
1938 		 * the max trace buffer (no one writes directly to it)
1939 		 * and flag that it failed.
1940 		 * Another possible reason is that a resize is in progress.
1941 		 */
1942 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1943 			"Failed to swap buffers due to commit or resize in progress\n");
1944 	}
1945 
1946 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1947 
1948 	__update_max_tr(tr, tsk, cpu);
1949 	arch_spin_unlock(&tr->max_lock);
1950 }
1951 
1952 #endif /* CONFIG_TRACER_MAX_TRACE */
1953 
1954 static int wait_on_pipe(struct trace_iterator *iter, int full)
1955 {
1956 	/* Iterators are static; they should be filled or empty */
1957 	if (trace_buffer_iter(iter, iter->cpu_file))
1958 		return 0;
1959 
1960 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1961 				full);
1962 }
1963 
1964 #ifdef CONFIG_FTRACE_STARTUP_TEST
1965 static bool selftests_can_run;
1966 
1967 struct trace_selftests {
1968 	struct list_head		list;
1969 	struct tracer			*type;
1970 };
1971 
1972 static LIST_HEAD(postponed_selftests);
1973 
1974 static int save_selftest(struct tracer *type)
1975 {
1976 	struct trace_selftests *selftest;
1977 
1978 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1979 	if (!selftest)
1980 		return -ENOMEM;
1981 
1982 	selftest->type = type;
1983 	list_add(&selftest->list, &postponed_selftests);
1984 	return 0;
1985 }
1986 
1987 static int run_tracer_selftest(struct tracer *type)
1988 {
1989 	struct trace_array *tr = &global_trace;
1990 	struct tracer *saved_tracer = tr->current_trace;
1991 	int ret;
1992 
1993 	if (!type->selftest || tracing_selftest_disabled)
1994 		return 0;
1995 
1996 	/*
1997 	 * If a tracer registers early in boot up (before scheduling is
1998 	 * initialized and such), then do not run its selftests yet.
1999 	 * Instead, run it a little later in the boot process.
2000 	 */
2001 	if (!selftests_can_run)
2002 		return save_selftest(type);
2003 
2004 	if (!tracing_is_on()) {
2005 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2006 			type->name);
2007 		return 0;
2008 	}
2009 
2010 	/*
2011 	 * Run a selftest on this tracer.
2012 	 * Here we reset the trace buffer, and set the current
2013 	 * tracer to be this tracer. The tracer can then run some
2014 	 * internal tracing to verify that everything is in order.
2015 	 * If we fail, we do not register this tracer.
2016 	 */
2017 	tracing_reset_online_cpus(&tr->array_buffer);
2018 
2019 	tr->current_trace = type;
2020 
2021 #ifdef CONFIG_TRACER_MAX_TRACE
2022 	if (type->use_max_tr) {
2023 		/* If we expanded the buffers, make sure the max is expanded too */
2024 		if (tr->ring_buffer_expanded)
2025 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2026 					   RING_BUFFER_ALL_CPUS);
2027 		tr->allocated_snapshot = true;
2028 	}
2029 #endif
2030 
2031 	/* the test is responsible for initializing and enabling */
2032 	pr_info("Testing tracer %s: ", type->name);
2033 	ret = type->selftest(type, tr);
2034 	/* the test is responsible for resetting too */
2035 	tr->current_trace = saved_tracer;
2036 	if (ret) {
2037 		printk(KERN_CONT "FAILED!\n");
2038 		/* Add the warning after printing 'FAILED' */
2039 		WARN_ON(1);
2040 		return -1;
2041 	}
2042 	/* Only reset on passing, to avoid touching corrupted buffers */
2043 	tracing_reset_online_cpus(&tr->array_buffer);
2044 
2045 #ifdef CONFIG_TRACER_MAX_TRACE
2046 	if (type->use_max_tr) {
2047 		tr->allocated_snapshot = false;
2048 
2049 		/* Shrink the max buffer again */
2050 		if (tr->ring_buffer_expanded)
2051 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2052 					   RING_BUFFER_ALL_CPUS);
2053 	}
2054 #endif
2055 
2056 	printk(KERN_CONT "PASSED\n");
2057 	return 0;
2058 }
2059 
2060 static int do_run_tracer_selftest(struct tracer *type)
2061 {
2062 	int ret;
2063 
2064 	/*
2065 	 * Tests can take a long time, especially if they are run one after the
2066 	 * other, as does happen during bootup when all the tracers are
2067 	 * registered. This could cause the soft lockup watchdog to trigger.
2068 	 */
2069 	cond_resched();
2070 
2071 	tracing_selftest_running = true;
2072 	ret = run_tracer_selftest(type);
2073 	tracing_selftest_running = false;
2074 
2075 	return ret;
2076 }
2077 
2078 static __init int init_trace_selftests(void)
2079 {
2080 	struct trace_selftests *p, *n;
2081 	struct tracer *t, **last;
2082 	int ret;
2083 
2084 	selftests_can_run = true;
2085 
2086 	mutex_lock(&trace_types_lock);
2087 
2088 	if (list_empty(&postponed_selftests))
2089 		goto out;
2090 
2091 	pr_info("Running postponed tracer tests:\n");
2092 
2093 	tracing_selftest_running = true;
2094 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2095 		/* This loop can take minutes when sanitizers are enabled, so
2096 		 * let's make sure we allow RCU processing.
2097 		 */
2098 		cond_resched();
2099 		ret = run_tracer_selftest(p->type);
2100 		/* If the test fails, then warn and remove from available_tracers */
2101 		if (ret < 0) {
2102 			WARN(1, "tracer: %s failed selftest, disabling\n",
2103 			     p->type->name);
2104 			last = &trace_types;
2105 			for (t = trace_types; t; t = t->next) {
2106 				if (t == p->type) {
2107 					*last = t->next;
2108 					break;
2109 				}
2110 				last = &t->next;
2111 			}
2112 		}
2113 		list_del(&p->list);
2114 		kfree(p);
2115 	}
2116 	tracing_selftest_running = false;
2117 
2118  out:
2119 	mutex_unlock(&trace_types_lock);
2120 
2121 	return 0;
2122 }
2123 core_initcall(init_trace_selftests);
2124 #else
2125 static inline int run_tracer_selftest(struct tracer *type)
2126 {
2127 	return 0;
2128 }
2129 static inline int do_run_tracer_selftest(struct tracer *type)
2130 {
2131 	return 0;
2132 }
2133 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2134 
2135 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2136 
2137 static void __init apply_trace_boot_options(void);
2138 
2139 /**
2140  * register_tracer - register a tracer with the ftrace system.
2141  * @type: the plugin for the tracer
2142  *
2143  * Register a new plugin tracer.
2144  */
2145 int __init register_tracer(struct tracer *type)
2146 {
2147 	struct tracer *t;
2148 	int ret = 0;
2149 
2150 	if (!type->name) {
2151 		pr_info("Tracer must have a name\n");
2152 		return -1;
2153 	}
2154 
2155 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2156 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2157 		return -1;
2158 	}
2159 
2160 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2161 		pr_warn("Can not register tracer %s due to lockdown\n",
2162 			   type->name);
2163 		return -EPERM;
2164 	}
2165 
2166 	mutex_lock(&trace_types_lock);
2167 
2168 	for (t = trace_types; t; t = t->next) {
2169 		if (strcmp(type->name, t->name) == 0) {
2170 			/* already found */
2171 			pr_info("Tracer %s already registered\n",
2172 				type->name);
2173 			ret = -1;
2174 			goto out;
2175 		}
2176 	}
2177 
2178 	if (!type->set_flag)
2179 		type->set_flag = &dummy_set_flag;
2180 	if (!type->flags) {
2181 		/* allocate a dummy tracer_flags */
2182 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2183 		if (!type->flags) {
2184 			ret = -ENOMEM;
2185 			goto out;
2186 		}
2187 		type->flags->val = 0;
2188 		type->flags->opts = dummy_tracer_opt;
2189 	} else
2190 		if (!type->flags->opts)
2191 			type->flags->opts = dummy_tracer_opt;
2192 
2193 	/* store the tracer for __set_tracer_option */
2194 	type->flags->trace = type;
2195 
2196 	ret = do_run_tracer_selftest(type);
2197 	if (ret < 0)
2198 		goto out;
2199 
2200 	type->next = trace_types;
2201 	trace_types = type;
2202 	add_tracer_options(&global_trace, type);
2203 
2204  out:
2205 	mutex_unlock(&trace_types_lock);
2206 
2207 	if (ret || !default_bootup_tracer)
2208 		goto out_unlock;
2209 
2210 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2211 		goto out_unlock;
2212 
2213 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2214 	/* Do we want this tracer to start on bootup? */
2215 	tracing_set_tracer(&global_trace, type->name);
2216 	default_bootup_tracer = NULL;
2217 
2218 	apply_trace_boot_options();
2219 
2220 	/* Disable other selftests, since running this tracer will break them. */
2221 	disable_tracing_selftest("running a tracer");
2222 
2223  out_unlock:
2224 	return ret;
2225 }
2226 
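/*
 * Illustrative sketch: a minimal, hypothetical tracer showing what
 * register_tracer() expects from a plugin. Only .name is mandatory;
 * missing flags and callbacks are filled in with the dummy defaults
 * above, and a missing .selftest simply skips the startup test.
 * A real tracer would register itself from an initcall.
 */
static int example_tracer_init(struct trace_array *tr)
{
	/* Enable whatever hooks the tracer needs; nothing to do here. */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* Undo example_tracer_init(); nothing to do here. */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}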
2227 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2228 {
2229 	struct trace_buffer *buffer = buf->buffer;
2230 
2231 	if (!buffer)
2232 		return;
2233 
2234 	ring_buffer_record_disable(buffer);
2235 
2236 	/* Make sure all commits have finished */
2237 	synchronize_rcu();
2238 	ring_buffer_reset_cpu(buffer, cpu);
2239 
2240 	ring_buffer_record_enable(buffer);
2241 }
2242 
2243 void tracing_reset_online_cpus(struct array_buffer *buf)
2244 {
2245 	struct trace_buffer *buffer = buf->buffer;
2246 
2247 	if (!buffer)
2248 		return;
2249 
2250 	ring_buffer_record_disable(buffer);
2251 
2252 	/* Make sure all commits have finished */
2253 	synchronize_rcu();
2254 
2255 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2256 
2257 	ring_buffer_reset_online_cpus(buffer);
2258 
2259 	ring_buffer_record_enable(buffer);
2260 }
2261 
2262 /* Must have trace_types_lock held */
2263 void tracing_reset_all_online_cpus_unlocked(void)
2264 {
2265 	struct trace_array *tr;
2266 
2267 	lockdep_assert_held(&trace_types_lock);
2268 
2269 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2270 		if (!tr->clear_trace)
2271 			continue;
2272 		tr->clear_trace = false;
2273 		tracing_reset_online_cpus(&tr->array_buffer);
2274 #ifdef CONFIG_TRACER_MAX_TRACE
2275 		tracing_reset_online_cpus(&tr->max_buffer);
2276 #endif
2277 	}
2278 }
2279 
2280 void tracing_reset_all_online_cpus(void)
2281 {
2282 	mutex_lock(&trace_types_lock);
2283 	tracing_reset_all_online_cpus_unlocked();
2284 	mutex_unlock(&trace_types_lock);
2285 }
2286 
2287 /*
2288  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2289  * is the tgid last observed corresponding to pid=i.
2290  */
2291 static int *tgid_map;
2292 
2293 /* The maximum valid index into tgid_map. */
2294 static size_t tgid_map_max;
2295 
2296 #define SAVED_CMDLINES_DEFAULT 128
2297 #define NO_CMDLINE_MAP UINT_MAX
2298 /*
2299  * Preemption must be disabled before acquiring trace_cmdline_lock.
2300  * The various trace_arrays' max_lock must be acquired in a context
2301  * where interrupts are disabled.
2302  */
2303 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2304 struct saved_cmdlines_buffer {
2305 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2306 	unsigned *map_cmdline_to_pid;
2307 	unsigned cmdline_num;
2308 	int cmdline_idx;
2309 	char *saved_cmdlines;
2310 };
2311 static struct saved_cmdlines_buffer *savedcmd;
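/*
 * Note on the layout above: the two arrays form a small, collision
 * tolerant cache. map_pid_to_cmdline[] maps a masked pid to a slot in
 * saved_cmdlines[], while map_cmdline_to_pid[] records which pid
 * currently owns that slot, so a later lookup can detect that the slot
 * has been recycled for a different task and fall back to "<...>".
 */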
2312 
2313 static inline char *get_saved_cmdlines(int idx)
2314 {
2315 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2316 }
2317 
2318 static inline void set_cmdline(int idx, const char *cmdline)
2319 {
2320 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2321 }
2322 
2323 static int allocate_cmdlines_buffer(unsigned int val,
2324 				    struct saved_cmdlines_buffer *s)
2325 {
2326 	s->map_cmdline_to_pid = kmalloc_array(val,
2327 					      sizeof(*s->map_cmdline_to_pid),
2328 					      GFP_KERNEL);
2329 	if (!s->map_cmdline_to_pid)
2330 		return -ENOMEM;
2331 
2332 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2333 	if (!s->saved_cmdlines) {
2334 		kfree(s->map_cmdline_to_pid);
2335 		return -ENOMEM;
2336 	}
2337 
2338 	s->cmdline_idx = 0;
2339 	s->cmdline_num = val;
2340 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2341 	       sizeof(s->map_pid_to_cmdline));
2342 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2343 	       val * sizeof(*s->map_cmdline_to_pid));
2344 
2345 	return 0;
2346 }
2347 
2348 static int trace_create_savedcmd(void)
2349 {
2350 	int ret;
2351 
2352 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2353 	if (!savedcmd)
2354 		return -ENOMEM;
2355 
2356 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2357 	if (ret < 0) {
2358 		kfree(savedcmd);
2359 		savedcmd = NULL;
2360 		return -ENOMEM;
2361 	}
2362 
2363 	return 0;
2364 }
2365 
2366 int is_tracing_stopped(void)
2367 {
2368 	return global_trace.stop_count;
2369 }
2370 
2371 static void tracing_start_tr(struct trace_array *tr)
2372 {
2373 	struct trace_buffer *buffer;
2374 	unsigned long flags;
2375 
2376 	if (tracing_disabled)
2377 		return;
2378 
2379 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2380 	if (--tr->stop_count) {
2381 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2382 			/* Someone screwed up their debugging */
2383 			tr->stop_count = 0;
2384 		}
2385 		goto out;
2386 	}
2387 
2388 	/* Prevent the buffers from switching */
2389 	arch_spin_lock(&tr->max_lock);
2390 
2391 	buffer = tr->array_buffer.buffer;
2392 	if (buffer)
2393 		ring_buffer_record_enable(buffer);
2394 
2395 #ifdef CONFIG_TRACER_MAX_TRACE
2396 	buffer = tr->max_buffer.buffer;
2397 	if (buffer)
2398 		ring_buffer_record_enable(buffer);
2399 #endif
2400 
2401 	arch_spin_unlock(&tr->max_lock);
2402 
2403  out:
2404 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2405 }
2406 
2407 /**
2408  * tracing_start - quick start of the tracer
2409  *
2410  * If tracing is enabled but was stopped by tracing_stop,
2411  * this will start the tracer back up.
2412  */
2413 void tracing_start(void)
2414 
2415 {
2416 	return tracing_start_tr(&global_trace);
2417 }
2418 
2419 static void tracing_stop_tr(struct trace_array *tr)
2420 {
2421 	struct trace_buffer *buffer;
2422 	unsigned long flags;
2423 
2424 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2425 	if (tr->stop_count++)
2426 		goto out;
2427 
2428 	/* Prevent the buffers from switching */
2429 	arch_spin_lock(&tr->max_lock);
2430 
2431 	buffer = tr->array_buffer.buffer;
2432 	if (buffer)
2433 		ring_buffer_record_disable(buffer);
2434 
2435 #ifdef CONFIG_TRACER_MAX_TRACE
2436 	buffer = tr->max_buffer.buffer;
2437 	if (buffer)
2438 		ring_buffer_record_disable(buffer);
2439 #endif
2440 
2441 	arch_spin_unlock(&tr->max_lock);
2442 
2443  out:
2444 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2445 }
2446 
2447 /**
2448  * tracing_stop - quick stop of the tracer
2449  *
2450  * Light weight way to stop tracing. Use in conjunction with
2451  * tracing_start.
2452  */
2453 void tracing_stop(void)
2454 {
2455 	return tracing_stop_tr(&global_trace);
2456 }
2457 
2458 static int trace_save_cmdline(struct task_struct *tsk)
2459 {
2460 	unsigned tpid, idx;
2461 
2462 	/* treat recording of idle task as a success */
2463 	if (!tsk->pid)
2464 		return 1;
2465 
2466 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2467 
2468 	/*
2469 	 * It's not the end of the world if we don't get
2470 	 * the lock, but we also don't want to spin
2471 	 * nor do we want to disable interrupts,
2472 	 * so if we miss here, then better luck next time.
2473 	 *
2474 	 * This is called from within the scheduler and the wakeup path, so
2475 	 * interrupts had better be disabled and the run queue lock held.
2476 	 */
2477 	lockdep_assert_preemption_disabled();
2478 	if (!arch_spin_trylock(&trace_cmdline_lock))
2479 		return 0;
2480 
2481 	idx = savedcmd->map_pid_to_cmdline[tpid];
2482 	if (idx == NO_CMDLINE_MAP) {
2483 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2484 
2485 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2486 		savedcmd->cmdline_idx = idx;
2487 	}
2488 
2489 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2490 	set_cmdline(idx, tsk->comm);
2491 
2492 	arch_spin_unlock(&trace_cmdline_lock);
2493 
2494 	return 1;
2495 }
2496 
2497 static void __trace_find_cmdline(int pid, char comm[])
2498 {
2499 	unsigned map;
2500 	int tpid;
2501 
2502 	if (!pid) {
2503 		strcpy(comm, "<idle>");
2504 		return;
2505 	}
2506 
2507 	if (WARN_ON_ONCE(pid < 0)) {
2508 		strcpy(comm, "<XXX>");
2509 		return;
2510 	}
2511 
2512 	tpid = pid & (PID_MAX_DEFAULT - 1);
2513 	map = savedcmd->map_pid_to_cmdline[tpid];
2514 	if (map != NO_CMDLINE_MAP) {
2515 		tpid = savedcmd->map_cmdline_to_pid[map];
2516 		if (tpid == pid) {
2517 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2518 			return;
2519 		}
2520 	}
2521 	strcpy(comm, "<...>");
2522 }
2523 
2524 void trace_find_cmdline(int pid, char comm[])
2525 {
2526 	preempt_disable();
2527 	arch_spin_lock(&trace_cmdline_lock);
2528 
2529 	__trace_find_cmdline(pid, comm);
2530 
2531 	arch_spin_unlock(&trace_cmdline_lock);
2532 	preempt_enable();
2533 }
2534 
2535 static int *trace_find_tgid_ptr(int pid)
2536 {
2537 	/*
2538 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2539 	 * if we observe a non-NULL tgid_map then we also observe the correct
2540 	 * tgid_map_max.
2541 	 */
2542 	int *map = smp_load_acquire(&tgid_map);
2543 
2544 	if (unlikely(!map || pid > tgid_map_max))
2545 		return NULL;
2546 
2547 	return &map[pid];
2548 }
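/*
 * Illustrative sketch of the publishing side that the acquire above
 * pairs with (the real store happens where tgid_map is allocated; this
 * helper is hypothetical): the maximum is written first and the array
 * pointer is released afterwards, so a reader that observes the
 * pointer also observes a tgid_map_max that covers it.
 */
static void example_publish_tgid_map(int *new_map, size_t new_max)
{
	tgid_map_max = new_max;
	smp_store_release(&tgid_map, new_map);
}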
2549 
2550 int trace_find_tgid(int pid)
2551 {
2552 	int *ptr = trace_find_tgid_ptr(pid);
2553 
2554 	return ptr ? *ptr : 0;
2555 }
2556 
2557 static int trace_save_tgid(struct task_struct *tsk)
2558 {
2559 	int *ptr;
2560 
2561 	/* treat recording of idle task as a success */
2562 	if (!tsk->pid)
2563 		return 1;
2564 
2565 	ptr = trace_find_tgid_ptr(tsk->pid);
2566 	if (!ptr)
2567 		return 0;
2568 
2569 	*ptr = tsk->tgid;
2570 	return 1;
2571 }
2572 
2573 static bool tracing_record_taskinfo_skip(int flags)
2574 {
2575 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2576 		return true;
2577 	if (!__this_cpu_read(trace_taskinfo_save))
2578 		return true;
2579 	return false;
2580 }
2581 
2582 /**
2583  * tracing_record_taskinfo - record the task info of a task
2584  *
2585  * @task:  task to record
2586  * @flags: TRACE_RECORD_CMDLINE for recording comm
2587  *         TRACE_RECORD_TGID for recording tgid
2588  */
2589 void tracing_record_taskinfo(struct task_struct *task, int flags)
2590 {
2591 	bool done;
2592 
2593 	if (tracing_record_taskinfo_skip(flags))
2594 		return;
2595 
2596 	/*
2597 	 * Record as much task information as possible. If some fail, continue
2598 	 * to try to record the others.
2599 	 */
2600 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2601 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2602 
2603 	/* If recording any information failed, retry again soon. */
2604 	if (!done)
2605 		return;
2606 
2607 	__this_cpu_write(trace_taskinfo_save, false);
2608 }
2609 
2610 /**
2611  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2612  *
2613  * @prev: previous task during sched_switch
2614  * @next: next task during sched_switch
2615  * @flags: TRACE_RECORD_CMDLINE for recording comm
2616  *         TRACE_RECORD_TGID for recording tgid
2617  */
2618 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2619 					  struct task_struct *next, int flags)
2620 {
2621 	bool done;
2622 
2623 	if (tracing_record_taskinfo_skip(flags))
2624 		return;
2625 
2626 	/*
2627 	 * Record as much task information as possible. If some fail, continue
2628 	 * to try to record the others.
2629 	 */
2630 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2631 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2632 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2633 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2634 
2635 	/* If recording any information failed, retry again soon. */
2636 	if (!done)
2637 		return;
2638 
2639 	__this_cpu_write(trace_taskinfo_save, false);
2640 }
2641 
2642 /* Helpers to record a specific task information */
2643 void tracing_record_cmdline(struct task_struct *task)
2644 {
2645 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2646 }
2647 
2648 void tracing_record_tgid(struct task_struct *task)
2649 {
2650 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2651 }
2652 
2653 /*
2654  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2655  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2656  * simplifies those functions and keeps them in sync.
2657  */
2658 enum print_line_t trace_handle_return(struct trace_seq *s)
2659 {
2660 	return trace_seq_has_overflowed(s) ?
2661 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2662 }
2663 EXPORT_SYMBOL_GPL(trace_handle_return);
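/*
 * Illustrative sketch (hypothetical output callback): an event's print
 * handler writes into iter->seq with the trace_seq helpers and lets
 * trace_handle_return() turn the overflow state into the return value
 * that the output layer expects.
 */
static enum print_line_t example_trace_output(struct trace_iterator *iter,
					      int flags,
					      struct trace_event *event)
{
	trace_seq_puts(&iter->seq, "example event\n");

	return trace_handle_return(&iter->seq);
}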
2664 
2665 static unsigned short migration_disable_value(void)
2666 {
2667 #if defined(CONFIG_SMP)
2668 	return current->migration_disabled;
2669 #else
2670 	return 0;
2671 #endif
2672 }
2673 
2674 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2675 {
2676 	unsigned int trace_flags = irqs_status;
2677 	unsigned int pc;
2678 
2679 	pc = preempt_count();
2680 
2681 	if (pc & NMI_MASK)
2682 		trace_flags |= TRACE_FLAG_NMI;
2683 	if (pc & HARDIRQ_MASK)
2684 		trace_flags |= TRACE_FLAG_HARDIRQ;
2685 	if (in_serving_softirq())
2686 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2687 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2688 		trace_flags |= TRACE_FLAG_BH_OFF;
2689 
2690 	if (tif_need_resched())
2691 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2692 	if (test_preempt_need_resched())
2693 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2694 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2695 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2696 }
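/*
 * Illustrative decoding of the value packed above (hypothetical
 * helper): bits 0-3 carry the preempt depth (clamped to 15), bits 4-7
 * the migration-disable depth (clamped to 15), and the irq/softirq/
 * NMI/resched flags start at bit 16.
 */
static inline void example_decode_trace_ctx(unsigned int trace_ctx,
					    unsigned int *preempt_depth,
					    unsigned int *migrate_disable,
					    unsigned int *flags)
{
	*preempt_depth	 = trace_ctx & 0xf;
	*migrate_disable = (trace_ctx >> 4) & 0xf;
	*flags		 = trace_ctx >> 16;
}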
2697 
2698 struct ring_buffer_event *
2699 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2700 			  int type,
2701 			  unsigned long len,
2702 			  unsigned int trace_ctx)
2703 {
2704 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2705 }
2706 
2707 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2708 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2709 static int trace_buffered_event_ref;
2710 
2711 /**
2712  * trace_buffered_event_enable - enable buffering events
2713  *
2714  * When events are being filtered, it is quicker to use a temporary
2715  * buffer to write the event data into if it is likely that the event
2716  * will not be committed. Discarding from the ring buffer is not as
2717  * fast as committing, and is much slower than simply copying the data
2718  * on a successful commit.
2719  *
2720  * When events are to be filtered, allocate per-CPU buffers to write
2721  * the event data into. If the event is then filtered and discarded, it
2722  * is simply dropped; otherwise, the entire data is committed into the
2723  * ring buffer in one shot.
2724  */
2725 void trace_buffered_event_enable(void)
2726 {
2727 	struct ring_buffer_event *event;
2728 	struct page *page;
2729 	int cpu;
2730 
2731 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2732 
2733 	if (trace_buffered_event_ref++)
2734 		return;
2735 
2736 	for_each_tracing_cpu(cpu) {
2737 		page = alloc_pages_node(cpu_to_node(cpu),
2738 					GFP_KERNEL | __GFP_NORETRY, 0);
2739 		/* This is just an optimization and can handle failures */
2740 		if (!page) {
2741 			pr_err("Failed to allocate event buffer\n");
2742 			break;
2743 		}
2744 
2745 		event = page_address(page);
2746 		memset(event, 0, sizeof(*event));
2747 
2748 		per_cpu(trace_buffered_event, cpu) = event;
2749 
2750 		preempt_disable();
2751 		if (cpu == smp_processor_id() &&
2752 		    __this_cpu_read(trace_buffered_event) !=
2753 		    per_cpu(trace_buffered_event, cpu))
2754 			WARN_ON_ONCE(1);
2755 		preempt_enable();
2756 	}
2757 }
2758 
2759 static void enable_trace_buffered_event(void *data)
2760 {
2761 	/* Probably not needed, but do it anyway */
2762 	smp_rmb();
2763 	this_cpu_dec(trace_buffered_event_cnt);
2764 }
2765 
2766 static void disable_trace_buffered_event(void *data)
2767 {
2768 	this_cpu_inc(trace_buffered_event_cnt);
2769 }
2770 
2771 /**
2772  * trace_buffered_event_disable - disable buffering events
2773  *
2774  * When a filter is removed, it is faster to not use the buffered
2775  * events, and to commit directly into the ring buffer. Free up
2776  * the temp buffers when there are no more users. This requires
2777  * special synchronization with current events.
2778  */
2779 void trace_buffered_event_disable(void)
2780 {
2781 	int cpu;
2782 
2783 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2784 
2785 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2786 		return;
2787 
2788 	if (--trace_buffered_event_ref)
2789 		return;
2790 
2791 	/* For each CPU, set the buffer as used. */
2792 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2793 			 NULL, true);
2794 
2795 	/* Wait for all current users to finish */
2796 	synchronize_rcu();
2797 
2798 	for_each_tracing_cpu(cpu) {
2799 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2800 		per_cpu(trace_buffered_event, cpu) = NULL;
2801 	}
2802 
2803 	/*
2804 	 * Wait for all CPUs that may have started checking whether they can
2805 	 * use their event buffer only after the previous synchronize_rcu()
2806 	 * call and that still read a valid pointer from trace_buffered_event.
2807 	 * They must not see the cleared trace_buffered_event_cnt, or they
2808 	 * could wrongly decide to use the pointed-to buffer, which is now freed.
2809 	 */
2810 	synchronize_rcu();
2811 
2812 	/* For each CPU, relinquish the buffer */
2813 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2814 			 true);
2815 }
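/*
 * Illustrative sketch (hypothetical callers): users of the buffered
 * events pair the two functions above while holding event_mutex, as
 * the WARN_ON_ONCE() checks require, so the per-CPU scratch pages only
 * exist while at least one filter might use them.
 */
static void example_filter_attached(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();
	mutex_unlock(&event_mutex);
}

static void example_filter_detached(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}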
2816 
2817 static struct trace_buffer *temp_buffer;
2818 
2819 struct ring_buffer_event *
2820 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2821 			  struct trace_event_file *trace_file,
2822 			  int type, unsigned long len,
2823 			  unsigned int trace_ctx)
2824 {
2825 	struct ring_buffer_event *entry;
2826 	struct trace_array *tr = trace_file->tr;
2827 	int val;
2828 
2829 	*current_rb = tr->array_buffer.buffer;
2830 
2831 	if (!tr->no_filter_buffering_ref &&
2832 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2833 		preempt_disable_notrace();
2834 		/*
2835 		 * Filtering is on, so try to use the per cpu buffer first.
2836 		 * This buffer will simulate a ring_buffer_event,
2837 		 * where the type_len is zero and the array[0] will
2838 		 * hold the full length.
2839 		 * (see include/linux/ring_buffer.h for details on
2840 		 *  how the ring_buffer_event is structured).
2841 		 *
2842 		 * Using a temp buffer during filtering and copying it
2843 		 * on a matched filter is quicker than writing directly
2844 		 * into the ring buffer and then discarding it when
2845 		 * it doesn't match. That is because the discard
2846 		 * requires several atomic operations to get right.
2847 		 * Copying on match and doing nothing on a failed match
2848 		 * is still quicker than skipping the copy on a match but
2849 		 * then having to discard from the ring buffer on a failed match.
2850 		 */
2851 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2852 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2853 
2854 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2855 
2856 			/*
2857 			 * Preemption is disabled, but interrupts and NMIs
2858 			 * can still come in now. If that happens after
2859 			 * the above increment, then it will have to go
2860 			 * back to the old method of allocating the event
2861 			 * on the ring buffer, and if the filter fails, it
2862 			 * will have to call ring_buffer_discard_commit()
2863 			 * to remove it.
2864 			 *
2865 			 * Need to also check the unlikely case that the
2866 			 * length is bigger than the temp buffer size.
2867 			 * If that happens, then the reserve is pretty much
2868 			 * guaranteed to fail, as the ring buffer currently
2869 			 * only allows events less than a page. But that may
2870 			 * change in the future, so let the ring buffer reserve
2871 			 * handle the failure in that case.
2872 			 */
2873 			if (val == 1 && likely(len <= max_len)) {
2874 				trace_event_setup(entry, type, trace_ctx);
2875 				entry->array[0] = len;
2876 				/* Return with preemption disabled */
2877 				return entry;
2878 			}
2879 			this_cpu_dec(trace_buffered_event_cnt);
2880 		}
2881 		/* __trace_buffer_lock_reserve() disables preemption */
2882 		preempt_enable_notrace();
2883 	}
2884 
2885 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2886 					    trace_ctx);
2887 	/*
2888 	 * If tracing is off, but we have triggers enabled
2889 	 * we still need to look at the event data. Use the temp_buffer
2890 	 * to store the trace event for the trigger to use. It's recursion
2891 	 * safe and will not be recorded anywhere.
2892 	 */
2893 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2894 		*current_rb = temp_buffer;
2895 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2896 						    trace_ctx);
2897 	}
2898 	return entry;
2899 }
2900 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2901 
2902 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2903 static DEFINE_MUTEX(tracepoint_printk_mutex);
2904 
2905 static void output_printk(struct trace_event_buffer *fbuffer)
2906 {
2907 	struct trace_event_call *event_call;
2908 	struct trace_event_file *file;
2909 	struct trace_event *event;
2910 	unsigned long flags;
2911 	struct trace_iterator *iter = tracepoint_print_iter;
2912 
2913 	/* We should never get here if iter is NULL */
2914 	if (WARN_ON_ONCE(!iter))
2915 		return;
2916 
2917 	event_call = fbuffer->trace_file->event_call;
2918 	if (!event_call || !event_call->event.funcs ||
2919 	    !event_call->event.funcs->trace)
2920 		return;
2921 
2922 	file = fbuffer->trace_file;
2923 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2924 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2925 	     !filter_match_preds(file->filter, fbuffer->entry)))
2926 		return;
2927 
2928 	event = &fbuffer->trace_file->event_call->event;
2929 
2930 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2931 	trace_seq_init(&iter->seq);
2932 	iter->ent = fbuffer->entry;
2933 	event_call->event.funcs->trace(iter, 0, event);
2934 	trace_seq_putc(&iter->seq, 0);
2935 	printk("%s", iter->seq.buffer);
2936 
2937 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2938 }
2939 
2940 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2941 			     void *buffer, size_t *lenp,
2942 			     loff_t *ppos)
2943 {
2944 	int save_tracepoint_printk;
2945 	int ret;
2946 
2947 	mutex_lock(&tracepoint_printk_mutex);
2948 	save_tracepoint_printk = tracepoint_printk;
2949 
2950 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2951 
2952 	/*
2953 	 * This will force exiting early, as tracepoint_printk
2954 	 * is always zero when tracepoint_print_iter is not allocated.
2955 	 */
2956 	if (!tracepoint_print_iter)
2957 		tracepoint_printk = 0;
2958 
2959 	if (save_tracepoint_printk == tracepoint_printk)
2960 		goto out;
2961 
2962 	if (tracepoint_printk)
2963 		static_key_enable(&tracepoint_printk_key.key);
2964 	else
2965 		static_key_disable(&tracepoint_printk_key.key);
2966 
2967  out:
2968 	mutex_unlock(&tracepoint_printk_mutex);
2969 
2970 	return ret;
2971 }
2972 
2973 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2974 {
2975 	enum event_trigger_type tt = ETT_NONE;
2976 	struct trace_event_file *file = fbuffer->trace_file;
2977 
2978 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2979 			fbuffer->entry, &tt))
2980 		goto discard;
2981 
2982 	if (static_key_false(&tracepoint_printk_key.key))
2983 		output_printk(fbuffer);
2984 
2985 	if (static_branch_unlikely(&trace_event_exports_enabled))
2986 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2987 
2988 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2989 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2990 
2991 discard:
2992 	if (tt)
2993 		event_triggers_post_call(file, tt);
2994 
2995 }
2996 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2997 
2998 /*
2999  * Skip 3:
3000  *
3001  *   trace_buffer_unlock_commit_regs()
3002  *   trace_event_buffer_commit()
3003  *   trace_event_raw_event_xxx()
3004  */
3005 # define STACK_SKIP 3
3006 
3007 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3008 				     struct trace_buffer *buffer,
3009 				     struct ring_buffer_event *event,
3010 				     unsigned int trace_ctx,
3011 				     struct pt_regs *regs)
3012 {
3013 	__buffer_unlock_commit(buffer, event);
3014 
3015 	/*
3016 	 * If regs is not set, then skip the necessary functions.
3017 	 * Note, we can still get here via blktrace, wakeup tracer
3018 	 * and mmiotrace, but that's ok if they lose a function or
3019 	 * two. They are not that meaningful.
3020 	 */
3021 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3022 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3023 }
3024 
3025 /*
3026  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3027  */
3028 void
3029 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3030 				   struct ring_buffer_event *event)
3031 {
3032 	__buffer_unlock_commit(buffer, event);
3033 }
3034 
3035 void
3036 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3037 	       parent_ip, unsigned int trace_ctx)
3038 {
3039 	struct trace_event_call *call = &event_function;
3040 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3041 	struct ring_buffer_event *event;
3042 	struct ftrace_entry *entry;
3043 
3044 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3045 					    trace_ctx);
3046 	if (!event)
3047 		return;
3048 	entry	= ring_buffer_event_data(event);
3049 	entry->ip			= ip;
3050 	entry->parent_ip		= parent_ip;
3051 
3052 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3053 		if (static_branch_unlikely(&trace_function_exports_enabled))
3054 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3055 		__buffer_unlock_commit(buffer, event);
3056 	}
3057 }
3058 
3059 #ifdef CONFIG_STACKTRACE
3060 
3061 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3062 #define FTRACE_KSTACK_NESTING	4
3063 
3064 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3065 
3066 struct ftrace_stack {
3067 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3068 };
3069 
3070 
3071 struct ftrace_stacks {
3072 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3073 };
3074 
3075 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3076 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3077 
3078 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3079 				 unsigned int trace_ctx,
3080 				 int skip, struct pt_regs *regs)
3081 {
3082 	struct trace_event_call *call = &event_kernel_stack;
3083 	struct ring_buffer_event *event;
3084 	unsigned int size, nr_entries;
3085 	struct ftrace_stack *fstack;
3086 	struct stack_entry *entry;
3087 	int stackidx;
3088 
3089 	/*
3090 	 * Add one, for this function and the call to stack_trace_save().
3091 	 * If regs is set, then these functions will not be in the way.
3092 	 */
3093 #ifndef CONFIG_UNWINDER_ORC
3094 	if (!regs)
3095 		skip++;
3096 #endif
3097 
3098 	preempt_disable_notrace();
3099 
3100 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3101 
3102 	/* This should never happen. If it does, yell once and skip */
3103 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3104 		goto out;
3105 
3106 	/*
3107 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3108 	 * interrupt will either see the value pre increment or post
3109 	 * increment. If the interrupt happens pre increment it will have
3110 	 * restored the counter when it returns.  We just need a barrier to
3111 	 * keep gcc from moving things around.
3112 	 */
3113 	barrier();
3114 
3115 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3116 	size = ARRAY_SIZE(fstack->calls);
3117 
3118 	if (regs) {
3119 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3120 						   size, skip);
3121 	} else {
3122 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3123 	}
3124 
3125 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3126 				    struct_size(entry, caller, nr_entries),
3127 				    trace_ctx);
3128 	if (!event)
3129 		goto out;
3130 	entry = ring_buffer_event_data(event);
3131 
3132 	entry->size = nr_entries;
3133 	memcpy(&entry->caller, fstack->calls,
3134 	       flex_array_size(entry, caller, nr_entries));
3135 
3136 	if (!call_filter_check_discard(call, entry, buffer, event))
3137 		__buffer_unlock_commit(buffer, event);
3138 
3139  out:
3140 	/* Again, don't let gcc optimize things here */
3141 	barrier();
3142 	__this_cpu_dec(ftrace_stack_reserve);
3143 	preempt_enable_notrace();
3144 
3145 }
3146 
3147 static inline void ftrace_trace_stack(struct trace_array *tr,
3148 				      struct trace_buffer *buffer,
3149 				      unsigned int trace_ctx,
3150 				      int skip, struct pt_regs *regs)
3151 {
3152 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3153 		return;
3154 
3155 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3156 }
3157 
3158 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3159 		   int skip)
3160 {
3161 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3162 
3163 	if (rcu_is_watching()) {
3164 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3165 		return;
3166 	}
3167 
3168 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3169 		return;
3170 
3171 	/*
3172 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3173 	 * but if the above rcu_is_watching() failed, then the NMI
3174 	 * triggered someplace critical, and ct_irq_enter() should
3175 	 * not be called from NMI.
3176 	 */
3177 	if (unlikely(in_nmi()))
3178 		return;
3179 
3180 	ct_irq_enter_irqson();
3181 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3182 	ct_irq_exit_irqson();
3183 }
3184 
3185 /**
3186  * trace_dump_stack - record a stack back trace in the trace buffer
3187  * @skip: Number of functions to skip (helper handlers)
3188  */
3189 void trace_dump_stack(int skip)
3190 {
3191 	if (tracing_disabled || tracing_selftest_running)
3192 		return;
3193 
3194 #ifndef CONFIG_UNWINDER_ORC
3195 	/* Skip 1 to skip this function. */
3196 	skip++;
3197 #endif
3198 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3199 			     tracing_gen_ctx(), skip, NULL);
3200 }
3201 EXPORT_SYMBOL_GPL(trace_dump_stack);
3202 
3203 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3204 static DEFINE_PER_CPU(int, user_stack_count);
3205 
3206 static void
3207 ftrace_trace_userstack(struct trace_array *tr,
3208 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3209 {
3210 	struct trace_event_call *call = &event_user_stack;
3211 	struct ring_buffer_event *event;
3212 	struct userstack_entry *entry;
3213 
3214 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3215 		return;
3216 
3217 	/*
3218 	 * NMIs cannot handle page faults, even with fixups.
3219 	 * Saving the user stack can (and often does) fault.
3220 	 */
3221 	if (unlikely(in_nmi()))
3222 		return;
3223 
3224 	/*
3225 	 * prevent recursion, since the user stack tracing may
3226 	 * trigger other kernel events.
3227 	 */
3228 	preempt_disable();
3229 	if (__this_cpu_read(user_stack_count))
3230 		goto out;
3231 
3232 	__this_cpu_inc(user_stack_count);
3233 
3234 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3235 					    sizeof(*entry), trace_ctx);
3236 	if (!event)
3237 		goto out_drop_count;
3238 	entry	= ring_buffer_event_data(event);
3239 
3240 	entry->tgid		= current->tgid;
3241 	memset(&entry->caller, 0, sizeof(entry->caller));
3242 
3243 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3244 	if (!call_filter_check_discard(call, entry, buffer, event))
3245 		__buffer_unlock_commit(buffer, event);
3246 
3247  out_drop_count:
3248 	__this_cpu_dec(user_stack_count);
3249  out:
3250 	preempt_enable();
3251 }
3252 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3253 static void ftrace_trace_userstack(struct trace_array *tr,
3254 				   struct trace_buffer *buffer,
3255 				   unsigned int trace_ctx)
3256 {
3257 }
3258 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3259 
3260 #endif /* CONFIG_STACKTRACE */
3261 
3262 static inline void
3263 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3264 			  unsigned long long delta)
3265 {
3266 	entry->bottom_delta_ts = delta & U32_MAX;
3267 	entry->top_delta_ts = (delta >> 32);
3268 }
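/*
 * Illustrative inverse of the helper above (hypothetical; the output
 * side reconstructs the value the same way): rebuild the 64-bit delta
 * from the two halves stored in the entry.
 */
static inline u64 example_func_repeats_delta_ts(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}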
3269 
3270 void trace_last_func_repeats(struct trace_array *tr,
3271 			     struct trace_func_repeats *last_info,
3272 			     unsigned int trace_ctx)
3273 {
3274 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3275 	struct func_repeats_entry *entry;
3276 	struct ring_buffer_event *event;
3277 	u64 delta;
3278 
3279 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3280 					    sizeof(*entry), trace_ctx);
3281 	if (!event)
3282 		return;
3283 
3284 	delta = ring_buffer_event_time_stamp(buffer, event) -
3285 		last_info->ts_last_call;
3286 
3287 	entry = ring_buffer_event_data(event);
3288 	entry->ip = last_info->ip;
3289 	entry->parent_ip = last_info->parent_ip;
3290 	entry->count = last_info->count;
3291 	func_repeats_set_delta_ts(entry, delta);
3292 
3293 	__buffer_unlock_commit(buffer, event);
3294 }
3295 
3296 /* created for use with alloc_percpu */
3297 struct trace_buffer_struct {
3298 	int nesting;
3299 	char buffer[4][TRACE_BUF_SIZE];
3300 };
3301 
3302 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3303 
3304 /*
3305  * This allows for lockless recording.  If we're nested too deeply, then
3306  * this returns NULL.
3307  */
3308 static char *get_trace_buf(void)
3309 {
3310 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3311 
3312 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3313 		return NULL;
3314 
3315 	buffer->nesting++;
3316 
3317 	/* Interrupts must see nesting incremented before we use the buffer */
3318 	barrier();
3319 	return &buffer->buffer[buffer->nesting - 1][0];
3320 }
3321 
3322 static void put_trace_buf(void)
3323 {
3324 	/* Don't let the decrement of nesting leak before this */
3325 	barrier();
3326 	this_cpu_dec(trace_percpu_buffer->nesting);
3327 }
3328 
3329 static int alloc_percpu_trace_buffer(void)
3330 {
3331 	struct trace_buffer_struct __percpu *buffers;
3332 
3333 	if (trace_percpu_buffer)
3334 		return 0;
3335 
3336 	buffers = alloc_percpu(struct trace_buffer_struct);
3337 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3338 		return -ENOMEM;
3339 
3340 	trace_percpu_buffer = buffers;
3341 	return 0;
3342 }
3343 
3344 static int buffers_allocated;
3345 
3346 void trace_printk_init_buffers(void)
3347 {
3348 	if (buffers_allocated)
3349 		return;
3350 
3351 	if (alloc_percpu_trace_buffer())
3352 		return;
3353 
3354 	/* trace_printk() is for debug use only. Don't use it in production. */
3355 
3356 	pr_warn("\n");
3357 	pr_warn("**********************************************************\n");
3358 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3359 	pr_warn("**                                                      **\n");
3360 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3361 	pr_warn("**                                                      **\n");
3362 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3363 	pr_warn("** unsafe for production use.                           **\n");
3364 	pr_warn("**                                                      **\n");
3365 	pr_warn("** If you see this message and you are not debugging    **\n");
3366 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3367 	pr_warn("**                                                      **\n");
3368 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3369 	pr_warn("**********************************************************\n");
3370 
3371 	/* Expand the buffers to set size */
3372 	tracing_update_buffers(&global_trace);
3373 
3374 	buffers_allocated = 1;
3375 
3376 	/*
3377 	 * trace_printk_init_buffers() can be called by modules.
3378 	 * If that happens, then we need to start cmdline recording
3379 	 * directly here. If the global_trace.buffer is already
3380 	 * allocated here, then this was called by module code.
3381 	 */
3382 	if (global_trace.array_buffer.buffer)
3383 		tracing_start_cmdline_record();
3384 }
3385 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3386 
3387 void trace_printk_start_comm(void)
3388 {
3389 	/* Start tracing comms if trace printk is set */
3390 	if (!buffers_allocated)
3391 		return;
3392 	tracing_start_cmdline_record();
3393 }
3394 
3395 static void trace_printk_start_stop_comm(int enabled)
3396 {
3397 	if (!buffers_allocated)
3398 		return;
3399 
3400 	if (enabled)
3401 		tracing_start_cmdline_record();
3402 	else
3403 		tracing_stop_cmdline_record();
3404 }
3405 
3406 /**
3407  * trace_vbprintk - write binary msg to tracing buffer
3408  * @ip:    The address of the caller
3409  * @fmt:   The string format to write to the buffer
3410  * @args:  Arguments for @fmt
3411  */
3412 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3413 {
3414 	struct trace_event_call *call = &event_bprint;
3415 	struct ring_buffer_event *event;
3416 	struct trace_buffer *buffer;
3417 	struct trace_array *tr = &global_trace;
3418 	struct bprint_entry *entry;
3419 	unsigned int trace_ctx;
3420 	char *tbuffer;
3421 	int len = 0, size;
3422 
3423 	if (unlikely(tracing_selftest_running || tracing_disabled))
3424 		return 0;
3425 
3426 	/* Don't pollute graph traces with trace_vprintk internals */
3427 	pause_graph_tracing();
3428 
3429 	trace_ctx = tracing_gen_ctx();
3430 	preempt_disable_notrace();
3431 
3432 	tbuffer = get_trace_buf();
3433 	if (!tbuffer) {
3434 		len = 0;
3435 		goto out_nobuffer;
3436 	}
3437 
3438 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3439 
3440 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3441 		goto out_put;
3442 
3443 	size = sizeof(*entry) + sizeof(u32) * len;
3444 	buffer = tr->array_buffer.buffer;
3445 	ring_buffer_nest_start(buffer);
3446 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3447 					    trace_ctx);
3448 	if (!event)
3449 		goto out;
3450 	entry = ring_buffer_event_data(event);
3451 	entry->ip			= ip;
3452 	entry->fmt			= fmt;
3453 
3454 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3455 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3456 		__buffer_unlock_commit(buffer, event);
3457 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3458 	}
3459 
3460 out:
3461 	ring_buffer_nest_end(buffer);
3462 out_put:
3463 	put_trace_buf();
3464 
3465 out_nobuffer:
3466 	preempt_enable_notrace();
3467 	unpause_graph_tracing();
3468 
3469 	return len;
3470 }
3471 EXPORT_SYMBOL_GPL(trace_vbprintk);
3472 
3473 __printf(3, 0)
3474 static int
3475 __trace_array_vprintk(struct trace_buffer *buffer,
3476 		      unsigned long ip, const char *fmt, va_list args)
3477 {
3478 	struct trace_event_call *call = &event_print;
3479 	struct ring_buffer_event *event;
3480 	int len = 0, size;
3481 	struct print_entry *entry;
3482 	unsigned int trace_ctx;
3483 	char *tbuffer;
3484 
3485 	if (tracing_disabled)
3486 		return 0;
3487 
3488 	/* Don't pollute graph traces with trace_vprintk internals */
3489 	pause_graph_tracing();
3490 
3491 	trace_ctx = tracing_gen_ctx();
3492 	preempt_disable_notrace();
3493 
3494 
3495 	tbuffer = get_trace_buf();
3496 	if (!tbuffer) {
3497 		len = 0;
3498 		goto out_nobuffer;
3499 	}
3500 
3501 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3502 
3503 	size = sizeof(*entry) + len + 1;
3504 	ring_buffer_nest_start(buffer);
3505 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3506 					    trace_ctx);
3507 	if (!event)
3508 		goto out;
3509 	entry = ring_buffer_event_data(event);
3510 	entry->ip = ip;
3511 
3512 	memcpy(&entry->buf, tbuffer, len + 1);
3513 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3514 		__buffer_unlock_commit(buffer, event);
3515 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3516 	}
3517 
3518 out:
3519 	ring_buffer_nest_end(buffer);
3520 	put_trace_buf();
3521 
3522 out_nobuffer:
3523 	preempt_enable_notrace();
3524 	unpause_graph_tracing();
3525 
3526 	return len;
3527 }
3528 
3529 __printf(3, 0)
3530 int trace_array_vprintk(struct trace_array *tr,
3531 			unsigned long ip, const char *fmt, va_list args)
3532 {
3533 	if (tracing_selftest_running && tr == &global_trace)
3534 		return 0;
3535 
3536 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3537 }
3538 
3539 /**
3540  * trace_array_printk - Print a message to a specific instance
3541  * @tr: The instance trace_array descriptor
3542  * @ip: The instruction pointer that this is called from.
3543  * @fmt: The format to print (printf format)
3544  *
3545  * If a subsystem sets up its own instance, they have the right to
3546  * printk strings into their tracing instance buffer using this
3547  * function. Note, this function will not write into the top level
3548  * buffer (use trace_printk() for that), as writing into the top level
3549  * buffer should only have events that can be individually disabled.
3550  * trace_printk() is only used for debugging a kernel, and should never
3551  * be incorporated into normal use.
3552  *
3553  * trace_array_printk() can be used, as it will not add noise to the
3554  * top level tracing buffer.
3555  *
3556  * Note, trace_array_init_printk() must be called on @tr before this
3557  * can be used.
3558  */
3559 __printf(3, 0)
3560 int trace_array_printk(struct trace_array *tr,
3561 		       unsigned long ip, const char *fmt, ...)
3562 {
3563 	int ret;
3564 	va_list ap;
3565 
3566 	if (!tr)
3567 		return -ENOENT;
3568 
3569 	/* This is only allowed for created instances */
3570 	if (tr == &global_trace)
3571 		return 0;
3572 
3573 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3574 		return 0;
3575 
3576 	va_start(ap, fmt);
3577 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3578 	va_end(ap);
3579 	return ret;
3580 }
3581 EXPORT_SYMBOL_GPL(trace_array_printk);
3582 
3583 /**
3584  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3585  * @tr: The trace array to initialize the buffers for
3586  *
3587  * As trace_array_printk() only writes into instances, they are OK to
3588  * have in the kernel (unlike trace_printk()). This needs to be called
3589  * before trace_array_printk() can be used on a trace_array.
3590  */
3591 int trace_array_init_printk(struct trace_array *tr)
3592 {
3593 	if (!tr)
3594 		return -ENOENT;
3595 
3596 	/* This is only allowed for created instances */
3597 	if (tr == &global_trace)
3598 		return -EINVAL;
3599 
3600 	return alloc_percpu_trace_buffer();
3601 }
3602 EXPORT_SYMBOL_GPL(trace_array_init_printk);
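/*
 * Illustrative sketch (hypothetical module code): logging into a
 * private instance. The trace_array pointer is assumed to have been
 * obtained elsewhere (e.g. via trace_array_get_by_name()); the only
 * requirement here is that trace_array_init_printk() ran once before
 * the first message.
 */
static int example_instance_log(struct trace_array *tr, int value)
{
	int ret;

	ret = trace_array_init_printk(tr);
	if (ret)
		return ret;

	return trace_array_printk(tr, _THIS_IP_, "example value: %d\n", value);
}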
3603 
3604 __printf(3, 4)
3605 int trace_array_printk_buf(struct trace_buffer *buffer,
3606 			   unsigned long ip, const char *fmt, ...)
3607 {
3608 	int ret;
3609 	va_list ap;
3610 
3611 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3612 		return 0;
3613 
3614 	va_start(ap, fmt);
3615 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3616 	va_end(ap);
3617 	return ret;
3618 }
3619 
3620 __printf(2, 0)
3621 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3622 {
3623 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3624 }
3625 EXPORT_SYMBOL_GPL(trace_vprintk);
3626 
3627 static void trace_iterator_increment(struct trace_iterator *iter)
3628 {
3629 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3630 
3631 	iter->idx++;
3632 	if (buf_iter)
3633 		ring_buffer_iter_advance(buf_iter);
3634 }
3635 
3636 static struct trace_entry *
3637 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3638 		unsigned long *lost_events)
3639 {
3640 	struct ring_buffer_event *event;
3641 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3642 
3643 	if (buf_iter) {
3644 		event = ring_buffer_iter_peek(buf_iter, ts);
3645 		if (lost_events)
3646 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3647 				(unsigned long)-1 : 0;
3648 	} else {
3649 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3650 					 lost_events);
3651 	}
3652 
3653 	if (event) {
3654 		iter->ent_size = ring_buffer_event_length(event);
3655 		return ring_buffer_event_data(event);
3656 	}
3657 	iter->ent_size = 0;
3658 	return NULL;
3659 }
3660 
3661 static struct trace_entry *
3662 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3663 		  unsigned long *missing_events, u64 *ent_ts)
3664 {
3665 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3666 	struct trace_entry *ent, *next = NULL;
3667 	unsigned long lost_events = 0, next_lost = 0;
3668 	int cpu_file = iter->cpu_file;
3669 	u64 next_ts = 0, ts;
3670 	int next_cpu = -1;
3671 	int next_size = 0;
3672 	int cpu;
3673 
3674 	/*
3675 	 * If we are in a per_cpu trace file, don't bother iterating over
3676 	 * all CPUs; peek at that one CPU directly.
3677 	 */
3678 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3679 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3680 			return NULL;
3681 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3682 		if (ent_cpu)
3683 			*ent_cpu = cpu_file;
3684 
3685 		return ent;
3686 	}
3687 
3688 	for_each_tracing_cpu(cpu) {
3689 
3690 		if (ring_buffer_empty_cpu(buffer, cpu))
3691 			continue;
3692 
3693 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3694 
3695 		/*
3696 		 * Pick the entry with the smallest timestamp:
3697 		 */
3698 		if (ent && (!next || ts < next_ts)) {
3699 			next = ent;
3700 			next_cpu = cpu;
3701 			next_ts = ts;
3702 			next_lost = lost_events;
3703 			next_size = iter->ent_size;
3704 		}
3705 	}
3706 
3707 	iter->ent_size = next_size;
3708 
3709 	if (ent_cpu)
3710 		*ent_cpu = next_cpu;
3711 
3712 	if (ent_ts)
3713 		*ent_ts = next_ts;
3714 
3715 	if (missing_events)
3716 		*missing_events = next_lost;
3717 
3718 	return next;
3719 }
3720 
3721 #define STATIC_FMT_BUF_SIZE	128
3722 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3723 
3724 char *trace_iter_expand_format(struct trace_iterator *iter)
3725 {
3726 	char *tmp;
3727 
3728 	/*
3729 	 * iter->tr is NULL when used with tp_printk, which makes
3730 	 * this get called where it is not safe to call krealloc().
3731 	 */
3732 	if (!iter->tr || iter->fmt == static_fmt_buf)
3733 		return NULL;
3734 
3735 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3736 		       GFP_KERNEL);
3737 	if (tmp) {
3738 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3739 		iter->fmt = tmp;
3740 	}
3741 
3742 	return tmp;
3743 }
3744 
3745 /* Returns true if the string is safe to dereference from an event */
3746 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3747 			   bool star, int len)
3748 {
3749 	unsigned long addr = (unsigned long)str;
3750 	struct trace_event *trace_event;
3751 	struct trace_event_call *event;
3752 
3753 	/* Ignore strings with no length */
3754 	if (star && !len)
3755 		return true;
3756 
3757 	/* OK if part of the event data */
3758 	if ((addr >= (unsigned long)iter->ent) &&
3759 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3760 		return true;
3761 
3762 	/* OK if part of the temp seq buffer */
3763 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3764 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3765 		return true;
3766 
3767 	/* Core rodata cannot be freed */
3768 	if (is_kernel_rodata(addr))
3769 		return true;
3770 
3771 	if (trace_is_tracepoint_string(str))
3772 		return true;
3773 
3774 	/*
3775 	 * Now this could be a module event, referencing core module
3776 	 * data, which is OK.
3777 	 */
3778 	if (!iter->ent)
3779 		return false;
3780 
3781 	trace_event = ftrace_find_event(iter->ent->type);
3782 	if (!trace_event)
3783 		return false;
3784 
3785 	event = container_of(trace_event, struct trace_event_call, event);
3786 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3787 		return false;
3788 
3789 	/* Would rather have rodata, but this will suffice */
3790 	if (within_module_core(addr, event->module))
3791 		return true;
3792 
3793 	return false;
3794 }
3795 
3796 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3797 
3798 static int test_can_verify_check(const char *fmt, ...)
3799 {
3800 	char buf[16];
3801 	va_list ap;
3802 	int ret;
3803 
3804 	/*
3805 	 * The verifier depends on vsnprintf() modifying the va_list
3806 	 * passed to it, i.e. on the va_list being passed by reference.
3807 	 * Some architectures (like x86_32) pass it by value, which means
3808 	 * that vsnprintf() does not modify the caller's va_list, and the
3809 	 * verifier would then need to be able to understand all the values
3810 	 * that vsnprintf can use. If it is passed by value, the verifier
3811 	 * is disabled.
3812 	 */
3813 	va_start(ap, fmt);
3814 	vsnprintf(buf, 16, "%d", ap);
3815 	ret = va_arg(ap, int);
3816 	va_end(ap);
3817 
3818 	return ret;
3819 }
3820 
3821 static void test_can_verify(void)
3822 {
3823 	if (!test_can_verify_check("%d %d", 0, 1)) {
3824 		pr_info("trace event string verifier disabled\n");
3825 		static_branch_inc(&trace_no_verify);
3826 	}
3827 }
3828 
3829 /**
3830  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3831  * @iter: The iterator that holds the seq buffer and the event being printed
3832  * @fmt: The format used to print the event
3833  * @ap: The va_list holding the data to print from @fmt.
3834  *
3835  * This writes the data into the @iter->seq buffer using the data from
3836  * @fmt and @ap. If the format has a %s, then the source of the string
3837  * is examined to make sure it is safe to print, otherwise it will
3838  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3839  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3840  */
3841 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3842 			 va_list ap)
3843 {
3844 	const char *p = fmt;
3845 	const char *str;
3846 	int i, j;
3847 
3848 	if (WARN_ON_ONCE(!fmt))
3849 		return;
3850 
3851 	if (static_branch_unlikely(&trace_no_verify))
3852 		goto print;
3853 
3854 	/* Don't bother checking when doing a ftrace_dump() */
3855 	if (iter->fmt == static_fmt_buf)
3856 		goto print;
3857 
3858 	while (*p) {
3859 		bool star = false;
3860 		int len = 0;
3861 
3862 		j = 0;
3863 
3864 		/* We only care about %s and variants */
3865 		for (i = 0; p[i]; i++) {
3866 			if (i + 1 >= iter->fmt_size) {
3867 				/*
3868 				 * If we can't expand the copy buffer,
3869 				 * just print it.
3870 				 */
3871 				if (!trace_iter_expand_format(iter))
3872 					goto print;
3873 			}
3874 
3875 			if (p[i] == '\\' && p[i+1]) {
3876 				i++;
3877 				continue;
3878 			}
3879 			if (p[i] == '%') {
3880 				/* Need to test cases like %08.*s */
3881 				for (j = 1; p[i+j]; j++) {
3882 					if (isdigit(p[i+j]) ||
3883 					    p[i+j] == '.')
3884 						continue;
3885 					if (p[i+j] == '*') {
3886 						star = true;
3887 						continue;
3888 					}
3889 					break;
3890 				}
3891 				if (p[i+j] == 's')
3892 					break;
3893 				star = false;
3894 			}
3895 			j = 0;
3896 		}
3897 		/* If no %s found then just print normally */
3898 		if (!p[i])
3899 			break;
3900 
3901 		/* Copy up to the %s, and print that */
3902 		strncpy(iter->fmt, p, i);
3903 		iter->fmt[i] = '\0';
3904 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3905 
3906 		/*
3907 		 * If iter->seq is full, the above call no longer guarantees
3908 		 * that ap is in sync with fmt processing, and further calls
3909 		 * to va_arg() can return wrong positional arguments.
3910 		 *
3911 		 * Ensure that ap is no longer used in this case.
3912 		 */
3913 		if (iter->seq.full) {
3914 			p = "";
3915 			break;
3916 		}
3917 
3918 		if (star)
3919 			len = va_arg(ap, int);
3920 
3921 		/* The ap now points to the string data of the %s */
3922 		str = va_arg(ap, const char *);
3923 
3924 		/*
3925 		 * If you hit this warning, it is likely that the
3926 		 * trace event in question used %s on a string that
3927 		 * was saved at the time of the event, but may not be
3928 		 * around when the trace is read. Use __string(),
3929 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3930 		 * instead. See samples/trace_events/trace-events-sample.h
3931 		 * for reference.
3932 		 */
3933 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3934 			      "fmt: '%s' current_buffer: '%s'",
3935 			      fmt, seq_buf_str(&iter->seq.seq))) {
3936 			int ret;
3937 
3938 			/* Try to safely read the string */
3939 			if (star) {
3940 				if (len + 1 > iter->fmt_size)
3941 					len = iter->fmt_size - 1;
3942 				if (len < 0)
3943 					len = 0;
3944 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3945 				iter->fmt[len] = 0;
3946 				star = false;
3947 			} else {
3948 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3949 								  iter->fmt_size);
3950 			}
3951 			if (ret < 0)
3952 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3953 			else
3954 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3955 						 str, iter->fmt);
3956 			str = "[UNSAFE-MEMORY]";
3957 			strcpy(iter->fmt, "%s");
3958 		} else {
3959 			strncpy(iter->fmt, p + i, j + 1);
3960 			iter->fmt[j+1] = '\0';
3961 		}
3962 		if (star)
3963 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3964 		else
3965 			trace_seq_printf(&iter->seq, iter->fmt, str);
3966 
3967 		p += i + j + 1;
3968 	}
3969  print:
3970 	if (*p)
3971 		trace_seq_vprintf(&iter->seq, p, ap);
3972 }
3973 
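/*
 * Return a copy of @fmt with each bare %p replaced by %px so that the
 * event output shows real addresses. @fmt is returned unchanged when
 * pointer hashing is requested or the copy buffer is unavailable.
 */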
3974 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3975 {
3976 	const char *p, *new_fmt;
3977 	char *q;
3978 
3979 	if (WARN_ON_ONCE(!fmt))
3980 		return fmt;
3981 
3982 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3983 		return fmt;
3984 
3985 	p = fmt;
3986 	new_fmt = q = iter->fmt;
3987 	while (*p) {
3988 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3989 			if (!trace_iter_expand_format(iter))
3990 				return fmt;
3991 
3992 			q += iter->fmt - new_fmt;
3993 			new_fmt = iter->fmt;
3994 		}
3995 
3996 		*q++ = *p++;
3997 
3998 		/* Replace %p with %px */
3999 		if (p[-1] == '%') {
4000 			if (p[0] == '%') {
4001 				*q++ = *p++;
4002 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4003 				*q++ = *p++;
4004 				*q++ = 'x';
4005 			}
4006 		}
4007 	}
4008 	*q = '\0';
4009 
4010 	return new_fmt;
4011 }
4012 
4013 #define STATIC_TEMP_BUF_SIZE	128
4014 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4015 
4016 /* Find the next real entry, without updating the iterator itself */
4017 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4018 					  int *ent_cpu, u64 *ent_ts)
4019 {
4020 	/* __find_next_entry will reset ent_size */
4021 	int ent_size = iter->ent_size;
4022 	struct trace_entry *entry;
4023 
4024 	/*
4025 	 * If called from ftrace_dump(), then the iter->temp buffer
4026 	 * will be the static_temp_buf and not created from kmalloc.
4027 	 * If the entry size is greater than the buffer, we cannot
4028 	 * save it. Just return NULL in that case. This is only
4029 	 * used to add markers when two consecutive events' time
4030 	 * stamps have a large delta. See trace_print_lat_context().
4031 	 */
4032 	if (iter->temp == static_temp_buf &&
4033 	    STATIC_TEMP_BUF_SIZE < ent_size)
4034 		return NULL;
4035 
4036 	/*
4037 	 * __find_next_entry() may call peek_next_entry(), which in turn may
4038 	 * call ring_buffer_peek() and make the contents of iter->ent
4039 	 * undefined. Need to copy iter->ent now.
4040 	 */
4041 	if (iter->ent && iter->ent != iter->temp) {
4042 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4043 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4044 			void *temp;
4045 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4046 			if (!temp)
4047 				return NULL;
4048 			kfree(iter->temp);
4049 			iter->temp = temp;
4050 			iter->temp_size = iter->ent_size;
4051 		}
4052 		memcpy(iter->temp, iter->ent, iter->ent_size);
4053 		iter->ent = iter->temp;
4054 	}
4055 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4056 	/* Put back the original ent_size */
4057 	iter->ent_size = ent_size;
4058 
4059 	return entry;
4060 }
4061 
4062 /* Find the next real entry, and increment the iterator to the next entry */
4063 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4064 {
4065 	iter->ent = __find_next_entry(iter, &iter->cpu,
4066 				      &iter->lost_events, &iter->ts);
4067 
4068 	if (iter->ent)
4069 		trace_iterator_increment(iter);
4070 
4071 	return iter->ent ? iter : NULL;
4072 }
4073 
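/*
 * Consume (read and discard) the next entry on iter->cpu, updating
 * iter->ts and iter->lost_events.
 */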
4074 static void trace_consume(struct trace_iterator *iter)
4075 {
4076 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4077 			    &iter->lost_events);
4078 }
4079 
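/* seq_file .next callback for the "trace" file: advance to entry *pos */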
4080 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4081 {
4082 	struct trace_iterator *iter = m->private;
4083 	int i = (int)*pos;
4084 	void *ent;
4085 
4086 	WARN_ON_ONCE(iter->leftover);
4087 
4088 	(*pos)++;
4089 
4090 	/* can't go backwards */
4091 	if (iter->idx > i)
4092 		return NULL;
4093 
4094 	if (iter->idx < 0)
4095 		ent = trace_find_next_entry_inc(iter);
4096 	else
4097 		ent = iter;
4098 
4099 	while (ent && iter->idx < i)
4100 		ent = trace_find_next_entry_inc(iter);
4101 
4102 	iter->pos = *pos;
4103 
4104 	return ent;
4105 }
4106 
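/*
 * Reset the ring buffer iterator for @cpu and skip over any entries
 * stamped before the buffer's time_start, recording how many were
 * skipped in the per-CPU skipped_entries count.
 */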
4107 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4108 {
4109 	struct ring_buffer_iter *buf_iter;
4110 	unsigned long entries = 0;
4111 	u64 ts;
4112 
4113 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4114 
4115 	buf_iter = trace_buffer_iter(iter, cpu);
4116 	if (!buf_iter)
4117 		return;
4118 
4119 	ring_buffer_iter_reset(buf_iter);
4120 
4121 	/*
4122 	 * With the max latency tracers it can happen that a reset
4123 	 * never took place on a CPU. This is evident from the
4124 	 * timestamp being before the start of the buffer.
4125 	 */
4126 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4127 		if (ts >= iter->array_buffer->time_start)
4128 			break;
4129 		entries++;
4130 		ring_buffer_iter_advance(buf_iter);
4131 	}
4132 
4133 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4134 }
4135 
4136 /*
4137  * The current tracer is copied to avoid holding a global
4138  * lock all around.
4139  */
4140 static void *s_start(struct seq_file *m, loff_t *pos)
4141 {
4142 	struct trace_iterator *iter = m->private;
4143 	struct trace_array *tr = iter->tr;
4144 	int cpu_file = iter->cpu_file;
4145 	void *p = NULL;
4146 	loff_t l = 0;
4147 	int cpu;
4148 
4149 	mutex_lock(&trace_types_lock);
4150 	if (unlikely(tr->current_trace != iter->trace)) {
4151 		/* Close iter->trace before switching to the new current tracer */
4152 		if (iter->trace->close)
4153 			iter->trace->close(iter);
4154 		iter->trace = tr->current_trace;
4155 		/* Reopen the new current tracer */
4156 		if (iter->trace->open)
4157 			iter->trace->open(iter);
4158 	}
4159 	mutex_unlock(&trace_types_lock);
4160 
4161 #ifdef CONFIG_TRACER_MAX_TRACE
4162 	if (iter->snapshot && iter->trace->use_max_tr)
4163 		return ERR_PTR(-EBUSY);
4164 #endif
4165 
4166 	if (*pos != iter->pos) {
4167 		iter->ent = NULL;
4168 		iter->cpu = 0;
4169 		iter->idx = -1;
4170 
4171 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4172 			for_each_tracing_cpu(cpu)
4173 				tracing_iter_reset(iter, cpu);
4174 		} else
4175 			tracing_iter_reset(iter, cpu_file);
4176 
4177 		iter->leftover = 0;
4178 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4179 			;
4180 
4181 	} else {
4182 		/*
4183 		 * If we overflowed the seq_file before, then we want
4184 		 * to just reuse the trace_seq buffer again.
4185 		 */
4186 		if (iter->leftover)
4187 			p = iter;
4188 		else {
4189 			l = *pos - 1;
4190 			p = s_next(m, p, &l);
4191 		}
4192 	}
4193 
4194 	trace_event_read_lock();
4195 	trace_access_lock(cpu_file);
4196 	return p;
4197 }
4198 
4199 static void s_stop(struct seq_file *m, void *p)
4200 {
4201 	struct trace_iterator *iter = m->private;
4202 
4203 #ifdef CONFIG_TRACER_MAX_TRACE
4204 	if (iter->snapshot && iter->trace->use_max_tr)
4205 		return;
4206 #endif
4207 
4208 	trace_access_unlock(iter->cpu_file);
4209 	trace_event_read_unlock();
4210 }
4211 
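/*
 * Count the readable entries and the total written (including
 * overwritten ones) for a single CPU buffer.
 */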
4212 static void
4213 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4214 		      unsigned long *entries, int cpu)
4215 {
4216 	unsigned long count;
4217 
4218 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4219 	/*
4220 	 * If this buffer has skipped entries, then we hold all
4221 	 * entries for the trace and we need to ignore the
4222 	 * ones before the time stamp.
4223 	 */
4224 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4225 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4226 		/* total is the same as the entries */
4227 		*total = count;
4228 	} else
4229 		*total = count +
4230 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4231 	*entries = count;
4232 }
4233 
4234 static void
4235 get_total_entries(struct array_buffer *buf,
4236 		  unsigned long *total, unsigned long *entries)
4237 {
4238 	unsigned long t, e;
4239 	int cpu;
4240 
4241 	*total = 0;
4242 	*entries = 0;
4243 
4244 	for_each_tracing_cpu(cpu) {
4245 		get_total_entries_cpu(buf, &t, &e, cpu);
4246 		*total += t;
4247 		*entries += e;
4248 	}
4249 }
4250 
4251 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4252 {
4253 	unsigned long total, entries;
4254 
4255 	if (!tr)
4256 		tr = &global_trace;
4257 
4258 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4259 
4260 	return entries;
4261 }
4262 
4263 unsigned long trace_total_entries(struct trace_array *tr)
4264 {
4265 	unsigned long total, entries;
4266 
4267 	if (!tr)
4268 		tr = &global_trace;
4269 
4270 	get_total_entries(&tr->array_buffer, &total, &entries);
4271 
4272 	return entries;
4273 }
4274 
4275 static void print_lat_help_header(struct seq_file *m)
4276 {
4277 	seq_puts(m, "#                    _------=> CPU#            \n"
4278 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4279 		    "#                  | / _----=> need-resched    \n"
4280 		    "#                  || / _---=> hardirq/softirq \n"
4281 		    "#                  ||| / _--=> preempt-depth   \n"
4282 		    "#                  |||| / _-=> migrate-disable \n"
4283 		    "#                  ||||| /     delay           \n"
4284 		    "#  cmd     pid     |||||| time  |   caller     \n"
4285 		    "#     \\   /        ||||||  \\    |    /       \n");
4286 }
4287 
4288 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4289 {
4290 	unsigned long total;
4291 	unsigned long entries;
4292 
4293 	get_total_entries(buf, &total, &entries);
4294 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4295 		   entries, total, num_online_cpus());
4296 	seq_puts(m, "#\n");
4297 }
4298 
4299 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4300 				   unsigned int flags)
4301 {
4302 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4303 
4304 	print_event_info(buf, m);
4305 
4306 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4307 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4308 }
4309 
4310 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4311 				       unsigned int flags)
4312 {
4313 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4314 	static const char space[] = "            ";
4315 	int prec = tgid ? 12 : 2;
4316 
4317 	print_event_info(buf, m);
4318 
4319 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4320 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4321 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4322 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4323 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4324 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4325 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4326 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4327 }
4328 
4329 void
4330 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4331 {
4332 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4333 	struct array_buffer *buf = iter->array_buffer;
4334 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4335 	struct tracer *type = iter->trace;
4336 	unsigned long entries;
4337 	unsigned long total;
4338 	const char *name = type->name;
4339 
4340 	get_total_entries(buf, &total, &entries);
4341 
4342 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4343 		   name, UTS_RELEASE);
4344 	seq_puts(m, "# -----------------------------------"
4345 		 "---------------------------------\n");
4346 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4347 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4348 		   nsecs_to_usecs(data->saved_latency),
4349 		   entries,
4350 		   total,
4351 		   buf->cpu,
4352 		   preempt_model_none()      ? "server" :
4353 		   preempt_model_voluntary() ? "desktop" :
4354 		   preempt_model_full()      ? "preempt" :
4355 		   preempt_model_rt()        ? "preempt_rt" :
4356 		   "unknown",
4357 		   /* These are reserved for later use */
4358 		   0, 0, 0, 0);
4359 #ifdef CONFIG_SMP
4360 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4361 #else
4362 	seq_puts(m, ")\n");
4363 #endif
4364 	seq_puts(m, "#    -----------------\n");
4365 	seq_printf(m, "#    | task: %.16s-%d "
4366 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4367 		   data->comm, data->pid,
4368 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4369 		   data->policy, data->rt_priority);
4370 	seq_puts(m, "#    -----------------\n");
4371 
4372 	if (data->critical_start) {
4373 		seq_puts(m, "#  => started at: ");
4374 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4375 		trace_print_seq(m, &iter->seq);
4376 		seq_puts(m, "\n#  => ended at:   ");
4377 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4378 		trace_print_seq(m, &iter->seq);
4379 		seq_puts(m, "\n#\n");
4380 	}
4381 
4382 	seq_puts(m, "#\n");
4383 }
4384 
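/*
 * Emit a "CPU buffer started" annotation the first time entries from a
 * given CPU show up in the output, when buffer annotations are enabled.
 */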
4385 static void test_cpu_buff_start(struct trace_iterator *iter)
4386 {
4387 	struct trace_seq *s = &iter->seq;
4388 	struct trace_array *tr = iter->tr;
4389 
4390 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4391 		return;
4392 
4393 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4394 		return;
4395 
4396 	if (cpumask_available(iter->started) &&
4397 	    cpumask_test_cpu(iter->cpu, iter->started))
4398 		return;
4399 
4400 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4401 		return;
4402 
4403 	if (cpumask_available(iter->started))
4404 		cpumask_set_cpu(iter->cpu, iter->started);
4405 
4406 	/* Don't print the "CPU buffer started" annotation for the first entry of the trace */
4407 	if (iter->idx > 1)
4408 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4409 				iter->cpu);
4410 }
4411 
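/* Default human-readable output of a single trace entry */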
4412 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4413 {
4414 	struct trace_array *tr = iter->tr;
4415 	struct trace_seq *s = &iter->seq;
4416 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4417 	struct trace_entry *entry;
4418 	struct trace_event *event;
4419 
4420 	entry = iter->ent;
4421 
4422 	test_cpu_buff_start(iter);
4423 
4424 	event = ftrace_find_event(entry->type);
4425 
4426 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4427 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4428 			trace_print_lat_context(iter);
4429 		else
4430 			trace_print_context(iter);
4431 	}
4432 
4433 	if (trace_seq_has_overflowed(s))
4434 		return TRACE_TYPE_PARTIAL_LINE;
4435 
4436 	if (event) {
4437 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4438 			return print_event_fields(iter, event);
4439 		return event->funcs->trace(iter, sym_flags, event);
4440 	}
4441 
4442 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4443 
4444 	return trace_handle_return(s);
4445 }
4446 
4447 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4448 {
4449 	struct trace_array *tr = iter->tr;
4450 	struct trace_seq *s = &iter->seq;
4451 	struct trace_entry *entry;
4452 	struct trace_event *event;
4453 
4454 	entry = iter->ent;
4455 
4456 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4457 		trace_seq_printf(s, "%d %d %llu ",
4458 				 entry->pid, iter->cpu, iter->ts);
4459 
4460 	if (trace_seq_has_overflowed(s))
4461 		return TRACE_TYPE_PARTIAL_LINE;
4462 
4463 	event = ftrace_find_event(entry->type);
4464 	if (event)
4465 		return event->funcs->raw(iter, 0, event);
4466 
4467 	trace_seq_printf(s, "%d ?\n", entry->type);
4468 
4469 	return trace_handle_return(s);
4470 }
4471 
4472 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4473 {
4474 	struct trace_array *tr = iter->tr;
4475 	struct trace_seq *s = &iter->seq;
4476 	unsigned char newline = '\n';
4477 	struct trace_entry *entry;
4478 	struct trace_event *event;
4479 
4480 	entry = iter->ent;
4481 
4482 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4483 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4484 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4485 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4486 		if (trace_seq_has_overflowed(s))
4487 			return TRACE_TYPE_PARTIAL_LINE;
4488 	}
4489 
4490 	event = ftrace_find_event(entry->type);
4491 	if (event) {
4492 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4493 		if (ret != TRACE_TYPE_HANDLED)
4494 			return ret;
4495 	}
4496 
4497 	SEQ_PUT_FIELD(s, newline);
4498 
4499 	return trace_handle_return(s);
4500 }
4501 
4502 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4503 {
4504 	struct trace_array *tr = iter->tr;
4505 	struct trace_seq *s = &iter->seq;
4506 	struct trace_entry *entry;
4507 	struct trace_event *event;
4508 
4509 	entry = iter->ent;
4510 
4511 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4512 		SEQ_PUT_FIELD(s, entry->pid);
4513 		SEQ_PUT_FIELD(s, iter->cpu);
4514 		SEQ_PUT_FIELD(s, iter->ts);
4515 		if (trace_seq_has_overflowed(s))
4516 			return TRACE_TYPE_PARTIAL_LINE;
4517 	}
4518 
4519 	event = ftrace_find_event(entry->type);
4520 	return event ? event->funcs->binary(iter, 0, event) :
4521 		TRACE_TYPE_HANDLED;
4522 }
4523 
4524 int trace_empty(struct trace_iterator *iter)
4525 {
4526 	struct ring_buffer_iter *buf_iter;
4527 	int cpu;
4528 
4529 	/* If we are looking at one CPU buffer, only check that one */
4530 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4531 		cpu = iter->cpu_file;
4532 		buf_iter = trace_buffer_iter(iter, cpu);
4533 		if (buf_iter) {
4534 			if (!ring_buffer_iter_empty(buf_iter))
4535 				return 0;
4536 		} else {
4537 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4538 				return 0;
4539 		}
4540 		return 1;
4541 	}
4542 
4543 	for_each_tracing_cpu(cpu) {
4544 		buf_iter = trace_buffer_iter(iter, cpu);
4545 		if (buf_iter) {
4546 			if (!ring_buffer_iter_empty(buf_iter))
4547 				return 0;
4548 		} else {
4549 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4550 				return 0;
4551 		}
4552 	}
4553 
4554 	return 1;
4555 }
4556 
4557 /*  Called with trace_event_read_lock() held. */
4558 enum print_line_t print_trace_line(struct trace_iterator *iter)
4559 {
4560 	struct trace_array *tr = iter->tr;
4561 	unsigned long trace_flags = tr->trace_flags;
4562 	enum print_line_t ret;
4563 
4564 	if (iter->lost_events) {
4565 		if (iter->lost_events == (unsigned long)-1)
4566 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4567 					 iter->cpu);
4568 		else
4569 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4570 					 iter->cpu, iter->lost_events);
4571 		if (trace_seq_has_overflowed(&iter->seq))
4572 			return TRACE_TYPE_PARTIAL_LINE;
4573 	}
4574 
4575 	if (iter->trace && iter->trace->print_line) {
4576 		ret = iter->trace->print_line(iter);
4577 		if (ret != TRACE_TYPE_UNHANDLED)
4578 			return ret;
4579 	}
4580 
4581 	if (iter->ent->type == TRACE_BPUTS &&
4582 			trace_flags & TRACE_ITER_PRINTK &&
4583 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4584 		return trace_print_bputs_msg_only(iter);
4585 
4586 	if (iter->ent->type == TRACE_BPRINT &&
4587 			trace_flags & TRACE_ITER_PRINTK &&
4588 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4589 		return trace_print_bprintk_msg_only(iter);
4590 
4591 	if (iter->ent->type == TRACE_PRINT &&
4592 			trace_flags & TRACE_ITER_PRINTK &&
4593 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4594 		return trace_print_printk_msg_only(iter);
4595 
4596 	if (trace_flags & TRACE_ITER_BIN)
4597 		return print_bin_fmt(iter);
4598 
4599 	if (trace_flags & TRACE_ITER_HEX)
4600 		return print_hex_fmt(iter);
4601 
4602 	if (trace_flags & TRACE_ITER_RAW)
4603 		return print_raw_fmt(iter);
4604 
4605 	return print_trace_fmt(iter);
4606 }
4607 
4608 void trace_latency_header(struct seq_file *m)
4609 {
4610 	struct trace_iterator *iter = m->private;
4611 	struct trace_array *tr = iter->tr;
4612 
4613 	/* print nothing if the buffers are empty */
4614 	if (trace_empty(iter))
4615 		return;
4616 
4617 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4618 		print_trace_header(m, iter);
4619 
4620 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4621 		print_lat_help_header(m);
4622 }
4623 
4624 void trace_default_header(struct seq_file *m)
4625 {
4626 	struct trace_iterator *iter = m->private;
4627 	struct trace_array *tr = iter->tr;
4628 	unsigned long trace_flags = tr->trace_flags;
4629 
4630 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4631 		return;
4632 
4633 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4634 		/* print nothing if the buffers are empty */
4635 		if (trace_empty(iter))
4636 			return;
4637 		print_trace_header(m, iter);
4638 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4639 			print_lat_help_header(m);
4640 	} else {
4641 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4642 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4643 				print_func_help_header_irq(iter->array_buffer,
4644 							   m, trace_flags);
4645 			else
4646 				print_func_help_header(iter->array_buffer, m,
4647 						       trace_flags);
4648 		}
4649 	}
4650 }
4651 
4652 static void test_ftrace_alive(struct seq_file *m)
4653 {
4654 	if (!ftrace_is_dead())
4655 		return;
4656 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4657 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4658 }
4659 
4660 #ifdef CONFIG_TRACER_MAX_TRACE
4661 static void show_snapshot_main_help(struct seq_file *m)
4662 {
4663 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4664 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4665 		    "#                      Takes a snapshot of the main buffer.\n"
4666 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4667 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4668 		    "#                       is not a '0' or '1')\n");
4669 }
4670 
4671 static void show_snapshot_percpu_help(struct seq_file *m)
4672 {
4673 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4674 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4675 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4676 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4677 #else
4678 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4679 		    "#                     Must use main snapshot file to allocate.\n");
4680 #endif
4681 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4682 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4683 		    "#                       is not a '0' or '1')\n");
4684 }
4685 
4686 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4687 {
4688 	if (iter->tr->allocated_snapshot)
4689 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4690 	else
4691 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4692 
4693 	seq_puts(m, "# Snapshot commands:\n");
4694 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4695 		show_snapshot_main_help(m);
4696 	else
4697 		show_snapshot_percpu_help(m);
4698 }
4699 #else
4700 /* Should never be called */
4701 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4702 #endif
4703 
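/*
 * seq_file .show callback: print the file header, a leftover line from
 * a previous overflow, or the next formatted trace line.
 */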
4704 static int s_show(struct seq_file *m, void *v)
4705 {
4706 	struct trace_iterator *iter = v;
4707 	int ret;
4708 
4709 	if (iter->ent == NULL) {
4710 		if (iter->tr) {
4711 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4712 			seq_puts(m, "#\n");
4713 			test_ftrace_alive(m);
4714 		}
4715 		if (iter->snapshot && trace_empty(iter))
4716 			print_snapshot_help(m, iter);
4717 		else if (iter->trace && iter->trace->print_header)
4718 			iter->trace->print_header(m);
4719 		else
4720 			trace_default_header(m);
4721 
4722 	} else if (iter->leftover) {
4723 		/*
4724 		 * If we filled the seq_file buffer earlier, we
4725 		 * want to just show it now.
4726 		 */
4727 		ret = trace_print_seq(m, &iter->seq);
4728 
4729 		/* ret should this time be zero, but you never know */
4730 		iter->leftover = ret;
4731 
4732 	} else {
4733 		ret = print_trace_line(iter);
4734 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4735 			iter->seq.full = 0;
4736 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4737 		}
4738 		ret = trace_print_seq(m, &iter->seq);
4739 		/*
4740 		 * If we overflow the seq_file buffer, then it will
4741 		 * ask us for this data again at start up.
4742 		 * Use that instead.
4743 		 *  ret is 0 if seq_file write succeeded.
4744 		 *        -1 otherwise.
4745 		 */
4746 		iter->leftover = ret;
4747 	}
4748 
4749 	return 0;
4750 }
4751 
4752 /*
4753  * Should be used after trace_array_get(); trace_types_lock
4754  * ensures that i_cdev was already initialized.
4755  */
4756 static inline int tracing_get_cpu(struct inode *inode)
4757 {
4758 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4759 		return (long)inode->i_cdev - 1;
4760 	return RING_BUFFER_ALL_CPUS;
4761 }
4762 
4763 static const struct seq_operations tracer_seq_ops = {
4764 	.start		= s_start,
4765 	.next		= s_next,
4766 	.stop		= s_stop,
4767 	.show		= s_show,
4768 };
4769 
4770 /*
4771  * Note, as iter itself can be allocated and freed in different
4772  * ways, this function is only used to free its content, and not
4773  * the iterator itself. The only requirement on all the allocations
4774  * is that they must zero all fields (kzalloc), as freeing works with
4775  * either allocated content or NULL.
4776  */
4777 static void free_trace_iter_content(struct trace_iterator *iter)
4778 {
4779 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4780 	if (iter->fmt != static_fmt_buf)
4781 		kfree(iter->fmt);
4782 
4783 	kfree(iter->temp);
4784 	kfree(iter->buffer_iter);
4785 	mutex_destroy(&iter->mutex);
4786 	free_cpumask_var(iter->started);
4787 }
4788 
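/*
 * Set up a trace_iterator for reading the "trace" file: allocate the
 * iterator, its temp buffer and per-CPU ring buffer iterators, and,
 * unless this is the snapshot file, stop tracing while the file is
 * open if pause-on-trace is enabled.
 */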
4789 static struct trace_iterator *
4790 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4791 {
4792 	struct trace_array *tr = inode->i_private;
4793 	struct trace_iterator *iter;
4794 	int cpu;
4795 
4796 	if (tracing_disabled)
4797 		return ERR_PTR(-ENODEV);
4798 
4799 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4800 	if (!iter)
4801 		return ERR_PTR(-ENOMEM);
4802 
4803 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4804 				    GFP_KERNEL);
4805 	if (!iter->buffer_iter)
4806 		goto release;
4807 
4808 	/*
4809 	 * trace_find_next_entry() may need to save off iter->ent.
4810 	 * It will place it into the iter->temp buffer. As most
4811 	 * events are less than 128, allocate a buffer of that size.
4812 	 * If one is greater, then trace_find_next_entry() will
4813 	 * allocate a new buffer to adjust for the bigger iter->ent.
4814 	 * It's not critical if it fails to get allocated here.
4815 	 */
4816 	iter->temp = kmalloc(128, GFP_KERNEL);
4817 	if (iter->temp)
4818 		iter->temp_size = 128;
4819 
4820 	/*
4821 	 * trace_event_printf() may need to modify the given format
4822 	 * string to replace %p with %px so that it shows the real address
4823 	 * instead of a hashed value. However, that is only needed for
4824 	 * event tracing; other tracers may not need it. Defer the
4825 	 * allocation until it is needed.
4826 	 */
4827 	iter->fmt = NULL;
4828 	iter->fmt_size = 0;
4829 
4830 	mutex_lock(&trace_types_lock);
4831 	iter->trace = tr->current_trace;
4832 
4833 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4834 		goto fail;
4835 
4836 	iter->tr = tr;
4837 
4838 #ifdef CONFIG_TRACER_MAX_TRACE
4839 	/* Currently only the top directory has a snapshot */
4840 	if (tr->current_trace->print_max || snapshot)
4841 		iter->array_buffer = &tr->max_buffer;
4842 	else
4843 #endif
4844 		iter->array_buffer = &tr->array_buffer;
4845 	iter->snapshot = snapshot;
4846 	iter->pos = -1;
4847 	iter->cpu_file = tracing_get_cpu(inode);
4848 	mutex_init(&iter->mutex);
4849 
4850 	/* Notify the tracer early; before we stop tracing. */
4851 	if (iter->trace->open)
4852 		iter->trace->open(iter);
4853 
4854 	/* Annotate start of buffers if we had overruns */
4855 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4856 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4857 
4858 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4859 	if (trace_clocks[tr->clock_id].in_ns)
4860 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4861 
4862 	/*
4863 	 * If pause-on-trace is enabled, then stop the trace while
4864 	 * dumping, unless this is the "snapshot" file
4865 	 */
4866 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4867 		tracing_stop_tr(tr);
4868 
4869 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4870 		for_each_tracing_cpu(cpu) {
4871 			iter->buffer_iter[cpu] =
4872 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4873 							 cpu, GFP_KERNEL);
4874 		}
4875 		ring_buffer_read_prepare_sync();
4876 		for_each_tracing_cpu(cpu) {
4877 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4878 			tracing_iter_reset(iter, cpu);
4879 		}
4880 	} else {
4881 		cpu = iter->cpu_file;
4882 		iter->buffer_iter[cpu] =
4883 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4884 						 cpu, GFP_KERNEL);
4885 		ring_buffer_read_prepare_sync();
4886 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4887 		tracing_iter_reset(iter, cpu);
4888 	}
4889 
4890 	mutex_unlock(&trace_types_lock);
4891 
4892 	return iter;
4893 
4894  fail:
4895 	mutex_unlock(&trace_types_lock);
4896 	free_trace_iter_content(iter);
4897 release:
4898 	seq_release_private(inode, file);
4899 	return ERR_PTR(-ENOMEM);
4900 }
4901 
4902 int tracing_open_generic(struct inode *inode, struct file *filp)
4903 {
4904 	int ret;
4905 
4906 	ret = tracing_check_open_get_tr(NULL);
4907 	if (ret)
4908 		return ret;
4909 
4910 	filp->private_data = inode->i_private;
4911 	return 0;
4912 }
4913 
4914 bool tracing_is_disabled(void)
4915 {
4916 	return tracing_disabled;
4917 }
4918 
4919 /*
4920  * Open and update trace_array ref count.
4921  * Must have the current trace_array passed to it.
4922  */
4923 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4924 {
4925 	struct trace_array *tr = inode->i_private;
4926 	int ret;
4927 
4928 	ret = tracing_check_open_get_tr(tr);
4929 	if (ret)
4930 		return ret;
4931 
4932 	filp->private_data = inode->i_private;
4933 
4934 	return 0;
4935 }
4936 
4937 /*
4938  * The private pointer of the inode is the trace_event_file.
4939  * Update the tr ref count associated with it.
4940  */
4941 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4942 {
4943 	struct trace_event_file *file = inode->i_private;
4944 	int ret;
4945 
4946 	ret = tracing_check_open_get_tr(file->tr);
4947 	if (ret)
4948 		return ret;
4949 
4950 	mutex_lock(&event_mutex);
4951 
4952 	/* Fail if the file is marked for removal */
4953 	if (file->flags & EVENT_FILE_FL_FREED) {
4954 		trace_array_put(file->tr);
4955 		ret = -ENODEV;
4956 	} else {
4957 		event_file_get(file);
4958 	}
4959 
4960 	mutex_unlock(&event_mutex);
4961 	if (ret)
4962 		return ret;
4963 
4964 	filp->private_data = inode->i_private;
4965 
4966 	return 0;
4967 }
4968 
4969 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4970 {
4971 	struct trace_event_file *file = inode->i_private;
4972 
4973 	trace_array_put(file->tr);
4974 	event_file_put(file);
4975 
4976 	return 0;
4977 }
4978 
4979 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4980 {
4981 	tracing_release_file_tr(inode, filp);
4982 	return single_release(inode, filp);
4983 }
4984 
4985 static int tracing_mark_open(struct inode *inode, struct file *filp)
4986 {
4987 	stream_open(inode, filp);
4988 	return tracing_open_generic_tr(inode, filp);
4989 }
4990 
4991 static int tracing_release(struct inode *inode, struct file *file)
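/*
 * Tear down the iterator created by __tracing_open() and restart
 * tracing if it was stopped by the open.
 */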
4992 {
4993 	struct trace_array *tr = inode->i_private;
4994 	struct seq_file *m = file->private_data;
4995 	struct trace_iterator *iter;
4996 	int cpu;
4997 
4998 	if (!(file->f_mode & FMODE_READ)) {
4999 		trace_array_put(tr);
5000 		return 0;
5001 	}
5002 
5003 	/* Writes do not use seq_file */
5004 	iter = m->private;
5005 	mutex_lock(&trace_types_lock);
5006 
5007 	for_each_tracing_cpu(cpu) {
5008 		if (iter->buffer_iter[cpu])
5009 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
5010 	}
5011 
5012 	if (iter->trace && iter->trace->close)
5013 		iter->trace->close(iter);
5014 
5015 	if (!iter->snapshot && tr->stop_count)
5016 		/* reenable tracing if it was previously enabled */
5017 		tracing_start_tr(tr);
5018 
5019 	__trace_array_put(tr);
5020 
5021 	mutex_unlock(&trace_types_lock);
5022 
5023 	free_trace_iter_content(iter);
5024 	seq_release_private(inode, file);
5025 
5026 	return 0;
5027 }
5028 
5029 int tracing_release_generic_tr(struct inode *inode, struct file *file)
5030 {
5031 	struct trace_array *tr = inode->i_private;
5032 
5033 	trace_array_put(tr);
5034 	return 0;
5035 }
5036 
5037 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5038 {
5039 	struct trace_array *tr = inode->i_private;
5040 
5041 	trace_array_put(tr);
5042 
5043 	return single_release(inode, file);
5044 }
5045 
5046 static int tracing_open(struct inode *inode, struct file *file)
5047 {
5048 	struct trace_array *tr = inode->i_private;
5049 	struct trace_iterator *iter;
5050 	int ret;
5051 
5052 	ret = tracing_check_open_get_tr(tr);
5053 	if (ret)
5054 		return ret;
5055 
5056 	/* If this file was opened for write, then erase its contents */
5057 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5058 		int cpu = tracing_get_cpu(inode);
5059 		struct array_buffer *trace_buf = &tr->array_buffer;
5060 
5061 #ifdef CONFIG_TRACER_MAX_TRACE
5062 		if (tr->current_trace->print_max)
5063 			trace_buf = &tr->max_buffer;
5064 #endif
5065 
5066 		if (cpu == RING_BUFFER_ALL_CPUS)
5067 			tracing_reset_online_cpus(trace_buf);
5068 		else
5069 			tracing_reset_cpu(trace_buf, cpu);
5070 	}
5071 
5072 	if (file->f_mode & FMODE_READ) {
5073 		iter = __tracing_open(inode, file, false);
5074 		if (IS_ERR(iter))
5075 			ret = PTR_ERR(iter);
5076 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5077 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5078 	}
5079 
5080 	if (ret < 0)
5081 		trace_array_put(tr);
5082 
5083 	return ret;
5084 }
5085 
5086 /*
5087  * Some tracers are not suitable for instance buffers.
5088  * A tracer is always available for the global array (toplevel)
5089  * or if it explicitly states that it is.
5090  */
5091 static bool
5092 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5093 {
5094 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5095 }
5096 
5097 /* Find the next tracer that this trace array may use */
5098 static struct tracer *
5099 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5100 {
5101 	while (t && !trace_ok_for_array(t, tr))
5102 		t = t->next;
5103 
5104 	return t;
5105 }
5106 
5107 static void *
5108 t_next(struct seq_file *m, void *v, loff_t *pos)
5109 {
5110 	struct trace_array *tr = m->private;
5111 	struct tracer *t = v;
5112 
5113 	(*pos)++;
5114 
5115 	if (t)
5116 		t = get_tracer_for_array(tr, t->next);
5117 
5118 	return t;
5119 }
5120 
5121 static void *t_start(struct seq_file *m, loff_t *pos)
5122 {
5123 	struct trace_array *tr = m->private;
5124 	struct tracer *t;
5125 	loff_t l = 0;
5126 
5127 	mutex_lock(&trace_types_lock);
5128 
5129 	t = get_tracer_for_array(tr, trace_types);
5130 	for (; t && l < *pos; t = t_next(m, t, &l))
5131 			;
5132 
5133 	return t;
5134 }
5135 
5136 static void t_stop(struct seq_file *m, void *p)
5137 {
5138 	mutex_unlock(&trace_types_lock);
5139 }
5140 
5141 static int t_show(struct seq_file *m, void *v)
5142 {
5143 	struct tracer *t = v;
5144 
5145 	if (!t)
5146 		return 0;
5147 
5148 	seq_puts(m, t->name);
5149 	if (t->next)
5150 		seq_putc(m, ' ');
5151 	else
5152 		seq_putc(m, '\n');
5153 
5154 	return 0;
5155 }
5156 
5157 static const struct seq_operations show_traces_seq_ops = {
5158 	.start		= t_start,
5159 	.next		= t_next,
5160 	.stop		= t_stop,
5161 	.show		= t_show,
5162 };
5163 
5164 static int show_traces_open(struct inode *inode, struct file *file)
5165 {
5166 	struct trace_array *tr = inode->i_private;
5167 	struct seq_file *m;
5168 	int ret;
5169 
5170 	ret = tracing_check_open_get_tr(tr);
5171 	if (ret)
5172 		return ret;
5173 
5174 	ret = seq_open(file, &show_traces_seq_ops);
5175 	if (ret) {
5176 		trace_array_put(tr);
5177 		return ret;
5178 	}
5179 
5180 	m = file->private_data;
5181 	m->private = tr;
5182 
5183 	return 0;
5184 }
5185 
5186 static int show_traces_release(struct inode *inode, struct file *file)
5187 {
5188 	struct trace_array *tr = inode->i_private;
5189 
5190 	trace_array_put(tr);
5191 	return seq_release(inode, file);
5192 }
5193 
5194 static ssize_t
5195 tracing_write_stub(struct file *filp, const char __user *ubuf,
5196 		   size_t count, loff_t *ppos)
5197 {
5198 	return count;
5199 }
5200 
5201 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5202 {
5203 	int ret;
5204 
5205 	if (file->f_mode & FMODE_READ)
5206 		ret = seq_lseek(file, offset, whence);
5207 	else
5208 		file->f_pos = ret = 0;
5209 
5210 	return ret;
5211 }
5212 
5213 static const struct file_operations tracing_fops = {
5214 	.open		= tracing_open,
5215 	.read		= seq_read,
5216 	.read_iter	= seq_read_iter,
5217 	.splice_read	= copy_splice_read,
5218 	.write		= tracing_write_stub,
5219 	.llseek		= tracing_lseek,
5220 	.release	= tracing_release,
5221 };
5222 
5223 static const struct file_operations show_traces_fops = {
5224 	.open		= show_traces_open,
5225 	.read		= seq_read,
5226 	.llseek		= seq_lseek,
5227 	.release	= show_traces_release,
5228 };
5229 
5230 static ssize_t
5231 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5232 		     size_t count, loff_t *ppos)
5233 {
5234 	struct trace_array *tr = file_inode(filp)->i_private;
5235 	char *mask_str;
5236 	int len;
5237 
5238 	len = snprintf(NULL, 0, "%*pb\n",
5239 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5240 	mask_str = kmalloc(len, GFP_KERNEL);
5241 	if (!mask_str)
5242 		return -ENOMEM;
5243 
5244 	len = snprintf(mask_str, len, "%*pb\n",
5245 		       cpumask_pr_args(tr->tracing_cpumask));
5246 	if (len >= count) {
5247 		count = -EINVAL;
5248 		goto out_err;
5249 	}
5250 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5251 
5252 out_err:
5253 	kfree(mask_str);
5254 
5255 	return count;
5256 }
5257 
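/*
 * Apply a new tracing CPU mask: disable recording on CPUs removed from
 * the mask and re-enable it on CPUs added to it.
 */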
5258 int tracing_set_cpumask(struct trace_array *tr,
5259 			cpumask_var_t tracing_cpumask_new)
5260 {
5261 	int cpu;
5262 
5263 	if (!tr)
5264 		return -EINVAL;
5265 
5266 	local_irq_disable();
5267 	arch_spin_lock(&tr->max_lock);
5268 	for_each_tracing_cpu(cpu) {
5269 		/*
5270 		 * Increase/decrease the disabled counter if we are
5271 		 * about to flip a bit in the cpumask:
5272 		 */
5273 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5274 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5275 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5276 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5277 #ifdef CONFIG_TRACER_MAX_TRACE
5278 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5279 #endif
5280 		}
5281 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5282 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5283 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5284 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5285 #ifdef CONFIG_TRACER_MAX_TRACE
5286 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5287 #endif
5288 		}
5289 	}
5290 	arch_spin_unlock(&tr->max_lock);
5291 	local_irq_enable();
5292 
5293 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5294 
5295 	return 0;
5296 }
5297 
5298 static ssize_t
5299 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5300 		      size_t count, loff_t *ppos)
5301 {
5302 	struct trace_array *tr = file_inode(filp)->i_private;
5303 	cpumask_var_t tracing_cpumask_new;
5304 	int err;
5305 
5306 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5307 		return -ENOMEM;
5308 
5309 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5310 	if (err)
5311 		goto err_free;
5312 
5313 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5314 	if (err)
5315 		goto err_free;
5316 
5317 	free_cpumask_var(tracing_cpumask_new);
5318 
5319 	return count;
5320 
5321 err_free:
5322 	free_cpumask_var(tracing_cpumask_new);
5323 
5324 	return err;
5325 }
5326 
5327 static const struct file_operations tracing_cpumask_fops = {
5328 	.open		= tracing_open_generic_tr,
5329 	.read		= tracing_cpumask_read,
5330 	.write		= tracing_cpumask_write,
5331 	.release	= tracing_release_generic_tr,
5332 	.llseek		= generic_file_llseek,
5333 };
5334 
5335 static int tracing_trace_options_show(struct seq_file *m, void *v)
5336 {
5337 	struct tracer_opt *trace_opts;
5338 	struct trace_array *tr = m->private;
5339 	u32 tracer_flags;
5340 	int i;
5341 
5342 	mutex_lock(&trace_types_lock);
5343 	tracer_flags = tr->current_trace->flags->val;
5344 	trace_opts = tr->current_trace->flags->opts;
5345 
5346 	for (i = 0; trace_options[i]; i++) {
5347 		if (tr->trace_flags & (1 << i))
5348 			seq_printf(m, "%s\n", trace_options[i]);
5349 		else
5350 			seq_printf(m, "no%s\n", trace_options[i]);
5351 	}
5352 
5353 	for (i = 0; trace_opts[i].name; i++) {
5354 		if (tracer_flags & trace_opts[i].bit)
5355 			seq_printf(m, "%s\n", trace_opts[i].name);
5356 		else
5357 			seq_printf(m, "no%s\n", trace_opts[i].name);
5358 	}
5359 	mutex_unlock(&trace_types_lock);
5360 
5361 	return 0;
5362 }
5363 
5364 static int __set_tracer_option(struct trace_array *tr,
5365 			       struct tracer_flags *tracer_flags,
5366 			       struct tracer_opt *opts, int neg)
5367 {
5368 	struct tracer *trace = tracer_flags->trace;
5369 	int ret;
5370 
5371 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5372 	if (ret)
5373 		return ret;
5374 
5375 	if (neg)
5376 		tracer_flags->val &= ~opts->bit;
5377 	else
5378 		tracer_flags->val |= opts->bit;
5379 	return 0;
5380 }
5381 
5382 /* Try to assign a tracer specific option */
5383 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5384 {
5385 	struct tracer *trace = tr->current_trace;
5386 	struct tracer_flags *tracer_flags = trace->flags;
5387 	struct tracer_opt *opts = NULL;
5388 	int i;
5389 
5390 	for (i = 0; tracer_flags->opts[i].name; i++) {
5391 		opts = &tracer_flags->opts[i];
5392 
5393 		if (strcmp(cmp, opts->name) == 0)
5394 			return __set_tracer_option(tr, trace->flags, opts, neg);
5395 	}
5396 
5397 	return -EINVAL;
5398 }
5399 
5400 /* Some tracers require overwrite to stay enabled */
5401 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5402 {
5403 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5404 		return -1;
5405 
5406 	return 0;
5407 }
5408 
5409 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5410 {
5411 	int *map;
5412 
5413 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5414 	    (mask == TRACE_ITER_RECORD_CMD))
5415 		lockdep_assert_held(&event_mutex);
5416 
5417 	/* do nothing if flag is already set */
5418 	if (!!(tr->trace_flags & mask) == !!enabled)
5419 		return 0;
5420 
5421 	/* Give the tracer a chance to approve the change */
5422 	if (tr->current_trace->flag_changed)
5423 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5424 			return -EINVAL;
5425 
5426 	if (enabled)
5427 		tr->trace_flags |= mask;
5428 	else
5429 		tr->trace_flags &= ~mask;
5430 
5431 	if (mask == TRACE_ITER_RECORD_CMD)
5432 		trace_event_enable_cmd_record(enabled);
5433 
5434 	if (mask == TRACE_ITER_RECORD_TGID) {
5435 		if (!tgid_map) {
5436 			tgid_map_max = pid_max;
5437 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5438 				       GFP_KERNEL);
5439 
5440 			/*
5441 			 * Pairs with smp_load_acquire() in
5442 			 * trace_find_tgid_ptr() to ensure that if it observes
5443 			 * the tgid_map we just allocated then it also observes
5444 			 * the corresponding tgid_map_max value.
5445 			 */
5446 			smp_store_release(&tgid_map, map);
5447 		}
5448 		if (!tgid_map) {
5449 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5450 			return -ENOMEM;
5451 		}
5452 
5453 		trace_event_enable_tgid_record(enabled);
5454 	}
5455 
5456 	if (mask == TRACE_ITER_EVENT_FORK)
5457 		trace_event_follow_fork(tr, enabled);
5458 
5459 	if (mask == TRACE_ITER_FUNC_FORK)
5460 		ftrace_pid_follow_fork(tr, enabled);
5461 
5462 	if (mask == TRACE_ITER_OVERWRITE) {
5463 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5464 #ifdef CONFIG_TRACER_MAX_TRACE
5465 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5466 #endif
5467 	}
5468 
5469 	if (mask == TRACE_ITER_PRINTK) {
5470 		trace_printk_start_stop_comm(enabled);
5471 		trace_printk_control(enabled);
5472 	}
5473 
5474 	return 0;
5475 }
5476 
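/*
 * Parse a single option name (optionally prefixed with "no") and set
 * or clear the corresponding trace flag or tracer-specific option.
 */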
5477 int trace_set_options(struct trace_array *tr, char *option)
5478 {
5479 	char *cmp;
5480 	int neg = 0;
5481 	int ret;
5482 	size_t orig_len = strlen(option);
5483 	int len;
5484 
5485 	cmp = strstrip(option);
5486 
5487 	len = str_has_prefix(cmp, "no");
5488 	if (len)
5489 		neg = 1;
5490 
5491 	cmp += len;
5492 
5493 	mutex_lock(&event_mutex);
5494 	mutex_lock(&trace_types_lock);
5495 
5496 	ret = match_string(trace_options, -1, cmp);
5497 	/* If no option could be set, test the specific tracer options */
5498 	if (ret < 0)
5499 		ret = set_tracer_option(tr, cmp, neg);
5500 	else
5501 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5502 
5503 	mutex_unlock(&trace_types_lock);
5504 	mutex_unlock(&event_mutex);
5505 
5506 	/*
5507 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5508 	 * turn it back into a space.
5509 	 */
5510 	if (orig_len > strlen(option))
5511 		option[strlen(option)] = ' ';
5512 
5513 	return ret;
5514 }
5515 
5516 static void __init apply_trace_boot_options(void)
5517 {
5518 	char *buf = trace_boot_options_buf;
5519 	char *option;
5520 
5521 	while (true) {
5522 		option = strsep(&buf, ",");
5523 
5524 		if (!option)
5525 			break;
5526 
5527 		if (*option)
5528 			trace_set_options(&global_trace, option);
5529 
5530 		/* Put back the comma to allow this to be called again */
5531 		if (buf)
5532 			*(buf - 1) = ',';
5533 	}
5534 }
5535 
5536 static ssize_t
5537 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5538 			size_t cnt, loff_t *ppos)
5539 {
5540 	struct seq_file *m = filp->private_data;
5541 	struct trace_array *tr = m->private;
5542 	char buf[64];
5543 	int ret;
5544 
5545 	if (cnt >= sizeof(buf))
5546 		return -EINVAL;
5547 
5548 	if (copy_from_user(buf, ubuf, cnt))
5549 		return -EFAULT;
5550 
5551 	buf[cnt] = 0;
5552 
5553 	ret = trace_set_options(tr, buf);
5554 	if (ret < 0)
5555 		return ret;
5556 
5557 	*ppos += cnt;
5558 
5559 	return cnt;
5560 }
5561 
5562 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5563 {
5564 	struct trace_array *tr = inode->i_private;
5565 	int ret;
5566 
5567 	ret = tracing_check_open_get_tr(tr);
5568 	if (ret)
5569 		return ret;
5570 
5571 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5572 	if (ret < 0)
5573 		trace_array_put(tr);
5574 
5575 	return ret;
5576 }
5577 
5578 static const struct file_operations tracing_iter_fops = {
5579 	.open		= tracing_trace_options_open,
5580 	.read		= seq_read,
5581 	.llseek		= seq_lseek,
5582 	.release	= tracing_single_release_tr,
5583 	.write		= tracing_trace_options_write,
5584 };
5585 
5586 static const char readme_msg[] =
5587 	"tracing mini-HOWTO:\n\n"
5588 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5589 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5590 	" Important files:\n"
5591 	"  trace\t\t\t- The static contents of the buffer\n"
5592 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5593 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5594 	"  current_tracer\t- function and latency tracers\n"
5595 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5596 	"  error_log\t- error log for failed commands (that support it)\n"
5597 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5598 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5599 	"  trace_clock\t\t- change the clock used to order events\n"
5600 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5601 	"      global:   Synced across CPUs but slows tracing down.\n"
5602 	"     counter:   Not a clock, but just an increment\n"
5603 	"      uptime:   Jiffy counter from time of boot\n"
5604 	"        perf:   Same clock that perf events use\n"
5605 #ifdef CONFIG_X86_64
5606 	"     x86-tsc:   TSC cycle counter\n"
5607 #endif
5608 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5609 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5610 	"    absolute:   Absolute (standalone) timestamp\n"
5611 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5612 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5613 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5614 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5615 	"\t\t\t  Remove sub-buffer with rmdir\n"
5616 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5617 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5618 	"\t\t\t  option name\n"
5619 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5620 #ifdef CONFIG_DYNAMIC_FTRACE
5621 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5622 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5623 	"\t\t\t  functions\n"
5624 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5625 	"\t     modules: Can select a group via module\n"
5626 	"\t      Format: :mod:<module-name>\n"
5627 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5628 	"\t    triggers: a command to perform when function is hit\n"
5629 	"\t      Format: <function>:<trigger>[:count]\n"
5630 	"\t     trigger: traceon, traceoff\n"
5631 	"\t\t      enable_event:<system>:<event>\n"
5632 	"\t\t      disable_event:<system>:<event>\n"
5633 #ifdef CONFIG_STACKTRACE
5634 	"\t\t      stacktrace\n"
5635 #endif
5636 #ifdef CONFIG_TRACER_SNAPSHOT
5637 	"\t\t      snapshot\n"
5638 #endif
5639 	"\t\t      dump\n"
5640 	"\t\t      cpudump\n"
5641 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5642 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5643 	"\t     The first one will disable tracing every time do_fault is hit\n"
5644 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5645 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5646 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5647 	"\t       the counter will not decrement. It only decrements when the\n"
5648 	"\t       trigger did work\n"
5649 	"\t     To remove trigger without count:\n"
5650 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5651 	"\t     To remove trigger with a count:\n"
5652 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5653 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5654 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5655 	"\t    modules: Can select a group via module command :mod:\n"
5656 	"\t    Does not accept triggers\n"
5657 #endif /* CONFIG_DYNAMIC_FTRACE */
5658 #ifdef CONFIG_FUNCTION_TRACER
5659 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5660 	"\t\t    (function)\n"
5661 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5662 	"\t\t    (function)\n"
5663 #endif
5664 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5665 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5666 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5667 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5668 #endif
5669 #ifdef CONFIG_TRACER_SNAPSHOT
5670 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5671 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5672 	"\t\t\t  information\n"
5673 #endif
5674 #ifdef CONFIG_STACK_TRACER
5675 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5676 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5677 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5678 	"\t\t\t  new trace)\n"
5679 #ifdef CONFIG_DYNAMIC_FTRACE
5680 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5681 	"\t\t\t  traces\n"
5682 #endif
5683 #endif /* CONFIG_STACK_TRACER */
5684 #ifdef CONFIG_DYNAMIC_EVENTS
5685 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5686 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5687 #endif
5688 #ifdef CONFIG_KPROBE_EVENTS
5689 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5690 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5691 #endif
5692 #ifdef CONFIG_UPROBE_EVENTS
5693 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5694 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5695 #endif
5696 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5697     defined(CONFIG_FPROBE_EVENTS)
5698 	"\t  accepts: event-definitions (one definition per line)\n"
5699 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5700 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5701 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5702 #endif
5703 #ifdef CONFIG_FPROBE_EVENTS
5704 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5705 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5706 #endif
5707 #ifdef CONFIG_HIST_TRIGGERS
5708 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5709 #endif
5710 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5711 	"\t           -:[<group>/][<event>]\n"
5712 #ifdef CONFIG_KPROBE_EVENTS
5713 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5714 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5715 #endif
5716 #ifdef CONFIG_UPROBE_EVENTS
5717 	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5718 #endif
5719 	"\t     args: <name>=fetcharg[:type]\n"
5720 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5721 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5722 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5723 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5724 	"\t           <argname>[->field[->field|.field...]],\n"
5725 #else
5726 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5727 #endif
5728 #else
5729 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5730 #endif
5731 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5732 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5733 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5734 	"\t           symstr, <type>\\[<array-size>\\]\n"
5735 #ifdef CONFIG_HIST_TRIGGERS
5736 	"\t    field: <stype> <name>;\n"
5737 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5738 	"\t           [unsigned] char/int/long\n"
5739 #endif
5740 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5741 	"\t            of the <attached-group>/<attached-event>.\n"
5742 #endif
5743 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5744 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5745 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5746 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5747 	"\t\t\t  events\n"
5748 	"      filter\t\t- If set, only events passing filter are traced\n"
5749 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5750 	"\t\t\t  <event>:\n"
5751 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5752 	"      filter\t\t- If set, only events passing filter are traced\n"
5753 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5754 	"\t    Format: <trigger>[:count][if <filter>]\n"
5755 	"\t   trigger: traceon, traceoff\n"
5756 	"\t            enable_event:<system>:<event>\n"
5757 	"\t            disable_event:<system>:<event>\n"
5758 #ifdef CONFIG_HIST_TRIGGERS
5759 	"\t            enable_hist:<system>:<event>\n"
5760 	"\t            disable_hist:<system>:<event>\n"
5761 #endif
5762 #ifdef CONFIG_STACKTRACE
5763 	"\t\t    stacktrace\n"
5764 #endif
5765 #ifdef CONFIG_TRACER_SNAPSHOT
5766 	"\t\t    snapshot\n"
5767 #endif
5768 #ifdef CONFIG_HIST_TRIGGERS
5769 	"\t\t    hist (see below)\n"
5770 #endif
5771 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5772 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5773 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5774 	"\t                  events/block/block_unplug/trigger\n"
5775 	"\t   The first disables tracing every time block_unplug is hit.\n"
5776 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5777 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5778 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5779 	"\t   Like function triggers, the counter is only decremented if it\n"
5780 	"\t    enabled or disabled tracing.\n"
5781 	"\t   To remove a trigger without a count:\n"
5782 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5783 	"\t   To remove a trigger with a count:\n"
5784 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5785 	"\t   Filters can be ignored when removing a trigger.\n"
5786 #ifdef CONFIG_HIST_TRIGGERS
5787 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5788 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5789 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5790 	"\t            [:values=<field1[,field2,...]>]\n"
5791 	"\t            [:sort=<field1[,field2,...]>]\n"
5792 	"\t            [:size=#entries]\n"
5793 	"\t            [:pause][:continue][:clear]\n"
5794 	"\t            [:name=histname1]\n"
5795 	"\t            [:nohitcount]\n"
5796 	"\t            [:<handler>.<action>]\n"
5797 	"\t            [if <filter>]\n\n"
5798 	"\t    Note, special fields can be used as well:\n"
5799 	"\t            common_timestamp - to record current timestamp\n"
5800 	"\t            common_cpu - to record the CPU the event happened on\n"
5801 	"\n"
5802 	"\t    A hist trigger variable can be:\n"
5803 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5804 	"\t        - a reference to another variable e.g. y=$x,\n"
5805 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5806 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5807 	"\n"
5808 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5809 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5810 	"\t    variable reference, field or numeric literal.\n"
5811 	"\n"
5812 	"\t    When a matching event is hit, an entry is added to a hash\n"
5813 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5814 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5815 	"\t    correspond to fields in the event's format description.  Keys\n"
5816 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5817 	"\t    Compound keys consisting of up to two fields can be specified\n"
5818 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5819 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5820 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5821 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5822 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5823 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5824 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5825 	"\t    its histogram data will be shared with other triggers of the\n"
5826 	"\t    same name, and trigger hits will update this common data.\n\n"
5827 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5828 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5829 	"\t    triggers attached to an event, there will be a table for each\n"
5830 	"\t    trigger in the output.  The table displayed for a named\n"
5831 	"\t    trigger will be the same as any other instance having the\n"
5832 	"\t    same name.  The default format used to display a given field\n"
5833 	"\t    can be modified by appending any of the following modifiers\n"
5834 	"\t    to the field name, as applicable:\n\n"
5835 	"\t            .hex        display a number as a hex value\n"
5836 	"\t            .sym        display an address as a symbol\n"
5837 	"\t            .sym-offset display an address as a symbol and offset\n"
5838 	"\t            .execname   display a common_pid as a program name\n"
5839 	"\t            .syscall    display a syscall id as a syscall name\n"
5840 	"\t            .log2       display log2 value rather than raw number\n"
5841 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5842 	"\t            .usecs      display a common_timestamp in microseconds\n"
5843 	"\t            .percent    display a number as a percentage value\n"
5844 	"\t            .graph      display a bar-graph of a value\n\n"
5845 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5846 	"\t    trigger or to start a hist trigger but not log any events\n"
5847 	"\t    until told to do so.  'continue' can be used to start or\n"
5848 	"\t    restart a paused hist trigger.\n\n"
5849 	"\t    The 'clear' parameter will clear the contents of a running\n"
5850 	"\t    hist trigger and leave its current paused/active state\n"
5851 	"\t    unchanged.\n\n"
5852 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5853 	"\t    raw hitcount in the histogram.\n\n"
5854 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5855 	"\t    have one event conditionally start and stop another event's\n"
5856 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5857 	"\t    the enable_event and disable_event triggers.\n\n"
5858 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5859 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5860 	"\t        <handler>.<action>\n\n"
5861 	"\t    The available handlers are:\n\n"
5862 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5863 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5864 	"\t        onchange(var)            - invoke action if var changes\n\n"
5865 	"\t    The available actions are:\n\n"
5866 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5867 	"\t        save(field,...)                      - save current event fields\n"
5868 #ifdef CONFIG_TRACER_SNAPSHOT
5869 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5870 #endif
5871 #ifdef CONFIG_SYNTH_EVENTS
5872 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5873 	"\t  Write into this file to define/undefine new synthetic events.\n"
5874 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5875 #endif
5876 #endif
5877 ;
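
/*
 * Editorial illustration (not part of trace.c): a minimal user-space sketch
 * of the first steps of the mini-HOWTO above, assuming tracefs is mounted at
 * /sys/kernel/tracing.  It re-enables tracing through tracing_on and then
 * dumps a chunk of the static buffer via the trace file.  Guarded by #if 0
 * so it does not affect the build.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
	char buf[4096];
	ssize_t n;

	if (fd < 0)
		return 1;
	write(fd, "1", 1);		/* quick way to re-enable tracing */
	close(fd);

	fd = open("/sys/kernel/tracing/trace", O_RDONLY);
	if (fd < 0)
		return 1;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);	/* static buffer contents */
	close(fd);
	return 0;
}
#endif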
5878 
5879 static ssize_t
5880 tracing_readme_read(struct file *filp, char __user *ubuf,
5881 		       size_t cnt, loff_t *ppos)
5882 {
5883 	return simple_read_from_buffer(ubuf, cnt, ppos,
5884 					readme_msg, strlen(readme_msg));
5885 }
5886 
5887 static const struct file_operations tracing_readme_fops = {
5888 	.open		= tracing_open_generic,
5889 	.read		= tracing_readme_read,
5890 	.llseek		= generic_file_llseek,
5891 };
5892 
5893 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5894 {
5895 	int pid = ++(*pos);
5896 
5897 	return trace_find_tgid_ptr(pid);
5898 }
5899 
5900 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5901 {
5902 	int pid = *pos;
5903 
5904 	return trace_find_tgid_ptr(pid);
5905 }
5906 
5907 static void saved_tgids_stop(struct seq_file *m, void *v)
5908 {
5909 }
5910 
5911 static int saved_tgids_show(struct seq_file *m, void *v)
5912 {
5913 	int *entry = (int *)v;
5914 	int pid = entry - tgid_map;
5915 	int tgid = *entry;
5916 
5917 	if (tgid == 0)
5918 		return SEQ_SKIP;
5919 
5920 	seq_printf(m, "%d %d\n", pid, tgid);
5921 	return 0;
5922 }
5923 
5924 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5925 	.start		= saved_tgids_start,
5926 	.stop		= saved_tgids_stop,
5927 	.next		= saved_tgids_next,
5928 	.show		= saved_tgids_show,
5929 };
5930 
5931 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5932 {
5933 	int ret;
5934 
5935 	ret = tracing_check_open_get_tr(NULL);
5936 	if (ret)
5937 		return ret;
5938 
5939 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5940 }
5941 
5942 
5943 static const struct file_operations tracing_saved_tgids_fops = {
5944 	.open		= tracing_saved_tgids_open,
5945 	.read		= seq_read,
5946 	.llseek		= seq_lseek,
5947 	.release	= seq_release,
5948 };
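
/*
 * Editorial illustration (not part of trace.c): saved_tgids_show() above
 * emits one "<pid> <tgid>" pair per line.  A hedged user-space sketch that
 * parses that format, assuming tracefs is mounted at /sys/kernel/tracing.
 * Guarded by #if 0.
 */
#if 0
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/tracing/saved_tgids", "r");
	int pid, tgid;

	if (!f)
		return 1;
	while (fscanf(f, "%d %d", &pid, &tgid) == 2)
		printf("pid %d belongs to tgid %d\n", pid, tgid);
	fclose(f);
	return 0;
}
#endif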
5949 
5950 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5951 {
5952 	unsigned int *ptr = v;
5953 
5954 	if (*pos || m->count)
5955 		ptr++;
5956 
5957 	(*pos)++;
5958 
5959 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5960 	     ptr++) {
5961 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5962 			continue;
5963 
5964 		return ptr;
5965 	}
5966 
5967 	return NULL;
5968 }
5969 
5970 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5971 {
5972 	void *v;
5973 	loff_t l = 0;
5974 
5975 	preempt_disable();
5976 	arch_spin_lock(&trace_cmdline_lock);
5977 
5978 	v = &savedcmd->map_cmdline_to_pid[0];
5979 	while (l <= *pos) {
5980 		v = saved_cmdlines_next(m, v, &l);
5981 		if (!v)
5982 			return NULL;
5983 	}
5984 
5985 	return v;
5986 }
5987 
5988 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5989 {
5990 	arch_spin_unlock(&trace_cmdline_lock);
5991 	preempt_enable();
5992 }
5993 
5994 static int saved_cmdlines_show(struct seq_file *m, void *v)
5995 {
5996 	char buf[TASK_COMM_LEN];
5997 	unsigned int *pid = v;
5998 
5999 	__trace_find_cmdline(*pid, buf);
6000 	seq_printf(m, "%d %s\n", *pid, buf);
6001 	return 0;
6002 }
6003 
6004 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6005 	.start		= saved_cmdlines_start,
6006 	.next		= saved_cmdlines_next,
6007 	.stop		= saved_cmdlines_stop,
6008 	.show		= saved_cmdlines_show,
6009 };
6010 
6011 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6012 {
6013 	int ret;
6014 
6015 	ret = tracing_check_open_get_tr(NULL);
6016 	if (ret)
6017 		return ret;
6018 
6019 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6020 }
6021 
6022 static const struct file_operations tracing_saved_cmdlines_fops = {
6023 	.open		= tracing_saved_cmdlines_open,
6024 	.read		= seq_read,
6025 	.llseek		= seq_lseek,
6026 	.release	= seq_release,
6027 };
6028 
6029 static ssize_t
6030 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6031 				 size_t cnt, loff_t *ppos)
6032 {
6033 	char buf[64];
6034 	int r;
6035 
6036 	preempt_disable();
6037 	arch_spin_lock(&trace_cmdline_lock);
6038 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6039 	arch_spin_unlock(&trace_cmdline_lock);
6040 	preempt_enable();
6041 
6042 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6043 }
6044 
6045 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6046 {
6047 	kfree(s->saved_cmdlines);
6048 	kfree(s->map_cmdline_to_pid);
6049 	kfree(s);
6050 }
6051 
6052 static int tracing_resize_saved_cmdlines(unsigned int val)
6053 {
6054 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6055 
6056 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6057 	if (!s)
6058 		return -ENOMEM;
6059 
6060 	if (allocate_cmdlines_buffer(val, s) < 0) {
6061 		kfree(s);
6062 		return -ENOMEM;
6063 	}
6064 
6065 	preempt_disable();
6066 	arch_spin_lock(&trace_cmdline_lock);
6067 	savedcmd_temp = savedcmd;
6068 	savedcmd = s;
6069 	arch_spin_unlock(&trace_cmdline_lock);
6070 	preempt_enable();
6071 	free_saved_cmdlines_buffer(savedcmd_temp);
6072 
6073 	return 0;
6074 }
6075 
6076 static ssize_t
6077 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6078 				  size_t cnt, loff_t *ppos)
6079 {
6080 	unsigned long val;
6081 	int ret;
6082 
6083 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6084 	if (ret)
6085 		return ret;
6086 
6087 	/* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
6088 	if (!val || val > PID_MAX_DEFAULT)
6089 		return -EINVAL;
6090 
6091 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6092 	if (ret < 0)
6093 		return ret;
6094 
6095 	*ppos += cnt;
6096 
6097 	return cnt;
6098 }
6099 
6100 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6101 	.open		= tracing_open_generic,
6102 	.read		= tracing_saved_cmdlines_size_read,
6103 	.write		= tracing_saved_cmdlines_size_write,
6104 };
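
/*
 * Editorial illustration (not part of trace.c): per the write handler above,
 * saved_cmdlines_size accepts a decimal entry count between 1 and
 * PID_MAX_DEFAULT.  A minimal user-space sketch, assuming tracefs is mounted
 * at /sys/kernel/tracing; the value 1024 is just an example.  Guarded by #if 0.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
	const char *val = "1024\n";	/* grow the comm cache to 1024 entries */

	if (fd < 0)
		return 1;
	if (write(fd, val, strlen(val)) < 0)
		perror("saved_cmdlines_size");
	close(fd);
	return 0;
}
#endif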
6105 
6106 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6107 static union trace_eval_map_item *
6108 update_eval_map(union trace_eval_map_item *ptr)
6109 {
6110 	if (!ptr->map.eval_string) {
6111 		if (ptr->tail.next) {
6112 			ptr = ptr->tail.next;
6113 			/* Set ptr to the next real item (skip head) */
6114 			ptr++;
6115 		} else
6116 			return NULL;
6117 	}
6118 	return ptr;
6119 }
6120 
6121 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6122 {
6123 	union trace_eval_map_item *ptr = v;
6124 
6125 	/*
6126 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6127 	 * This really should never happen.
6128 	 */
6129 	(*pos)++;
6130 	ptr = update_eval_map(ptr);
6131 	if (WARN_ON_ONCE(!ptr))
6132 		return NULL;
6133 
6134 	ptr++;
6135 	ptr = update_eval_map(ptr);
6136 
6137 	return ptr;
6138 }
6139 
6140 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6141 {
6142 	union trace_eval_map_item *v;
6143 	loff_t l = 0;
6144 
6145 	mutex_lock(&trace_eval_mutex);
6146 
6147 	v = trace_eval_maps;
6148 	if (v)
6149 		v++;
6150 
6151 	while (v && l < *pos) {
6152 		v = eval_map_next(m, v, &l);
6153 	}
6154 
6155 	return v;
6156 }
6157 
6158 static void eval_map_stop(struct seq_file *m, void *v)
6159 {
6160 	mutex_unlock(&trace_eval_mutex);
6161 }
6162 
6163 static int eval_map_show(struct seq_file *m, void *v)
6164 {
6165 	union trace_eval_map_item *ptr = v;
6166 
6167 	seq_printf(m, "%s %ld (%s)\n",
6168 		   ptr->map.eval_string, ptr->map.eval_value,
6169 		   ptr->map.system);
6170 
6171 	return 0;
6172 }
6173 
6174 static const struct seq_operations tracing_eval_map_seq_ops = {
6175 	.start		= eval_map_start,
6176 	.next		= eval_map_next,
6177 	.stop		= eval_map_stop,
6178 	.show		= eval_map_show,
6179 };
6180 
6181 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6182 {
6183 	int ret;
6184 
6185 	ret = tracing_check_open_get_tr(NULL);
6186 	if (ret)
6187 		return ret;
6188 
6189 	return seq_open(filp, &tracing_eval_map_seq_ops);
6190 }
6191 
6192 static const struct file_operations tracing_eval_map_fops = {
6193 	.open		= tracing_eval_map_open,
6194 	.read		= seq_read,
6195 	.llseek		= seq_lseek,
6196 	.release	= seq_release,
6197 };
6198 
6199 static inline union trace_eval_map_item *
6200 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6201 {
6202 	/* Return tail of array given the head */
6203 	return ptr + ptr->head.length + 1;
6204 }
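
/*
 * Layout of one map_array allocation built by trace_insert_eval_map_file()
 * below (len == 3 shown):
 *
 *   [ head: mod, length=3 ][ map ][ map ][ map ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() above lands on the tail slot by stepping
 * head + length + 1.  The tail of the last array is zeroed by the memset()
 * at the end of trace_insert_eval_map_file() until a later module chains a
 * new array onto tail.next.
 */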
6205 
6206 static void
6207 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6208 			   int len)
6209 {
6210 	struct trace_eval_map **stop;
6211 	struct trace_eval_map **map;
6212 	union trace_eval_map_item *map_array;
6213 	union trace_eval_map_item *ptr;
6214 
6215 	stop = start + len;
6216 
6217 	/*
6218 	 * The trace_eval_maps contains the map plus a head and tail item,
6219 	 * where the head holds the module and length of array, and the
6220 	 * tail holds a pointer to the next list.
6221 	 */
6222 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6223 	if (!map_array) {
6224 		pr_warn("Unable to allocate trace eval mapping\n");
6225 		return;
6226 	}
6227 
6228 	mutex_lock(&trace_eval_mutex);
6229 
6230 	if (!trace_eval_maps)
6231 		trace_eval_maps = map_array;
6232 	else {
6233 		ptr = trace_eval_maps;
6234 		for (;;) {
6235 			ptr = trace_eval_jmp_to_tail(ptr);
6236 			if (!ptr->tail.next)
6237 				break;
6238 			ptr = ptr->tail.next;
6239 
6240 		}
6241 		ptr->tail.next = map_array;
6242 	}
6243 	map_array->head.mod = mod;
6244 	map_array->head.length = len;
6245 	map_array++;
6246 
6247 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6248 		map_array->map = **map;
6249 		map_array++;
6250 	}
6251 	memset(map_array, 0, sizeof(*map_array));
6252 
6253 	mutex_unlock(&trace_eval_mutex);
6254 }
6255 
6256 static void trace_create_eval_file(struct dentry *d_tracer)
6257 {
6258 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6259 			  NULL, &tracing_eval_map_fops);
6260 }
6261 
6262 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6263 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6264 static inline void trace_insert_eval_map_file(struct module *mod,
6265 			      struct trace_eval_map **start, int len) { }
6266 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6267 
6268 static void trace_insert_eval_map(struct module *mod,
6269 				  struct trace_eval_map **start, int len)
6270 {
6271 	struct trace_eval_map **map;
6272 
6273 	if (len <= 0)
6274 		return;
6275 
6276 	map = start;
6277 
6278 	trace_event_eval_update(map, len);
6279 
6280 	trace_insert_eval_map_file(mod, start, len);
6281 }
6282 
6283 static ssize_t
6284 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6285 		       size_t cnt, loff_t *ppos)
6286 {
6287 	struct trace_array *tr = filp->private_data;
6288 	char buf[MAX_TRACER_SIZE+2];
6289 	int r;
6290 
6291 	mutex_lock(&trace_types_lock);
6292 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6293 	mutex_unlock(&trace_types_lock);
6294 
6295 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6296 }
6297 
6298 int tracer_init(struct tracer *t, struct trace_array *tr)
6299 {
6300 	tracing_reset_online_cpus(&tr->array_buffer);
6301 	return t->init(tr);
6302 }
6303 
6304 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6305 {
6306 	int cpu;
6307 
6308 	for_each_tracing_cpu(cpu)
6309 		per_cpu_ptr(buf->data, cpu)->entries = val;
6310 }
6311 
6312 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6313 {
6314 	if (cpu == RING_BUFFER_ALL_CPUS) {
6315 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6316 	} else {
6317 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6318 	}
6319 }
6320 
6321 #ifdef CONFIG_TRACER_MAX_TRACE
6322 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6323 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6324 					struct array_buffer *size_buf, int cpu_id)
6325 {
6326 	int cpu, ret = 0;
6327 
6328 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6329 		for_each_tracing_cpu(cpu) {
6330 			ret = ring_buffer_resize(trace_buf->buffer,
6331 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6332 			if (ret < 0)
6333 				break;
6334 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6335 				per_cpu_ptr(size_buf->data, cpu)->entries;
6336 		}
6337 	} else {
6338 		ret = ring_buffer_resize(trace_buf->buffer,
6339 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6340 		if (ret == 0)
6341 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6342 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6343 	}
6344 
6345 	return ret;
6346 }
6347 #endif /* CONFIG_TRACER_MAX_TRACE */
6348 
6349 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6350 					unsigned long size, int cpu)
6351 {
6352 	int ret;
6353 
6354 	/*
6355 	 * If kernel or user changes the size of the ring buffer
6356 	 * we use the size that was given, and we can forget about
6357 	 * expanding it later.
6358 	 */
6359 	trace_set_ring_buffer_expanded(tr);
6360 
6361 	/* May be called before buffers are initialized */
6362 	if (!tr->array_buffer.buffer)
6363 		return 0;
6364 
6365 	/* Do not allow tracing while resizing ring buffer */
6366 	tracing_stop_tr(tr);
6367 
6368 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6369 	if (ret < 0)
6370 		goto out_start;
6371 
6372 #ifdef CONFIG_TRACER_MAX_TRACE
6373 	if (!tr->allocated_snapshot)
6374 		goto out;
6375 
6376 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6377 	if (ret < 0) {
6378 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6379 						     &tr->array_buffer, cpu);
6380 		if (r < 0) {
6381 			/*
6382 			 * AARGH! We are left with different
6383 			 * size max buffer!!!!
6384 			 * The max buffer is our "snapshot" buffer.
6385 			 * When a tracer needs a snapshot (one of the
6386 			 * latency tracers), it swaps the max buffer
6387 			 * with the saved snapshot. We succeeded in updating
6388 			 * the size of the main buffer, but failed to update
6389 			 * the size of the max buffer. When we then tried to
6390 			 * reset the main buffer to its original size, that
6391 			 * failed too. This is very unlikely to
6392 			 * happen, but if it does, warn and kill all
6393 			 * tracing.
6394 			 */
6395 			WARN_ON(1);
6396 			tracing_disabled = 1;
6397 		}
6398 		goto out_start;
6399 	}
6400 
6401 	update_buffer_entries(&tr->max_buffer, cpu);
6402 
6403  out:
6404 #endif /* CONFIG_TRACER_MAX_TRACE */
6405 
6406 	update_buffer_entries(&tr->array_buffer, cpu);
6407  out_start:
6408 	tracing_start_tr(tr);
6409 	return ret;
6410 }
6411 
6412 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6413 				  unsigned long size, int cpu_id)
6414 {
6415 	int ret;
6416 
6417 	mutex_lock(&trace_types_lock);
6418 
6419 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6420 		/* make sure this CPU is enabled in the mask */
6421 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6422 			ret = -EINVAL;
6423 			goto out;
6424 		}
6425 	}
6426 
6427 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6428 	if (ret < 0)
6429 		ret = -ENOMEM;
6430 
6431 out:
6432 	mutex_unlock(&trace_types_lock);
6433 
6434 	return ret;
6435 }
6436 
6437 
6438 /**
6439  * tracing_update_buffers - used by tracing facility to expand ring buffers
6440  * @tr: The tracing instance
6441  *
6442  * To save memory when tracing is never used on a system that has it
6443  * configured in, the ring buffers are initially set to a minimum size.
6444  * Once a user starts to use the tracing facility, they need to grow
6445  * to their default size.
6446  *
6447  * This function is to be called when a tracer is about to be used.
6448  */
6449 int tracing_update_buffers(struct trace_array *tr)
6450 {
6451 	int ret = 0;
6452 
6453 	mutex_lock(&trace_types_lock);
6454 	if (!tr->ring_buffer_expanded)
6455 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6456 						RING_BUFFER_ALL_CPUS);
6457 	mutex_unlock(&trace_types_lock);
6458 
6459 	return ret;
6460 }
6461 
6462 struct trace_option_dentry;
6463 
6464 static void
6465 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6466 
6467 /*
6468  * Used to clear out the tracer before deletion of an instance.
6469  * Must have trace_types_lock held.
6470  */
6471 static void tracing_set_nop(struct trace_array *tr)
6472 {
6473 	if (tr->current_trace == &nop_trace)
6474 		return;
6475 
6476 	tr->current_trace->enabled--;
6477 
6478 	if (tr->current_trace->reset)
6479 		tr->current_trace->reset(tr);
6480 
6481 	tr->current_trace = &nop_trace;
6482 }
6483 
6484 static bool tracer_options_updated;
6485 
6486 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6487 {
6488 	/* Only enable if the directory has been created already. */
6489 	if (!tr->dir)
6490 		return;
6491 
6492 	/* Only create trace option files after update_tracer_options finish */
6493 	if (!tracer_options_updated)
6494 		return;
6495 
6496 	create_trace_option_files(tr, t);
6497 }
6498 
6499 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6500 {
6501 	struct tracer *t;
6502 #ifdef CONFIG_TRACER_MAX_TRACE
6503 	bool had_max_tr;
6504 #endif
6505 	int ret = 0;
6506 
6507 	mutex_lock(&trace_types_lock);
6508 
6509 	if (!tr->ring_buffer_expanded) {
6510 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6511 						RING_BUFFER_ALL_CPUS);
6512 		if (ret < 0)
6513 			goto out;
6514 		ret = 0;
6515 	}
6516 
6517 	for (t = trace_types; t; t = t->next) {
6518 		if (strcmp(t->name, buf) == 0)
6519 			break;
6520 	}
6521 	if (!t) {
6522 		ret = -EINVAL;
6523 		goto out;
6524 	}
6525 	if (t == tr->current_trace)
6526 		goto out;
6527 
6528 #ifdef CONFIG_TRACER_SNAPSHOT
6529 	if (t->use_max_tr) {
6530 		local_irq_disable();
6531 		arch_spin_lock(&tr->max_lock);
6532 		if (tr->cond_snapshot)
6533 			ret = -EBUSY;
6534 		arch_spin_unlock(&tr->max_lock);
6535 		local_irq_enable();
6536 		if (ret)
6537 			goto out;
6538 	}
6539 #endif
6540 	/* Some tracers won't work on kernel command line */
6541 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6542 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6543 			t->name);
6544 		goto out;
6545 	}
6546 
6547 	/* Some tracers are only allowed for the top level buffer */
6548 	if (!trace_ok_for_array(t, tr)) {
6549 		ret = -EINVAL;
6550 		goto out;
6551 	}
6552 
6553 	/* If trace pipe files are being read, we can't change the tracer */
6554 	if (tr->trace_ref) {
6555 		ret = -EBUSY;
6556 		goto out;
6557 	}
6558 
6559 	trace_branch_disable();
6560 
6561 	tr->current_trace->enabled--;
6562 
6563 	if (tr->current_trace->reset)
6564 		tr->current_trace->reset(tr);
6565 
6566 #ifdef CONFIG_TRACER_MAX_TRACE
6567 	had_max_tr = tr->current_trace->use_max_tr;
6568 
6569 	/* Current trace needs to be nop_trace before synchronize_rcu */
6570 	tr->current_trace = &nop_trace;
6571 
6572 	if (had_max_tr && !t->use_max_tr) {
6573 		/*
6574 		 * We need to make sure that the update_max_tr sees that
6575 		 * current_trace changed to nop_trace to keep it from
6576 		 * swapping the buffers after we resize it.
6577 		 * update_max_tr() is called with interrupts disabled,
6578 		 * so a synchronize_rcu() is sufficient.
6579 		 */
6580 		synchronize_rcu();
6581 		free_snapshot(tr);
6582 	}
6583 
6584 	if (t->use_max_tr && !tr->allocated_snapshot) {
6585 		ret = tracing_alloc_snapshot_instance(tr);
6586 		if (ret < 0)
6587 			goto out;
6588 	}
6589 #else
6590 	tr->current_trace = &nop_trace;
6591 #endif
6592 
6593 	if (t->init) {
6594 		ret = tracer_init(t, tr);
6595 		if (ret)
6596 			goto out;
6597 	}
6598 
6599 	tr->current_trace = t;
6600 	tr->current_trace->enabled++;
6601 	trace_branch_enable(tr);
6602  out:
6603 	mutex_unlock(&trace_types_lock);
6604 
6605 	return ret;
6606 }
6607 
6608 static ssize_t
6609 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6610 			size_t cnt, loff_t *ppos)
6611 {
6612 	struct trace_array *tr = filp->private_data;
6613 	char buf[MAX_TRACER_SIZE+1];
6614 	char *name;
6615 	size_t ret;
6616 	int err;
6617 
6618 	ret = cnt;
6619 
6620 	if (cnt > MAX_TRACER_SIZE)
6621 		cnt = MAX_TRACER_SIZE;
6622 
6623 	if (copy_from_user(buf, ubuf, cnt))
6624 		return -EFAULT;
6625 
6626 	buf[cnt] = 0;
6627 
6628 	name = strim(buf);
6629 
6630 	err = tracing_set_tracer(tr, name);
6631 	if (err)
6632 		return err;
6633 
6634 	*ppos += ret;
6635 
6636 	return ret;
6637 }
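
/*
 * Editorial illustration (not part of trace.c): tracing_set_trace_write()
 * above accepts a tracer name (trimmed of whitespace) through the
 * current_tracer file.  A minimal user-space sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing and that the "function" tracer is
 * configured in.  Guarded by #if 0.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
	const char *tracer = "function\n";

	if (fd < 0)
		return 1;
	if (write(fd, tracer, strlen(tracer)) < 0)
		perror("current_tracer");	/* e.g. EINVAL for an unknown tracer */
	close(fd);
	return 0;
}
#endif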
6638 
6639 static ssize_t
6640 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6641 		   size_t cnt, loff_t *ppos)
6642 {
6643 	char buf[64];
6644 	int r;
6645 
6646 	r = snprintf(buf, sizeof(buf), "%ld\n",
6647 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6648 	if (r > sizeof(buf))
6649 		r = sizeof(buf);
6650 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6651 }
6652 
6653 static ssize_t
6654 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6655 		    size_t cnt, loff_t *ppos)
6656 {
6657 	unsigned long val;
6658 	int ret;
6659 
6660 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6661 	if (ret)
6662 		return ret;
6663 
6664 	*ptr = val * 1000;
6665 
6666 	return cnt;
6667 }
6668 
6669 static ssize_t
6670 tracing_thresh_read(struct file *filp, char __user *ubuf,
6671 		    size_t cnt, loff_t *ppos)
6672 {
6673 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6674 }
6675 
6676 static ssize_t
6677 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6678 		     size_t cnt, loff_t *ppos)
6679 {
6680 	struct trace_array *tr = filp->private_data;
6681 	int ret;
6682 
6683 	mutex_lock(&trace_types_lock);
6684 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6685 	if (ret < 0)
6686 		goto out;
6687 
6688 	if (tr->current_trace->update_thresh) {
6689 		ret = tr->current_trace->update_thresh(tr);
6690 		if (ret < 0)
6691 			goto out;
6692 	}
6693 
6694 	ret = cnt;
6695 out:
6696 	mutex_unlock(&trace_types_lock);
6697 
6698 	return ret;
6699 }
6700 
6701 #ifdef CONFIG_TRACER_MAX_TRACE
6702 
6703 static ssize_t
6704 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6705 		     size_t cnt, loff_t *ppos)
6706 {
6707 	struct trace_array *tr = filp->private_data;
6708 
6709 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6710 }
6711 
6712 static ssize_t
6713 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6714 		      size_t cnt, loff_t *ppos)
6715 {
6716 	struct trace_array *tr = filp->private_data;
6717 
6718 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6719 }
6720 
6721 #endif
6722 
6723 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6724 {
6725 	if (cpu == RING_BUFFER_ALL_CPUS) {
6726 		if (cpumask_empty(tr->pipe_cpumask)) {
6727 			cpumask_setall(tr->pipe_cpumask);
6728 			return 0;
6729 		}
6730 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6731 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6732 		return 0;
6733 	}
6734 	return -EBUSY;
6735 }
6736 
6737 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6738 {
6739 	if (cpu == RING_BUFFER_ALL_CPUS) {
6740 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6741 		cpumask_clear(tr->pipe_cpumask);
6742 	} else {
6743 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6744 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6745 	}
6746 }
6747 
6748 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6749 {
6750 	struct trace_array *tr = inode->i_private;
6751 	struct trace_iterator *iter;
6752 	int cpu;
6753 	int ret;
6754 
6755 	ret = tracing_check_open_get_tr(tr);
6756 	if (ret)
6757 		return ret;
6758 
6759 	mutex_lock(&trace_types_lock);
6760 	cpu = tracing_get_cpu(inode);
6761 	ret = open_pipe_on_cpu(tr, cpu);
6762 	if (ret)
6763 		goto fail_pipe_on_cpu;
6764 
6765 	/* create a buffer to store the information to pass to userspace */
6766 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6767 	if (!iter) {
6768 		ret = -ENOMEM;
6769 		goto fail_alloc_iter;
6770 	}
6771 
6772 	trace_seq_init(&iter->seq);
6773 	iter->trace = tr->current_trace;
6774 
6775 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6776 		ret = -ENOMEM;
6777 		goto fail;
6778 	}
6779 
6780 	/* trace pipe does not show start of buffer */
6781 	cpumask_setall(iter->started);
6782 
6783 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6784 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6785 
6786 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6787 	if (trace_clocks[tr->clock_id].in_ns)
6788 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6789 
6790 	iter->tr = tr;
6791 	iter->array_buffer = &tr->array_buffer;
6792 	iter->cpu_file = cpu;
6793 	mutex_init(&iter->mutex);
6794 	filp->private_data = iter;
6795 
6796 	if (iter->trace->pipe_open)
6797 		iter->trace->pipe_open(iter);
6798 
6799 	nonseekable_open(inode, filp);
6800 
6801 	tr->trace_ref++;
6802 
6803 	mutex_unlock(&trace_types_lock);
6804 	return ret;
6805 
6806 fail:
6807 	kfree(iter);
6808 fail_alloc_iter:
6809 	close_pipe_on_cpu(tr, cpu);
6810 fail_pipe_on_cpu:
6811 	__trace_array_put(tr);
6812 	mutex_unlock(&trace_types_lock);
6813 	return ret;
6814 }
6815 
6816 static int tracing_release_pipe(struct inode *inode, struct file *file)
6817 {
6818 	struct trace_iterator *iter = file->private_data;
6819 	struct trace_array *tr = inode->i_private;
6820 
6821 	mutex_lock(&trace_types_lock);
6822 
6823 	tr->trace_ref--;
6824 
6825 	if (iter->trace->pipe_close)
6826 		iter->trace->pipe_close(iter);
6827 	close_pipe_on_cpu(tr, iter->cpu_file);
6828 	mutex_unlock(&trace_types_lock);
6829 
6830 	free_trace_iter_content(iter);
6831 	kfree(iter);
6832 
6833 	trace_array_put(tr);
6834 
6835 	return 0;
6836 }
6837 
6838 static __poll_t
6839 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6840 {
6841 	struct trace_array *tr = iter->tr;
6842 
6843 	/* Iterators are static, they should be filled or empty */
6844 	if (trace_buffer_iter(iter, iter->cpu_file))
6845 		return EPOLLIN | EPOLLRDNORM;
6846 
6847 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6848 		/*
6849 		 * Always select as readable when in blocking mode
6850 		 */
6851 		return EPOLLIN | EPOLLRDNORM;
6852 	else
6853 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6854 					     filp, poll_table, iter->tr->buffer_percent);
6855 }
6856 
6857 static __poll_t
6858 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6859 {
6860 	struct trace_iterator *iter = filp->private_data;
6861 
6862 	return trace_poll(iter, filp, poll_table);
6863 }
6864 
6865 /* Must be called with iter->mutex held. */
6866 static int tracing_wait_pipe(struct file *filp)
6867 {
6868 	struct trace_iterator *iter = filp->private_data;
6869 	int ret;
6870 
6871 	while (trace_empty(iter)) {
6872 
6873 		if ((filp->f_flags & O_NONBLOCK)) {
6874 			return -EAGAIN;
6875 		}
6876 
6877 		/*
6878 		 * We block until we read something and tracing is disabled.
6879 		 * We still block if tracing is disabled, but we have never
6880 		 * read anything. This allows a user to cat this file, and
6881 		 * then enable tracing. But after we have read something,
6882 		 * we give an EOF when tracing is again disabled.
6883 		 *
6884 		 * iter->pos will be 0 if we haven't read anything.
6885 		 */
6886 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6887 			break;
6888 
6889 		mutex_unlock(&iter->mutex);
6890 
6891 		ret = wait_on_pipe(iter, 0);
6892 
6893 		mutex_lock(&iter->mutex);
6894 
6895 		if (ret)
6896 			return ret;
6897 	}
6898 
6899 	return 1;
6900 }
6901 
6902 /*
6903  * Consumer reader.
6904  */
6905 static ssize_t
6906 tracing_read_pipe(struct file *filp, char __user *ubuf,
6907 		  size_t cnt, loff_t *ppos)
6908 {
6909 	struct trace_iterator *iter = filp->private_data;
6910 	ssize_t sret;
6911 
6912 	/*
6913 	 * Avoid more than one consumer on a single file descriptor.
6914 	 * This is just a matter of trace coherency; the ring buffer itself
6915 	 * is protected.
6916 	 */
6917 	mutex_lock(&iter->mutex);
6918 
6919 	/* return any leftover data */
6920 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6921 	if (sret != -EBUSY)
6922 		goto out;
6923 
6924 	trace_seq_init(&iter->seq);
6925 
6926 	if (iter->trace->read) {
6927 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6928 		if (sret)
6929 			goto out;
6930 	}
6931 
6932 waitagain:
6933 	sret = tracing_wait_pipe(filp);
6934 	if (sret <= 0)
6935 		goto out;
6936 
6937 	/* stop when tracing is finished */
6938 	if (trace_empty(iter)) {
6939 		sret = 0;
6940 		goto out;
6941 	}
6942 
6943 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6944 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6945 
6946 	/* reset all but tr, trace, and overruns */
6947 	trace_iterator_reset(iter);
6948 	cpumask_clear(iter->started);
6949 	trace_seq_init(&iter->seq);
6950 
6951 	trace_event_read_lock();
6952 	trace_access_lock(iter->cpu_file);
6953 	while (trace_find_next_entry_inc(iter) != NULL) {
6954 		enum print_line_t ret;
6955 		int save_len = iter->seq.seq.len;
6956 
6957 		ret = print_trace_line(iter);
6958 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6959 			/*
6960 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6961 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6962 			 * In this case we need to consume it; otherwise the loop will peek
6963 			 * at this event again next time, resulting in an infinite loop.
6964 			 */
6965 			if (save_len == 0) {
6966 				iter->seq.full = 0;
6967 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6968 				trace_consume(iter);
6969 				break;
6970 			}
6971 
6972 			/* In other cases, don't print partial lines */
6973 			iter->seq.seq.len = save_len;
6974 			break;
6975 		}
6976 		if (ret != TRACE_TYPE_NO_CONSUME)
6977 			trace_consume(iter);
6978 
6979 		if (trace_seq_used(&iter->seq) >= cnt)
6980 			break;
6981 
6982 		/*
6983 		 * Setting the full flag means we reached the trace_seq buffer
6984 		 * size and should have left via the partial-output condition above;
6985 		 * one of the trace_seq_* functions is not being used properly.
6986 		 */
6987 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6988 			  iter->ent->type);
6989 	}
6990 	trace_access_unlock(iter->cpu_file);
6991 	trace_event_read_unlock();
6992 
6993 	/* Now copy what we have to the user */
6994 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6995 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6996 		trace_seq_init(&iter->seq);
6997 
6998 	/*
6999 	 * If there was nothing to send to user, in spite of consuming trace
7000 	 * entries, go back to wait for more entries.
7001 	 */
7002 	if (sret == -EBUSY)
7003 		goto waitagain;
7004 
7005 out:
7006 	mutex_unlock(&iter->mutex);
7007 
7008 	return sret;
7009 }
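
/*
 * Editorial illustration (not part of trace.c): tracing_read_pipe() above
 * implements the consuming reader behind trace_pipe.  A hedged user-space
 * sketch of such a consumer, assuming tracefs is mounted at
 * /sys/kernel/tracing; the read blocks until data arrives unless O_NONBLOCK
 * is used.  Guarded by #if 0.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	char buf[4096];
	ssize_t n;

	if (fd < 0)
		return 1;
	/* Each successful read consumes the returned events from the buffer. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
#endif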
7010 
7011 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7012 				     unsigned int idx)
7013 {
7014 	__free_page(spd->pages[idx]);
7015 }
7016 
7017 static size_t
7018 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7019 {
7020 	size_t count;
7021 	int save_len;
7022 	int ret;
7023 
7024 	/* Seq buffer is page-sized, exactly what we need. */
7025 	for (;;) {
7026 		save_len = iter->seq.seq.len;
7027 		ret = print_trace_line(iter);
7028 
7029 		if (trace_seq_has_overflowed(&iter->seq)) {
7030 			iter->seq.seq.len = save_len;
7031 			break;
7032 		}
7033 
7034 		/*
7035 		 * This should not be hit, because it should only
7036 		 * be set if the iter->seq overflowed. But check it
7037 		 * anyway to be safe.
7038 		 */
7039 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
7040 			iter->seq.seq.len = save_len;
7041 			break;
7042 		}
7043 
7044 		count = trace_seq_used(&iter->seq) - save_len;
7045 		if (rem < count) {
7046 			rem = 0;
7047 			iter->seq.seq.len = save_len;
7048 			break;
7049 		}
7050 
7051 		if (ret != TRACE_TYPE_NO_CONSUME)
7052 			trace_consume(iter);
7053 		rem -= count;
7054 		if (!trace_find_next_entry_inc(iter))	{
7055 			rem = 0;
7056 			iter->ent = NULL;
7057 			break;
7058 		}
7059 	}
7060 
7061 	return rem;
7062 }
7063 
7064 static ssize_t tracing_splice_read_pipe(struct file *filp,
7065 					loff_t *ppos,
7066 					struct pipe_inode_info *pipe,
7067 					size_t len,
7068 					unsigned int flags)
7069 {
7070 	struct page *pages_def[PIPE_DEF_BUFFERS];
7071 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7072 	struct trace_iterator *iter = filp->private_data;
7073 	struct splice_pipe_desc spd = {
7074 		.pages		= pages_def,
7075 		.partial	= partial_def,
7076 		.nr_pages	= 0, /* This gets updated below. */
7077 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7078 		.ops		= &default_pipe_buf_ops,
7079 		.spd_release	= tracing_spd_release_pipe,
7080 	};
7081 	ssize_t ret;
7082 	size_t rem;
7083 	unsigned int i;
7084 
7085 	if (splice_grow_spd(pipe, &spd))
7086 		return -ENOMEM;
7087 
7088 	mutex_lock(&iter->mutex);
7089 
7090 	if (iter->trace->splice_read) {
7091 		ret = iter->trace->splice_read(iter, filp,
7092 					       ppos, pipe, len, flags);
7093 		if (ret)
7094 			goto out_err;
7095 	}
7096 
7097 	ret = tracing_wait_pipe(filp);
7098 	if (ret <= 0)
7099 		goto out_err;
7100 
7101 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7102 		ret = -EFAULT;
7103 		goto out_err;
7104 	}
7105 
7106 	trace_event_read_lock();
7107 	trace_access_lock(iter->cpu_file);
7108 
7109 	/* Fill as many pages as possible. */
7110 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7111 		spd.pages[i] = alloc_page(GFP_KERNEL);
7112 		if (!spd.pages[i])
7113 			break;
7114 
7115 		rem = tracing_fill_pipe_page(rem, iter);
7116 
7117 		/* Copy the data into the page, so we can start over. */
7118 		ret = trace_seq_to_buffer(&iter->seq,
7119 					  page_address(spd.pages[i]),
7120 					  trace_seq_used(&iter->seq));
7121 		if (ret < 0) {
7122 			__free_page(spd.pages[i]);
7123 			break;
7124 		}
7125 		spd.partial[i].offset = 0;
7126 		spd.partial[i].len = trace_seq_used(&iter->seq);
7127 
7128 		trace_seq_init(&iter->seq);
7129 	}
7130 
7131 	trace_access_unlock(iter->cpu_file);
7132 	trace_event_read_unlock();
7133 	mutex_unlock(&iter->mutex);
7134 
7135 	spd.nr_pages = i;
7136 
7137 	if (i)
7138 		ret = splice_to_pipe(pipe, &spd);
7139 	else
7140 		ret = 0;
7141 out:
7142 	splice_shrink_spd(&spd);
7143 	return ret;
7144 
7145 out_err:
7146 	mutex_unlock(&iter->mutex);
7147 	goto out;
7148 }
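
/*
 * Editorial illustration (not part of trace.c): tracing_splice_read_pipe()
 * above backs splice(2) on trace_pipe.  A hedged user-space sketch that
 * moves trace data into a log file through an intermediate pipe (splice
 * needs one end to be a pipe), assuming tracefs is mounted at
 * /sys/kernel/tracing; "trace.log" is just an example path.  Guarded by #if 0.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int trace = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	int out = open("trace.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	int pfd[2];
	ssize_t n;

	if (trace < 0 || out < 0 || pipe(pfd) < 0)
		return 1;
	for (;;) {
		/* trace_pipe -> pipe: exercises the splice_read path above */
		n = splice(trace, NULL, pfd[1], NULL, 65536, 0);
		if (n <= 0)
			break;
		/* pipe -> regular file */
		if (splice(pfd[0], NULL, out, NULL, n, 0) < 0)
			break;
	}
	return 0;
}
#endif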
7149 
7150 static ssize_t
7151 tracing_entries_read(struct file *filp, char __user *ubuf,
7152 		     size_t cnt, loff_t *ppos)
7153 {
7154 	struct inode *inode = file_inode(filp);
7155 	struct trace_array *tr = inode->i_private;
7156 	int cpu = tracing_get_cpu(inode);
7157 	char buf[64];
7158 	int r = 0;
7159 	ssize_t ret;
7160 
7161 	mutex_lock(&trace_types_lock);
7162 
7163 	if (cpu == RING_BUFFER_ALL_CPUS) {
7164 		int cpu, buf_size_same;
7165 		unsigned long size;
7166 
7167 		size = 0;
7168 		buf_size_same = 1;
7169 		/* check if all cpu sizes are same */
7170 		for_each_tracing_cpu(cpu) {
7171 			/* fill in the size from first enabled cpu */
7172 			if (size == 0)
7173 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7174 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7175 				buf_size_same = 0;
7176 				break;
7177 			}
7178 		}
7179 
7180 		if (buf_size_same) {
7181 			if (!tr->ring_buffer_expanded)
7182 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7183 					    size >> 10,
7184 					    trace_buf_size >> 10);
7185 			else
7186 				r = sprintf(buf, "%lu\n", size >> 10);
7187 		} else
7188 			r = sprintf(buf, "X\n");
7189 	} else
7190 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7191 
7192 	mutex_unlock(&trace_types_lock);
7193 
7194 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7195 	return ret;
7196 }
7197 
7198 static ssize_t
7199 tracing_entries_write(struct file *filp, const char __user *ubuf,
7200 		      size_t cnt, loff_t *ppos)
7201 {
7202 	struct inode *inode = file_inode(filp);
7203 	struct trace_array *tr = inode->i_private;
7204 	unsigned long val;
7205 	int ret;
7206 
7207 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7208 	if (ret)
7209 		return ret;
7210 
7211 	/* must have at least 1 entry */
7212 	if (!val)
7213 		return -EINVAL;
7214 
7215 	/* value is in KB */
7216 	val <<= 10;
7217 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7218 	if (ret < 0)
7219 		return ret;
7220 
7221 	*ppos += cnt;
7222 
7223 	return cnt;
7224 }
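
/*
 * Editorial illustration (not part of trace.c): tracing_entries_write() above
 * interprets the written value in kilobytes (val <<= 10) before resizing the
 * ring buffer.  A minimal user-space sketch, assuming tracefs is mounted at
 * /sys/kernel/tracing; 4096 KB is just an example size.  Guarded by #if 0.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
	const char *kb = "4096\n";	/* ask for 4 MB of buffer per CPU */

	if (fd < 0)
		return 1;
	if (write(fd, kb, strlen(kb)) < 0)
		perror("buffer_size_kb");	/* fails with ENOMEM if the resize fails */
	close(fd);
	return 0;
}
#endif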
7225 
7226 static ssize_t
7227 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7228 				size_t cnt, loff_t *ppos)
7229 {
7230 	struct trace_array *tr = filp->private_data;
7231 	char buf[64];
7232 	int r, cpu;
7233 	unsigned long size = 0, expanded_size = 0;
7234 
7235 	mutex_lock(&trace_types_lock);
7236 	for_each_tracing_cpu(cpu) {
7237 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7238 		if (!tr->ring_buffer_expanded)
7239 			expanded_size += trace_buf_size >> 10;
7240 	}
7241 	if (tr->ring_buffer_expanded)
7242 		r = sprintf(buf, "%lu\n", size);
7243 	else
7244 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7245 	mutex_unlock(&trace_types_lock);
7246 
7247 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7248 }
7249 
7250 static ssize_t
7251 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7252 			  size_t cnt, loff_t *ppos)
7253 {
7254 	/*
7255 	 * There is no need to read what the user has written; this function
7256 	 * is just here to make sure that there is no error when "echo" is used.
7257 	 */
7258 
7259 	*ppos += cnt;
7260 
7261 	return cnt;
7262 }
7263 
7264 static int
7265 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7266 {
7267 	struct trace_array *tr = inode->i_private;
7268 
7269 	/* disable tracing ? */
7270 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7271 		tracer_tracing_off(tr);
7272 	/* resize the ring buffer to 0 */
7273 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7274 
7275 	trace_array_put(tr);
7276 
7277 	return 0;
7278 }
7279 
7280 static ssize_t
7281 tracing_mark_write(struct file *filp, const char __user *ubuf,
7282 					size_t cnt, loff_t *fpos)
7283 {
7284 	struct trace_array *tr = filp->private_data;
7285 	struct ring_buffer_event *event;
7286 	enum event_trigger_type tt = ETT_NONE;
7287 	struct trace_buffer *buffer;
7288 	struct print_entry *entry;
7289 	int meta_size;
7290 	ssize_t written;
7291 	size_t size;
7292 	int len;
7293 
7294 /* Used in tracing_mark_raw_write() as well */
7295 #define FAULTED_STR "<faulted>"
7296 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7297 
7298 	if (tracing_disabled)
7299 		return -EINVAL;
7300 
7301 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7302 		return -EINVAL;
7303 
7304 	if ((ssize_t)cnt < 0)
7305 		return -EINVAL;
7306 
7307 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7308  again:
7309 	size = cnt + meta_size;
7310 
7311 	/* If less than "<faulted>", then make sure we can still add that */
7312 	if (cnt < FAULTED_SIZE)
7313 		size += FAULTED_SIZE - cnt;
7314 
7315 	if (size > TRACE_SEQ_BUFFER_SIZE) {
7316 		cnt -= size - TRACE_SEQ_BUFFER_SIZE;
7317 		goto again;
7318 	}
7319 
7320 	buffer = tr->array_buffer.buffer;
7321 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7322 					    tracing_gen_ctx());
7323 	if (unlikely(!event)) {
7324 		/*
7325 		 * If the size was greater than what was allowed, then
7326 		 * make it smaller and try again.
7327 		 */
7328 		if (size > ring_buffer_max_event_size(buffer)) {
7329 			/* cnt < FAULTED size should never be bigger than max */
7330 			/* a cnt smaller than FAULTED_SIZE should never produce a size bigger than the max */
7331 				return -EBADF;
7332 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7333 			/* The above should only happen once */
7334 			if (WARN_ON_ONCE(cnt + meta_size == size))
7335 				return -EBADF;
7336 			goto again;
7337 		}
7338 
7339 		/* Ring buffer disabled, return as if not open for write */
7340 		return -EBADF;
7341 	}
7342 
7343 	entry = ring_buffer_event_data(event);
7344 	entry->ip = _THIS_IP_;
7345 
7346 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7347 	if (len) {
7348 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7349 		cnt = FAULTED_SIZE;
7350 		written = -EFAULT;
7351 	} else
7352 		written = cnt;
7353 
7354 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7355 		/* do not add \n before testing triggers, but add \0 */
7356 		entry->buf[cnt] = '\0';
7357 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7358 	}
7359 
7360 	if (entry->buf[cnt - 1] != '\n') {
7361 		entry->buf[cnt] = '\n';
7362 		entry->buf[cnt + 1] = '\0';
7363 	} else
7364 		entry->buf[cnt] = '\0';
7365 
7366 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7367 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7368 	__buffer_unlock_commit(buffer, event);
7369 
7370 	if (tt)
7371 		event_triggers_post_call(tr->trace_marker_file, tt);
7372 
7373 	return written;
7374 }
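
/*
 * Editorial illustration (not part of trace.c): tracing_mark_write() above is
 * the handler behind the trace_marker file; anything written shows up as a
 * print entry in the trace.  A minimal user-space sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing.  Guarded by #if 0.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
	const char *msg = "hello from user space\n";

	if (fd < 0)
		return 1;
	/* The handler appends a '\n' itself if the write lacks one. */
	write(fd, msg, strlen(msg));
	close(fd);
	return 0;
}
#endif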
7375 
7376 static ssize_t
7377 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7378 					size_t cnt, loff_t *fpos)
7379 {
7380 	struct trace_array *tr = filp->private_data;
7381 	struct ring_buffer_event *event;
7382 	struct trace_buffer *buffer;
7383 	struct raw_data_entry *entry;
7384 	ssize_t written;
7385 	int size;
7386 	int len;
7387 
7388 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7389 
7390 	if (tracing_disabled)
7391 		return -EINVAL;
7392 
7393 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7394 		return -EINVAL;
7395 
7396 	/* The marker must at least have a tag id */
7397 	if (cnt < sizeof(unsigned int))
7398 		return -EINVAL;
7399 
7400 	size = sizeof(*entry) + cnt;
7401 	if (cnt < FAULT_SIZE_ID)
7402 		size += FAULT_SIZE_ID - cnt;
7403 
7404 	buffer = tr->array_buffer.buffer;
7405 
7406 	if (size > ring_buffer_max_event_size(buffer))
7407 		return -EINVAL;
7408 
7409 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7410 					    tracing_gen_ctx());
7411 	if (!event)
7412 		/* Ring buffer disabled, return as if not open for write */
7413 		return -EBADF;
7414 
7415 	entry = ring_buffer_event_data(event);
7416 
7417 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7418 	if (len) {
7419 		entry->id = -1;
7420 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7421 		written = -EFAULT;
7422 	} else
7423 		written = cnt;
7424 
7425 	__buffer_unlock_commit(buffer, event);
7426 
7427 	return written;
7428 }
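
/*
 * Editorial illustration (not part of trace.c): tracing_mark_raw_write()
 * above requires the payload to start with an unsigned int tag id (it is
 * copied into entry->id first).  A hedged user-space sketch writing such a
 * record, assuming tracefs is mounted at /sys/kernel/tracing; the id value
 * 42 and the payload bytes are made up for the example.  Guarded by #if 0.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
	struct {
		unsigned int id;	/* tag id consumed into entry->id */
		char payload[8];	/* opaque binary data after the id */
	} rec = { .id = 42 };

	if (fd < 0)
		return 1;
	memcpy(rec.payload, "rawdata", 8);
	write(fd, &rec, sizeof(rec));
	close(fd);
	return 0;
}
#endif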
7429 
7430 static int tracing_clock_show(struct seq_file *m, void *v)
7431 {
7432 	struct trace_array *tr = m->private;
7433 	int i;
7434 
7435 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7436 		seq_printf(m,
7437 			"%s%s%s%s", i ? " " : "",
7438 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7439 			i == tr->clock_id ? "]" : "");
7440 	seq_putc(m, '\n');
7441 
7442 	return 0;
7443 }
7444 
7445 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7446 {
7447 	int i;
7448 
7449 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7450 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7451 			break;
7452 	}
7453 	if (i == ARRAY_SIZE(trace_clocks))
7454 		return -EINVAL;
7455 
7456 	mutex_lock(&trace_types_lock);
7457 
7458 	tr->clock_id = i;
7459 
7460 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7461 
7462 	/*
7463 	 * New clock may not be consistent with the previous clock.
7464 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7465 	 */
7466 	tracing_reset_online_cpus(&tr->array_buffer);
7467 
7468 #ifdef CONFIG_TRACER_MAX_TRACE
7469 	if (tr->max_buffer.buffer)
7470 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7471 	tracing_reset_online_cpus(&tr->max_buffer);
7472 #endif
7473 
7474 	mutex_unlock(&trace_types_lock);
7475 
7476 	return 0;
7477 }
7478 
7479 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7480 				   size_t cnt, loff_t *fpos)
7481 {
7482 	struct seq_file *m = filp->private_data;
7483 	struct trace_array *tr = m->private;
7484 	char buf[64];
7485 	const char *clockstr;
7486 	int ret;
7487 
7488 	if (cnt >= sizeof(buf))
7489 		return -EINVAL;
7490 
7491 	if (copy_from_user(buf, ubuf, cnt))
7492 		return -EFAULT;
7493 
7494 	buf[cnt] = 0;
7495 
7496 	clockstr = strstrip(buf);
7497 
7498 	ret = tracing_set_clock(tr, clockstr);
7499 	if (ret)
7500 		return ret;
7501 
7502 	*fpos += cnt;
7503 
7504 	return cnt;
7505 }
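/*
 * A minimal sketch of using the trace_clock file (assuming tracefs is
 * mounted at /sys/kernel/tracing); the clock currently in use is shown
 * in brackets, and writing any listed name switches to it:
 *
 *	# cat trace_clock
 *	[local] global counter uptime perf mono mono_raw ...
 *	# echo global > trace_clock
 *
 * As noted above, switching clocks resets the ring buffer so that old
 * and new timestamps are never mixed.
 */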
7506 
7507 static int tracing_clock_open(struct inode *inode, struct file *file)
7508 {
7509 	struct trace_array *tr = inode->i_private;
7510 	int ret;
7511 
7512 	ret = tracing_check_open_get_tr(tr);
7513 	if (ret)
7514 		return ret;
7515 
7516 	ret = single_open(file, tracing_clock_show, inode->i_private);
7517 	if (ret < 0)
7518 		trace_array_put(tr);
7519 
7520 	return ret;
7521 }
7522 
7523 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7524 {
7525 	struct trace_array *tr = m->private;
7526 
7527 	mutex_lock(&trace_types_lock);
7528 
7529 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7530 		seq_puts(m, "delta [absolute]\n");
7531 	else
7532 		seq_puts(m, "[delta] absolute\n");
7533 
7534 	mutex_unlock(&trace_types_lock);
7535 
7536 	return 0;
7537 }
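/*
 * Example output of the timestamp_mode file backed by this handler:
 * "[delta] absolute" or "delta [absolute]", with the ring buffer's
 * current mode shown in brackets.
 */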
7538 
7539 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7540 {
7541 	struct trace_array *tr = inode->i_private;
7542 	int ret;
7543 
7544 	ret = tracing_check_open_get_tr(tr);
7545 	if (ret)
7546 		return ret;
7547 
7548 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7549 	if (ret < 0)
7550 		trace_array_put(tr);
7551 
7552 	return ret;
7553 }
7554 
7555 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7556 {
7557 	if (rbe == this_cpu_read(trace_buffered_event))
7558 		return ring_buffer_time_stamp(buffer);
7559 
7560 	return ring_buffer_event_time_stamp(buffer, rbe);
7561 }
7562 
7563 /*
7564  * Enable or disable use of the per CPU trace_buffered_event when possible.
7565  */
7566 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7567 {
7568 	int ret = 0;
7569 
7570 	mutex_lock(&trace_types_lock);
7571 
7572 	if (set && tr->no_filter_buffering_ref++)
7573 		goto out;
7574 
7575 	if (!set) {
7576 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7577 			ret = -EINVAL;
7578 			goto out;
7579 		}
7580 
7581 		--tr->no_filter_buffering_ref;
7582 	}
7583  out:
7584 	mutex_unlock(&trace_types_lock);
7585 
7586 	return ret;
7587 }
7588 
7589 struct ftrace_buffer_info {
7590 	struct trace_iterator	iter;
7591 	void			*spare;
7592 	unsigned int		spare_cpu;
7593 	unsigned int		spare_size;
7594 	unsigned int		read;
7595 };
7596 
7597 #ifdef CONFIG_TRACER_SNAPSHOT
7598 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7599 {
7600 	struct trace_array *tr = inode->i_private;
7601 	struct trace_iterator *iter;
7602 	struct seq_file *m;
7603 	int ret;
7604 
7605 	ret = tracing_check_open_get_tr(tr);
7606 	if (ret)
7607 		return ret;
7608 
7609 	if (file->f_mode & FMODE_READ) {
7610 		iter = __tracing_open(inode, file, true);
7611 		if (IS_ERR(iter))
7612 			ret = PTR_ERR(iter);
7613 	} else {
7614 		/* Writes still need the seq_file to hold the private data */
7615 		ret = -ENOMEM;
7616 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7617 		if (!m)
7618 			goto out;
7619 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7620 		if (!iter) {
7621 			kfree(m);
7622 			goto out;
7623 		}
7624 		ret = 0;
7625 
7626 		iter->tr = tr;
7627 		iter->array_buffer = &tr->max_buffer;
7628 		iter->cpu_file = tracing_get_cpu(inode);
7629 		m->private = iter;
7630 		file->private_data = m;
7631 	}
7632 out:
7633 	if (ret < 0)
7634 		trace_array_put(tr);
7635 
7636 	return ret;
7637 }
7638 
7639 static void tracing_swap_cpu_buffer(void *tr)
7640 {
7641 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7642 }
7643 
7644 static ssize_t
7645 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7646 		       loff_t *ppos)
7647 {
7648 	struct seq_file *m = filp->private_data;
7649 	struct trace_iterator *iter = m->private;
7650 	struct trace_array *tr = iter->tr;
7651 	unsigned long val;
7652 	int ret;
7653 
7654 	ret = tracing_update_buffers(tr);
7655 	if (ret < 0)
7656 		return ret;
7657 
7658 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7659 	if (ret)
7660 		return ret;
7661 
7662 	mutex_lock(&trace_types_lock);
7663 
7664 	if (tr->current_trace->use_max_tr) {
7665 		ret = -EBUSY;
7666 		goto out;
7667 	}
7668 
7669 	local_irq_disable();
7670 	arch_spin_lock(&tr->max_lock);
7671 	if (tr->cond_snapshot)
7672 		ret = -EBUSY;
7673 	arch_spin_unlock(&tr->max_lock);
7674 	local_irq_enable();
7675 	if (ret)
7676 		goto out;
7677 
7678 	switch (val) {
7679 	case 0:
7680 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7681 			ret = -EINVAL;
7682 			break;
7683 		}
7684 		if (tr->allocated_snapshot)
7685 			free_snapshot(tr);
7686 		break;
7687 	case 1:
7688 /* Only allow per-cpu swap if the ring buffer supports it */
7689 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7690 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7691 			ret = -EINVAL;
7692 			break;
7693 		}
7694 #endif
7695 		if (tr->allocated_snapshot)
7696 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7697 					&tr->array_buffer, iter->cpu_file);
7698 		else
7699 			ret = tracing_alloc_snapshot_instance(tr);
7700 		if (ret < 0)
7701 			break;
7702 		/* Now, we're going to swap */
7703 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7704 			local_irq_disable();
7705 			update_max_tr(tr, current, smp_processor_id(), NULL);
7706 			local_irq_enable();
7707 		} else {
7708 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7709 						 (void *)tr, 1);
7710 		}
7711 		break;
7712 	default:
7713 		if (tr->allocated_snapshot) {
7714 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7715 				tracing_reset_online_cpus(&tr->max_buffer);
7716 			else
7717 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7718 		}
7719 		break;
7720 	}
7721 
7722 	if (ret >= 0) {
7723 		*ppos += cnt;
7724 		ret = cnt;
7725 	}
7726 out:
7727 	mutex_unlock(&trace_types_lock);
7728 	return ret;
7729 }
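/*
 * A sketch of the snapshot file semantics implemented by the switch
 * above (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	# echo 1 > snapshot	# allocate if needed and take a snapshot
 *	# cat snapshot		# read the snapshotted trace
 *	# echo 2 > snapshot	# clear the snapshot buffer (any value > 1)
 *	# echo 0 > snapshot	# free the snapshot buffer (top level file only)
 */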
7730 
7731 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7732 {
7733 	struct seq_file *m = file->private_data;
7734 	int ret;
7735 
7736 	ret = tracing_release(inode, file);
7737 
7738 	if (file->f_mode & FMODE_READ)
7739 		return ret;
7740 
7741 	/* If write only, the seq_file is just a stub */
7742 	if (m)
7743 		kfree(m->private);
7744 	kfree(m);
7745 
7746 	return 0;
7747 }
7748 
7749 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7750 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7751 				    size_t count, loff_t *ppos);
7752 static int tracing_buffers_release(struct inode *inode, struct file *file);
7753 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7754 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7755 
7756 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7757 {
7758 	struct ftrace_buffer_info *info;
7759 	int ret;
7760 
7761 	/* The following checks for tracefs lockdown */
7762 	ret = tracing_buffers_open(inode, filp);
7763 	if (ret < 0)
7764 		return ret;
7765 
7766 	info = filp->private_data;
7767 
7768 	if (info->iter.trace->use_max_tr) {
7769 		tracing_buffers_release(inode, filp);
7770 		return -EBUSY;
7771 	}
7772 
7773 	info->iter.snapshot = true;
7774 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7775 
7776 	return ret;
7777 }
7778 
7779 #endif /* CONFIG_TRACER_SNAPSHOT */
7780 
7781 
7782 static const struct file_operations tracing_thresh_fops = {
7783 	.open		= tracing_open_generic,
7784 	.read		= tracing_thresh_read,
7785 	.write		= tracing_thresh_write,
7786 	.llseek		= generic_file_llseek,
7787 };
7788 
7789 #ifdef CONFIG_TRACER_MAX_TRACE
7790 static const struct file_operations tracing_max_lat_fops = {
7791 	.open		= tracing_open_generic_tr,
7792 	.read		= tracing_max_lat_read,
7793 	.write		= tracing_max_lat_write,
7794 	.llseek		= generic_file_llseek,
7795 	.release	= tracing_release_generic_tr,
7796 };
7797 #endif
7798 
7799 static const struct file_operations set_tracer_fops = {
7800 	.open		= tracing_open_generic_tr,
7801 	.read		= tracing_set_trace_read,
7802 	.write		= tracing_set_trace_write,
7803 	.llseek		= generic_file_llseek,
7804 	.release	= tracing_release_generic_tr,
7805 };
7806 
7807 static const struct file_operations tracing_pipe_fops = {
7808 	.open		= tracing_open_pipe,
7809 	.poll		= tracing_poll_pipe,
7810 	.read		= tracing_read_pipe,
7811 	.splice_read	= tracing_splice_read_pipe,
7812 	.release	= tracing_release_pipe,
7813 	.llseek		= no_llseek,
7814 };
7815 
7816 static const struct file_operations tracing_entries_fops = {
7817 	.open		= tracing_open_generic_tr,
7818 	.read		= tracing_entries_read,
7819 	.write		= tracing_entries_write,
7820 	.llseek		= generic_file_llseek,
7821 	.release	= tracing_release_generic_tr,
7822 };
7823 
7824 static const struct file_operations tracing_total_entries_fops = {
7825 	.open		= tracing_open_generic_tr,
7826 	.read		= tracing_total_entries_read,
7827 	.llseek		= generic_file_llseek,
7828 	.release	= tracing_release_generic_tr,
7829 };
7830 
7831 static const struct file_operations tracing_free_buffer_fops = {
7832 	.open		= tracing_open_generic_tr,
7833 	.write		= tracing_free_buffer_write,
7834 	.release	= tracing_free_buffer_release,
7835 };
7836 
7837 static const struct file_operations tracing_mark_fops = {
7838 	.open		= tracing_mark_open,
7839 	.write		= tracing_mark_write,
7840 	.release	= tracing_release_generic_tr,
7841 };
7842 
7843 static const struct file_operations tracing_mark_raw_fops = {
7844 	.open		= tracing_mark_open,
7845 	.write		= tracing_mark_raw_write,
7846 	.release	= tracing_release_generic_tr,
7847 };
7848 
7849 static const struct file_operations trace_clock_fops = {
7850 	.open		= tracing_clock_open,
7851 	.read		= seq_read,
7852 	.llseek		= seq_lseek,
7853 	.release	= tracing_single_release_tr,
7854 	.write		= tracing_clock_write,
7855 };
7856 
7857 static const struct file_operations trace_time_stamp_mode_fops = {
7858 	.open		= tracing_time_stamp_mode_open,
7859 	.read		= seq_read,
7860 	.llseek		= seq_lseek,
7861 	.release	= tracing_single_release_tr,
7862 };
7863 
7864 #ifdef CONFIG_TRACER_SNAPSHOT
7865 static const struct file_operations snapshot_fops = {
7866 	.open		= tracing_snapshot_open,
7867 	.read		= seq_read,
7868 	.write		= tracing_snapshot_write,
7869 	.llseek		= tracing_lseek,
7870 	.release	= tracing_snapshot_release,
7871 };
7872 
7873 static const struct file_operations snapshot_raw_fops = {
7874 	.open		= snapshot_raw_open,
7875 	.read		= tracing_buffers_read,
7876 	.release	= tracing_buffers_release,
7877 	.splice_read	= tracing_buffers_splice_read,
7878 	.llseek		= no_llseek,
7879 };
7880 
7881 #endif /* CONFIG_TRACER_SNAPSHOT */
7882 
7883 /*
7884  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7885  * @filp: The active open file structure
7886  * @ubuf: The userspace provided buffer holding the value to write
7887  * @cnt: The maximum number of bytes to read from @ubuf
7888  * @ppos: The current "file" position
7889  *
7890  * This function implements the write interface for a struct trace_min_max_param.
7891  * The filp->private_data must point to a trace_min_max_param structure that
7892  * defines where to write the value, the min and the max acceptable values,
7893  * and a lock to protect the write.
7894  */
7895 static ssize_t
7896 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7897 {
7898 	struct trace_min_max_param *param = filp->private_data;
7899 	u64 val;
7900 	int err;
7901 
7902 	if (!param)
7903 		return -EFAULT;
7904 
7905 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7906 	if (err)
7907 		return err;
7908 
7909 	if (param->lock)
7910 		mutex_lock(param->lock);
7911 
7912 	if (param->min && val < *param->min)
7913 		err = -EINVAL;
7914 
7915 	if (param->max && val > *param->max)
7916 		err = -EINVAL;
7917 
7918 	if (!err)
7919 		*param->val = val;
7920 
7921 	if (param->lock)
7922 		mutex_unlock(param->lock);
7923 
7924 	if (err)
7925 		return err;
7926 
7927 	return cnt;
7928 }
7929 
7930 /*
7931  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7932  * @filp: The active open file structure
7933  * @ubuf: The userspace provided buffer to read value into
7934  * @cnt: The maximum number of bytes to read
7935  * @ppos: The current "file" position
7936  *
7937  * This function implements the read interface for a struct trace_min_max_param.
7938  * The filp->private_data must point to a trace_min_max_param struct with valid
7939  * data.
7940  */
7941 static ssize_t
7942 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7943 {
7944 	struct trace_min_max_param *param = filp->private_data;
7945 	char buf[U64_STR_SIZE];
7946 	int len;
7947 	u64 val;
7948 
7949 	if (!param)
7950 		return -EFAULT;
7951 
7952 	val = *param->val;
7953 
7954 	if (cnt > sizeof(buf))
7955 		cnt = sizeof(buf);
7956 
7957 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7958 
7959 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7960 }
7961 
7962 const struct file_operations trace_min_max_fops = {
7963 	.open		= tracing_open_generic,
7964 	.read		= trace_min_max_read,
7965 	.write		= trace_min_max_write,
7966 };
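/*
 * A minimal sketch of wiring a u64 knob to trace_min_max_fops; the
 * names below are illustrative only, and the field layout is that of
 * struct trace_min_max_param as declared in trace.h:
 *
 *	static u64 my_val, my_min = 1, my_max = 1000;
 *	static DEFINE_MUTEX(my_lock);
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent, &my_param,
 *			  &trace_min_max_fops);
 */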
7967 
7968 #define TRACING_LOG_ERRS_MAX	8
7969 #define TRACING_LOG_LOC_MAX	128
7970 
7971 #define CMD_PREFIX "  Command: "
7972 
7973 struct err_info {
7974 	const char	**errs;	/* ptr to loc-specific array of err strings */
7975 	u8		type;	/* index into errs -> specific err string */
7976 	u16		pos;	/* caret position */
7977 	u64		ts;
7978 };
7979 
7980 struct tracing_log_err {
7981 	struct list_head	list;
7982 	struct err_info		info;
7983 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7984 	char			*cmd;                     /* what caused err */
7985 };
7986 
7987 static DEFINE_MUTEX(tracing_err_log_lock);
7988 
7989 static struct tracing_log_err *alloc_tracing_log_err(int len)
7990 {
7991 	struct tracing_log_err *err;
7992 
7993 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7994 	if (!err)
7995 		return ERR_PTR(-ENOMEM);
7996 
7997 	err->cmd = kzalloc(len, GFP_KERNEL);
7998 	if (!err->cmd) {
7999 		kfree(err);
8000 		return ERR_PTR(-ENOMEM);
8001 	}
8002 
8003 	return err;
8004 }
8005 
8006 static void free_tracing_log_err(struct tracing_log_err *err)
8007 {
8008 	kfree(err->cmd);
8009 	kfree(err);
8010 }
8011 
8012 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8013 						   int len)
8014 {
8015 	struct tracing_log_err *err;
8016 	char *cmd;
8017 
8018 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8019 		err = alloc_tracing_log_err(len);
8020 		if (PTR_ERR(err) != -ENOMEM)
8021 			tr->n_err_log_entries++;
8022 
8023 		return err;
8024 	}
8025 	cmd = kzalloc(len, GFP_KERNEL);
8026 	if (!cmd)
8027 		return ERR_PTR(-ENOMEM);
8028 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8029 	kfree(err->cmd);
8030 	err->cmd = cmd;
8031 	list_del(&err->list);
8032 
8033 	return err;
8034 }
8035 
8036 /**
8037  * err_pos - find the position of a string within a command for error careting
8038  * @cmd: The tracing command that caused the error
8039  * @str: The string to position the caret at within @cmd
8040  *
8041  * Finds the position of the first occurrence of @str within @cmd.  The
8042  * return value can be passed to tracing_log_err() for caret placement
8043  * within @cmd.
8044  *
8045  * Returns the index within @cmd of the first occurrence of @str or 0
8046  * if @str was not found.
8047  */
8048 unsigned int err_pos(char *cmd, const char *str)
8049 {
8050 	char *found;
8051 
8052 	if (WARN_ON(!strlen(cmd)))
8053 		return 0;
8054 
8055 	found = strstr(cmd, str);
8056 	if (found)
8057 		return found - cmd;
8058 
8059 	return 0;
8060 }
8061 
8062 /**
8063  * tracing_log_err - write an error to the tracing error log
8064  * @tr: The associated trace array for the error (NULL for top level array)
8065  * @loc: A string describing where the error occurred
8066  * @cmd: The tracing command that caused the error
8067  * @errs: The array of loc-specific static error strings
8068  * @type: The index into errs[], which produces the specific static err string
8069  * @pos: The position the caret should be placed in the cmd
8070  *
8071  * Writes an error into tracing/error_log of the form:
8072  *
8073  * <loc>: error: <text>
8074  *   Command: <cmd>
8075  *              ^
8076  *
8077  * tracing/error_log is a small log file containing the last
8078  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8079  * unless there has been a tracing error, and the error log can be
8080  * cleared and have its memory freed by writing the empty string in
8081  * truncation mode to it, i.e. echo > tracing/error_log.
8082  *
8083  * NOTE: the @errs array along with the @type param is used to
8084  * produce a static error string - this string is not copied and saved
8085  * when the error is logged - only a pointer to it is saved.  See
8086  * existing callers for examples of how static strings are typically
8087  * defined for use with tracing_log_err().
8088  */
8089 void tracing_log_err(struct trace_array *tr,
8090 		     const char *loc, const char *cmd,
8091 		     const char **errs, u8 type, u16 pos)
8092 {
8093 	struct tracing_log_err *err;
8094 	int len = 0;
8095 
8096 	if (!tr)
8097 		tr = &global_trace;
8098 
8099 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8100 
8101 	mutex_lock(&tracing_err_log_lock);
8102 	err = get_tracing_log_err(tr, len);
8103 	if (PTR_ERR(err) == -ENOMEM) {
8104 		mutex_unlock(&tracing_err_log_lock);
8105 		return;
8106 	}
8107 
8108 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8109 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8110 
8111 	err->info.errs = errs;
8112 	err->info.type = type;
8113 	err->info.pos = pos;
8114 	err->info.ts = local_clock();
8115 
8116 	list_add_tail(&err->list, &tr->err_log);
8117 	mutex_unlock(&tracing_err_log_lock);
8118 }
8119 
8120 static void clear_tracing_err_log(struct trace_array *tr)
8121 {
8122 	struct tracing_log_err *err, *next;
8123 
8124 	mutex_lock(&tracing_err_log_lock);
8125 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8126 		list_del(&err->list);
8127 		free_tracing_log_err(err);
8128 	}
8129 
8130 	tr->n_err_log_entries = 0;
8131 	mutex_unlock(&tracing_err_log_lock);
8132 }
8133 
8134 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8135 {
8136 	struct trace_array *tr = m->private;
8137 
8138 	mutex_lock(&tracing_err_log_lock);
8139 
8140 	return seq_list_start(&tr->err_log, *pos);
8141 }
8142 
8143 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8144 {
8145 	struct trace_array *tr = m->private;
8146 
8147 	return seq_list_next(v, &tr->err_log, pos);
8148 }
8149 
8150 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8151 {
8152 	mutex_unlock(&tracing_err_log_lock);
8153 }
8154 
8155 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8156 {
8157 	u16 i;
8158 
8159 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8160 		seq_putc(m, ' ');
8161 	for (i = 0; i < pos; i++)
8162 		seq_putc(m, ' ');
8163 	seq_puts(m, "^\n");
8164 }
8165 
8166 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8167 {
8168 	struct tracing_log_err *err = v;
8169 
8170 	if (err) {
8171 		const char *err_text = err->info.errs[err->info.type];
8172 		u64 sec = err->info.ts;
8173 		u32 nsec;
8174 
8175 		nsec = do_div(sec, NSEC_PER_SEC);
8176 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8177 			   err->loc, err_text);
8178 		seq_printf(m, "%s", err->cmd);
8179 		tracing_err_log_show_pos(m, err->info.pos);
8180 	}
8181 
8182 	return 0;
8183 }
8184 
8185 static const struct seq_operations tracing_err_log_seq_ops = {
8186 	.start  = tracing_err_log_seq_start,
8187 	.next   = tracing_err_log_seq_next,
8188 	.stop   = tracing_err_log_seq_stop,
8189 	.show   = tracing_err_log_seq_show
8190 };
8191 
8192 static int tracing_err_log_open(struct inode *inode, struct file *file)
8193 {
8194 	struct trace_array *tr = inode->i_private;
8195 	int ret = 0;
8196 
8197 	ret = tracing_check_open_get_tr(tr);
8198 	if (ret)
8199 		return ret;
8200 
8201 	/* If this file was opened for write, then erase contents */
8202 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8203 		clear_tracing_err_log(tr);
8204 
8205 	if (file->f_mode & FMODE_READ) {
8206 		ret = seq_open(file, &tracing_err_log_seq_ops);
8207 		if (!ret) {
8208 			struct seq_file *m = file->private_data;
8209 			m->private = tr;
8210 		} else {
8211 			trace_array_put(tr);
8212 		}
8213 	}
8214 	return ret;
8215 }
8216 
8217 static ssize_t tracing_err_log_write(struct file *file,
8218 				     const char __user *buffer,
8219 				     size_t count, loff_t *ppos)
8220 {
8221 	return count;
8222 }
8223 
8224 static int tracing_err_log_release(struct inode *inode, struct file *file)
8225 {
8226 	struct trace_array *tr = inode->i_private;
8227 
8228 	trace_array_put(tr);
8229 
8230 	if (file->f_mode & FMODE_READ)
8231 		seq_release(inode, file);
8232 
8233 	return 0;
8234 }
8235 
8236 static const struct file_operations tracing_err_log_fops = {
8237 	.open           = tracing_err_log_open,
8238 	.write		= tracing_err_log_write,
8239 	.read           = seq_read,
8240 	.llseek         = tracing_lseek,
8241 	.release        = tracing_err_log_release,
8242 };
8243 
8244 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8245 {
8246 	struct trace_array *tr = inode->i_private;
8247 	struct ftrace_buffer_info *info;
8248 	int ret;
8249 
8250 	ret = tracing_check_open_get_tr(tr);
8251 	if (ret)
8252 		return ret;
8253 
8254 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8255 	if (!info) {
8256 		trace_array_put(tr);
8257 		return -ENOMEM;
8258 	}
8259 
8260 	mutex_lock(&trace_types_lock);
8261 
8262 	info->iter.tr		= tr;
8263 	info->iter.cpu_file	= tracing_get_cpu(inode);
8264 	info->iter.trace	= tr->current_trace;
8265 	info->iter.array_buffer = &tr->array_buffer;
8266 	info->spare		= NULL;
8267 	/* Force reading ring buffer for first read */
8268 	info->read		= (unsigned int)-1;
8269 
8270 	filp->private_data = info;
8271 
8272 	tr->trace_ref++;
8273 
8274 	mutex_unlock(&trace_types_lock);
8275 
8276 	ret = nonseekable_open(inode, filp);
8277 	if (ret < 0)
8278 		trace_array_put(tr);
8279 
8280 	return ret;
8281 }
8282 
8283 static __poll_t
8284 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8285 {
8286 	struct ftrace_buffer_info *info = filp->private_data;
8287 	struct trace_iterator *iter = &info->iter;
8288 
8289 	return trace_poll(iter, filp, poll_table);
8290 }
8291 
8292 static ssize_t
8293 tracing_buffers_read(struct file *filp, char __user *ubuf,
8294 		     size_t count, loff_t *ppos)
8295 {
8296 	struct ftrace_buffer_info *info = filp->private_data;
8297 	struct trace_iterator *iter = &info->iter;
8298 	void *trace_data;
8299 	int page_size;
8300 	ssize_t ret = 0;
8301 	ssize_t size;
8302 
8303 	if (!count)
8304 		return 0;
8305 
8306 #ifdef CONFIG_TRACER_MAX_TRACE
8307 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8308 		return -EBUSY;
8309 #endif
8310 
8311 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8312 
8313 	/* Make sure the spare matches the current sub buffer size */
8314 	if (info->spare) {
8315 		if (page_size != info->spare_size) {
8316 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8317 						   info->spare_cpu, info->spare);
8318 			info->spare = NULL;
8319 		}
8320 	}
8321 
8322 	if (!info->spare) {
8323 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8324 							  iter->cpu_file);
8325 		if (IS_ERR(info->spare)) {
8326 			ret = PTR_ERR(info->spare);
8327 			info->spare = NULL;
8328 		} else {
8329 			info->spare_cpu = iter->cpu_file;
8330 			info->spare_size = page_size;
8331 		}
8332 	}
8333 	if (!info->spare)
8334 		return ret;
8335 
8336 	/* Do we have previous read data to read? */
8337 	if (info->read < page_size)
8338 		goto read;
8339 
8340  again:
8341 	trace_access_lock(iter->cpu_file);
8342 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8343 				    info->spare,
8344 				    count,
8345 				    iter->cpu_file, 0);
8346 	trace_access_unlock(iter->cpu_file);
8347 
8348 	if (ret < 0) {
8349 		if (trace_empty(iter)) {
8350 			if ((filp->f_flags & O_NONBLOCK))
8351 				return -EAGAIN;
8352 
8353 			ret = wait_on_pipe(iter, 0);
8354 			if (ret)
8355 				return ret;
8356 
8357 			goto again;
8358 		}
8359 		return 0;
8360 	}
8361 
8362 	info->read = 0;
8363  read:
8364 	size = page_size - info->read;
8365 	if (size > count)
8366 		size = count;
8367 	trace_data = ring_buffer_read_page_data(info->spare);
8368 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8369 	if (ret == size)
8370 		return -EFAULT;
8371 
8372 	size -= ret;
8373 
8374 	*ppos += size;
8375 	info->read += size;
8376 
8377 	return size;
8378 }
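/*
 * A minimal user-space sketch of reading per_cpu/cpuN/trace_pipe_raw
 * (assuming tracefs is mounted at /sys/kernel/tracing); each read
 * returns raw binary sub-buffer data for off-line decoding:
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	char page[65536];
 *	ssize_t r = read(fd, page, sizeof(page));
 */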
8379 
8380 static int tracing_buffers_release(struct inode *inode, struct file *file)
8381 {
8382 	struct ftrace_buffer_info *info = file->private_data;
8383 	struct trace_iterator *iter = &info->iter;
8384 
8385 	mutex_lock(&trace_types_lock);
8386 
8387 	iter->tr->trace_ref--;
8388 
8389 	__trace_array_put(iter->tr);
8390 
8391 	iter->wait_index++;
8392 	/* Make sure the waiters see the new wait_index */
8393 	smp_wmb();
8394 
8395 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8396 
8397 	if (info->spare)
8398 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8399 					   info->spare_cpu, info->spare);
8400 	kvfree(info);
8401 
8402 	mutex_unlock(&trace_types_lock);
8403 
8404 	return 0;
8405 }
8406 
8407 struct buffer_ref {
8408 	struct trace_buffer	*buffer;
8409 	void			*page;
8410 	int			cpu;
8411 	refcount_t		refcount;
8412 };
8413 
8414 static void buffer_ref_release(struct buffer_ref *ref)
8415 {
8416 	if (!refcount_dec_and_test(&ref->refcount))
8417 		return;
8418 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8419 	kfree(ref);
8420 }
8421 
8422 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8423 				    struct pipe_buffer *buf)
8424 {
8425 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8426 
8427 	buffer_ref_release(ref);
8428 	buf->private = 0;
8429 }
8430 
8431 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8432 				struct pipe_buffer *buf)
8433 {
8434 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8435 
8436 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8437 		return false;
8438 
8439 	refcount_inc(&ref->refcount);
8440 	return true;
8441 }
8442 
8443 /* Pipe buffer operations for a buffer. */
8444 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8445 	.release		= buffer_pipe_buf_release,
8446 	.get			= buffer_pipe_buf_get,
8447 };
8448 
8449 /*
8450  * Callback from splice_to_pipe(): release any pages left at the end of
8451  * the spd in case we errored out while filling the pipe.
8452  */
8453 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8454 {
8455 	struct buffer_ref *ref =
8456 		(struct buffer_ref *)spd->partial[i].private;
8457 
8458 	buffer_ref_release(ref);
8459 	spd->partial[i].private = 0;
8460 }
8461 
8462 static ssize_t
8463 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8464 			    struct pipe_inode_info *pipe, size_t len,
8465 			    unsigned int flags)
8466 {
8467 	struct ftrace_buffer_info *info = file->private_data;
8468 	struct trace_iterator *iter = &info->iter;
8469 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8470 	struct page *pages_def[PIPE_DEF_BUFFERS];
8471 	struct splice_pipe_desc spd = {
8472 		.pages		= pages_def,
8473 		.partial	= partial_def,
8474 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8475 		.ops		= &buffer_pipe_buf_ops,
8476 		.spd_release	= buffer_spd_release,
8477 	};
8478 	struct buffer_ref *ref;
8479 	int page_size;
8480 	int entries, i;
8481 	ssize_t ret = 0;
8482 
8483 #ifdef CONFIG_TRACER_MAX_TRACE
8484 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8485 		return -EBUSY;
8486 #endif
8487 
8488 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8489 	if (*ppos & (page_size - 1))
8490 		return -EINVAL;
8491 
8492 	if (len & (page_size - 1)) {
8493 		if (len < page_size)
8494 			return -EINVAL;
8495 		len &= (~(page_size - 1));
8496 	}
8497 
8498 	if (splice_grow_spd(pipe, &spd))
8499 		return -ENOMEM;
8500 
8501  again:
8502 	trace_access_lock(iter->cpu_file);
8503 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8504 
8505 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8506 		struct page *page;
8507 		int r;
8508 
8509 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8510 		if (!ref) {
8511 			ret = -ENOMEM;
8512 			break;
8513 		}
8514 
8515 		refcount_set(&ref->refcount, 1);
8516 		ref->buffer = iter->array_buffer->buffer;
8517 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8518 		if (IS_ERR(ref->page)) {
8519 			ret = PTR_ERR(ref->page);
8520 			ref->page = NULL;
8521 			kfree(ref);
8522 			break;
8523 		}
8524 		ref->cpu = iter->cpu_file;
8525 
8526 		r = ring_buffer_read_page(ref->buffer, ref->page,
8527 					  len, iter->cpu_file, 1);
8528 		if (r < 0) {
8529 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8530 						   ref->page);
8531 			kfree(ref);
8532 			break;
8533 		}
8534 
8535 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8536 
8537 		spd.pages[i] = page;
8538 		spd.partial[i].len = page_size;
8539 		spd.partial[i].offset = 0;
8540 		spd.partial[i].private = (unsigned long)ref;
8541 		spd.nr_pages++;
8542 		*ppos += page_size;
8543 
8544 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8545 	}
8546 
8547 	trace_access_unlock(iter->cpu_file);
8548 	spd.nr_pages = i;
8549 
8550 	/* did we read anything? */
8551 	if (!spd.nr_pages) {
8552 		long wait_index;
8553 
8554 		if (ret)
8555 			goto out;
8556 
8557 		ret = -EAGAIN;
8558 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8559 			goto out;
8560 
8561 		wait_index = READ_ONCE(iter->wait_index);
8562 
8563 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8564 		if (ret)
8565 			goto out;
8566 
8567 		/* No need to wait after waking up when tracing is off */
8568 		if (!tracer_tracing_is_on(iter->tr))
8569 			goto out;
8570 
8571 		/* Make sure we see the new wait_index */
8572 		smp_rmb();
8573 		if (wait_index != iter->wait_index)
8574 			goto out;
8575 
8576 		goto again;
8577 	}
8578 
8579 	ret = splice_to_pipe(pipe, &spd);
8580 out:
8581 	splice_shrink_spd(&spd);
8582 
8583 	return ret;
8584 }
8585 
8586 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8587 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8588 {
8589 	struct ftrace_buffer_info *info = file->private_data;
8590 	struct trace_iterator *iter = &info->iter;
8591 
8592 	if (cmd)
8593 		return -ENOIOCTLCMD;
8594 
8595 	mutex_lock(&trace_types_lock);
8596 
8597 	iter->wait_index++;
8598 	/* Make sure the waiters see the new wait_index */
8599 	smp_wmb();
8600 
8601 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8602 
8603 	mutex_unlock(&trace_types_lock);
8604 	return 0;
8605 }
8606 
8607 static const struct file_operations tracing_buffers_fops = {
8608 	.open		= tracing_buffers_open,
8609 	.read		= tracing_buffers_read,
8610 	.poll		= tracing_buffers_poll,
8611 	.release	= tracing_buffers_release,
8612 	.splice_read	= tracing_buffers_splice_read,
8613 	.unlocked_ioctl = tracing_buffers_ioctl,
8614 	.llseek		= no_llseek,
8615 };
8616 
8617 static ssize_t
8618 tracing_stats_read(struct file *filp, char __user *ubuf,
8619 		   size_t count, loff_t *ppos)
8620 {
8621 	struct inode *inode = file_inode(filp);
8622 	struct trace_array *tr = inode->i_private;
8623 	struct array_buffer *trace_buf = &tr->array_buffer;
8624 	int cpu = tracing_get_cpu(inode);
8625 	struct trace_seq *s;
8626 	unsigned long cnt;
8627 	unsigned long long t;
8628 	unsigned long usec_rem;
8629 
8630 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8631 	if (!s)
8632 		return -ENOMEM;
8633 
8634 	trace_seq_init(s);
8635 
8636 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8637 	trace_seq_printf(s, "entries: %ld\n", cnt);
8638 
8639 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8640 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8641 
8642 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8643 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8644 
8645 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8646 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8647 
8648 	if (trace_clocks[tr->clock_id].in_ns) {
8649 		/* local or global for trace_clock */
8650 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8651 		usec_rem = do_div(t, USEC_PER_SEC);
8652 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8653 								t, usec_rem);
8654 
8655 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8656 		usec_rem = do_div(t, USEC_PER_SEC);
8657 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8658 	} else {
8659 		/* counter or tsc mode for trace_clock */
8660 		trace_seq_printf(s, "oldest event ts: %llu\n",
8661 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8662 
8663 		trace_seq_printf(s, "now ts: %llu\n",
8664 				ring_buffer_time_stamp(trace_buf->buffer));
8665 	}
8666 
8667 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8668 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8669 
8670 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8671 	trace_seq_printf(s, "read events: %ld\n", cnt);
8672 
8673 	count = simple_read_from_buffer(ubuf, count, ppos,
8674 					s->buffer, trace_seq_used(s));
8675 
8676 	kfree(s);
8677 
8678 	return count;
8679 }
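/*
 * Reading per_cpu/cpuN/stats yields one "name: value" line for each of
 * the counters assembled above: entries, overrun, commit overrun,
 * bytes, oldest event ts, now ts, dropped events and read events.
 */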
8680 
8681 static const struct file_operations tracing_stats_fops = {
8682 	.open		= tracing_open_generic_tr,
8683 	.read		= tracing_stats_read,
8684 	.llseek		= generic_file_llseek,
8685 	.release	= tracing_release_generic_tr,
8686 };
8687 
8688 #ifdef CONFIG_DYNAMIC_FTRACE
8689 
8690 static ssize_t
8691 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8692 		  size_t cnt, loff_t *ppos)
8693 {
8694 	ssize_t ret;
8695 	char *buf;
8696 	int r;
8697 
8698 	/* 256 should be plenty to hold the amount needed */
8699 	buf = kmalloc(256, GFP_KERNEL);
8700 	if (!buf)
8701 		return -ENOMEM;
8702 
8703 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8704 		      ftrace_update_tot_cnt,
8705 		      ftrace_number_of_pages,
8706 		      ftrace_number_of_groups);
8707 
8708 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8709 	kfree(buf);
8710 	return ret;
8711 }
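/*
 * The resulting dyn_ftrace_total_info file reports ftrace_update_tot_cnt
 * together with the number of pages and groups holding the dyn_ftrace
 * records, in the "%ld pages:%ld groups: %ld" format built above.
 */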
8712 
8713 static const struct file_operations tracing_dyn_info_fops = {
8714 	.open		= tracing_open_generic,
8715 	.read		= tracing_read_dyn_info,
8716 	.llseek		= generic_file_llseek,
8717 };
8718 #endif /* CONFIG_DYNAMIC_FTRACE */
8719 
8720 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8721 static void
8722 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8723 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8724 		void *data)
8725 {
8726 	tracing_snapshot_instance(tr);
8727 }
8728 
8729 static void
8730 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8731 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8732 		      void *data)
8733 {
8734 	struct ftrace_func_mapper *mapper = data;
8735 	long *count = NULL;
8736 
8737 	if (mapper)
8738 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8739 
8740 	if (count) {
8741 
8742 		if (*count <= 0)
8743 			return;
8744 
8745 		(*count)--;
8746 	}
8747 
8748 	tracing_snapshot_instance(tr);
8749 }
8750 
8751 static int
8752 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8753 		      struct ftrace_probe_ops *ops, void *data)
8754 {
8755 	struct ftrace_func_mapper *mapper = data;
8756 	long *count = NULL;
8757 
8758 	seq_printf(m, "%ps:", (void *)ip);
8759 
8760 	seq_puts(m, "snapshot");
8761 
8762 	if (mapper)
8763 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8764 
8765 	if (count)
8766 		seq_printf(m, ":count=%ld\n", *count);
8767 	else
8768 		seq_puts(m, ":unlimited\n");
8769 
8770 	return 0;
8771 }
8772 
8773 static int
8774 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8775 		     unsigned long ip, void *init_data, void **data)
8776 {
8777 	struct ftrace_func_mapper *mapper = *data;
8778 
8779 	if (!mapper) {
8780 		mapper = allocate_ftrace_func_mapper();
8781 		if (!mapper)
8782 			return -ENOMEM;
8783 		*data = mapper;
8784 	}
8785 
8786 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8787 }
8788 
8789 static void
8790 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8791 		     unsigned long ip, void *data)
8792 {
8793 	struct ftrace_func_mapper *mapper = data;
8794 
8795 	if (!ip) {
8796 		if (!mapper)
8797 			return;
8798 		free_ftrace_func_mapper(mapper, NULL);
8799 		return;
8800 	}
8801 
8802 	ftrace_func_mapper_remove_ip(mapper, ip);
8803 }
8804 
8805 static struct ftrace_probe_ops snapshot_probe_ops = {
8806 	.func			= ftrace_snapshot,
8807 	.print			= ftrace_snapshot_print,
8808 };
8809 
8810 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8811 	.func			= ftrace_count_snapshot,
8812 	.print			= ftrace_snapshot_print,
8813 	.init			= ftrace_snapshot_init,
8814 	.free			= ftrace_snapshot_free,
8815 };
8816 
8817 static int
8818 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8819 			       char *glob, char *cmd, char *param, int enable)
8820 {
8821 	struct ftrace_probe_ops *ops;
8822 	void *count = (void *)-1;
8823 	char *number;
8824 	int ret;
8825 
8826 	if (!tr)
8827 		return -ENODEV;
8828 
8829 	/* hash funcs only work with set_ftrace_filter */
8830 	if (!enable)
8831 		return -EINVAL;
8832 
8833 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8834 
8835 	if (glob[0] == '!')
8836 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8837 
8838 	if (!param)
8839 		goto out_reg;
8840 
8841 	number = strsep(&param, ":");
8842 
8843 	if (!strlen(number))
8844 		goto out_reg;
8845 
8846 	/*
8847 	 * We use the callback data field (which is a pointer)
8848 	 * as our counter.
8849 	 */
8850 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8851 	if (ret)
8852 		return ret;
8853 
8854  out_reg:
8855 	ret = tracing_alloc_snapshot_instance(tr);
8856 	if (ret < 0)
8857 		goto out;
8858 
8859 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8860 
8861  out:
8862 	return ret < 0 ? ret : 0;
8863 }
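/*
 * A sketch of the "snapshot" function command registered below, issued
 * through set_ftrace_filter (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *	# echo 'do_sys_open:snapshot' > set_ftrace_filter	# snapshot on every hit
 *	# echo 'do_sys_open:snapshot:5' > set_ftrace_filter	# only the first 5 hits
 *	# echo '!do_sys_open:snapshot' > set_ftrace_filter	# remove the probe
 */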
8864 
8865 static struct ftrace_func_command ftrace_snapshot_cmd = {
8866 	.name			= "snapshot",
8867 	.func			= ftrace_trace_snapshot_callback,
8868 };
8869 
8870 static __init int register_snapshot_cmd(void)
8871 {
8872 	return register_ftrace_command(&ftrace_snapshot_cmd);
8873 }
8874 #else
8875 static inline __init int register_snapshot_cmd(void) { return 0; }
8876 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8877 
8878 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8879 {
8880 	if (WARN_ON(!tr->dir))
8881 		return ERR_PTR(-ENODEV);
8882 
8883 	/* Top directory uses NULL as the parent */
8884 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8885 		return NULL;
8886 
8887 	/* All sub buffers have a descriptor */
8888 	return tr->dir;
8889 }
8890 
8891 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8892 {
8893 	struct dentry *d_tracer;
8894 
8895 	if (tr->percpu_dir)
8896 		return tr->percpu_dir;
8897 
8898 	d_tracer = tracing_get_dentry(tr);
8899 	if (IS_ERR(d_tracer))
8900 		return NULL;
8901 
8902 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8903 
8904 	MEM_FAIL(!tr->percpu_dir,
8905 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8906 
8907 	return tr->percpu_dir;
8908 }
8909 
8910 static struct dentry *
8911 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8912 		      void *data, long cpu, const struct file_operations *fops)
8913 {
8914 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8915 
8916 	if (ret) /* See tracing_get_cpu() */
8917 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8918 	return ret;
8919 }
8920 
8921 static void
8922 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8923 {
8924 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8925 	struct dentry *d_cpu;
8926 	char cpu_dir[30]; /* 30 characters should be more than enough */
8927 
8928 	if (!d_percpu)
8929 		return;
8930 
8931 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8932 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8933 	if (!d_cpu) {
8934 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8935 		return;
8936 	}
8937 
8938 	/* per cpu trace_pipe */
8939 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8940 				tr, cpu, &tracing_pipe_fops);
8941 
8942 	/* per cpu trace */
8943 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8944 				tr, cpu, &tracing_fops);
8945 
8946 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8947 				tr, cpu, &tracing_buffers_fops);
8948 
8949 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8950 				tr, cpu, &tracing_stats_fops);
8951 
8952 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8953 				tr, cpu, &tracing_entries_fops);
8954 
8955 #ifdef CONFIG_TRACER_SNAPSHOT
8956 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8957 				tr, cpu, &snapshot_fops);
8958 
8959 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8960 				tr, cpu, &snapshot_raw_fops);
8961 #endif
8962 }
8963 
8964 #ifdef CONFIG_FTRACE_SELFTEST
8965 /* Let selftest have access to static functions in this file */
8966 #include "trace_selftest.c"
8967 #endif
8968 
8969 static ssize_t
8970 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8971 			loff_t *ppos)
8972 {
8973 	struct trace_option_dentry *topt = filp->private_data;
8974 	char *buf;
8975 
8976 	if (topt->flags->val & topt->opt->bit)
8977 		buf = "1\n";
8978 	else
8979 		buf = "0\n";
8980 
8981 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8982 }
8983 
8984 static ssize_t
8985 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8986 			 loff_t *ppos)
8987 {
8988 	struct trace_option_dentry *topt = filp->private_data;
8989 	unsigned long val;
8990 	int ret;
8991 
8992 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8993 	if (ret)
8994 		return ret;
8995 
8996 	if (val != 0 && val != 1)
8997 		return -EINVAL;
8998 
8999 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9000 		mutex_lock(&trace_types_lock);
9001 		ret = __set_tracer_option(topt->tr, topt->flags,
9002 					  topt->opt, !val);
9003 		mutex_unlock(&trace_types_lock);
9004 		if (ret)
9005 			return ret;
9006 	}
9007 
9008 	*ppos += cnt;
9009 
9010 	return cnt;
9011 }
9012 
9013 static int tracing_open_options(struct inode *inode, struct file *filp)
9014 {
9015 	struct trace_option_dentry *topt = inode->i_private;
9016 	int ret;
9017 
9018 	ret = tracing_check_open_get_tr(topt->tr);
9019 	if (ret)
9020 		return ret;
9021 
9022 	filp->private_data = inode->i_private;
9023 	return 0;
9024 }
9025 
9026 static int tracing_release_options(struct inode *inode, struct file *file)
9027 {
9028 	struct trace_option_dentry *topt = file->private_data;
9029 
9030 	trace_array_put(topt->tr);
9031 	return 0;
9032 }
9033 
9034 static const struct file_operations trace_options_fops = {
9035 	.open = tracing_open_options,
9036 	.read = trace_options_read,
9037 	.write = trace_options_write,
9038 	.llseek	= generic_file_llseek,
9039 	.release = tracing_release_options,
9040 };
9041 
9042 /*
9043  * In order to pass in both the trace_array descriptor and the index of
9044  * the flag that the trace option file represents, the trace_array
9045  * has a character array of trace_flags_index[], which holds the index
9046  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9047  * The address of this character array is passed to the flag option file
9048  * read/write callbacks.
9049  *
9050  * In order to extract both the index and the trace_array descriptor,
9051  * get_tr_index() uses the following algorithm.
9052  *
9053  *   idx = *ptr;
9054  *
9055  * As the pointer itself is the address of the array slot whose value
9056  * is its own index (remember index[1] == 1).
9057  *
9058  * Then, to get the trace_array descriptor, subtracting that index from
9059  * the pointer gives the start of the index array itself.
9060  *
9061  *   ptr - idx == &index[0]
9062  *
9063  * Then a simple container_of() from that pointer gets us to the
9064  * trace_array descriptor.
9065  */
9066 static void get_tr_index(void *data, struct trace_array **ptr,
9067 			 unsigned int *pindex)
9068 {
9069 	*pindex = *(unsigned char *)data;
9070 
9071 	*ptr = container_of(data - *pindex, struct trace_array,
9072 			    trace_flags_index);
9073 }
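/*
 * Worked example: for the flag at index 3, the option file's private
 * data is &tr->trace_flags_index[3], whose stored value is 3.
 * Subtracting that value from the pointer yields
 * &tr->trace_flags_index[0], and container_of() on that address
 * recovers the enclosing trace_array.
 */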
9074 
9075 static ssize_t
9076 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9077 			loff_t *ppos)
9078 {
9079 	void *tr_index = filp->private_data;
9080 	struct trace_array *tr;
9081 	unsigned int index;
9082 	char *buf;
9083 
9084 	get_tr_index(tr_index, &tr, &index);
9085 
9086 	if (tr->trace_flags & (1 << index))
9087 		buf = "1\n";
9088 	else
9089 		buf = "0\n";
9090 
9091 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9092 }
9093 
9094 static ssize_t
9095 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9096 			 loff_t *ppos)
9097 {
9098 	void *tr_index = filp->private_data;
9099 	struct trace_array *tr;
9100 	unsigned int index;
9101 	unsigned long val;
9102 	int ret;
9103 
9104 	get_tr_index(tr_index, &tr, &index);
9105 
9106 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9107 	if (ret)
9108 		return ret;
9109 
9110 	if (val != 0 && val != 1)
9111 		return -EINVAL;
9112 
9113 	mutex_lock(&event_mutex);
9114 	mutex_lock(&trace_types_lock);
9115 	ret = set_tracer_flag(tr, 1 << index, val);
9116 	mutex_unlock(&trace_types_lock);
9117 	mutex_unlock(&event_mutex);
9118 
9119 	if (ret < 0)
9120 		return ret;
9121 
9122 	*ppos += cnt;
9123 
9124 	return cnt;
9125 }
9126 
9127 static const struct file_operations trace_options_core_fops = {
9128 	.open = tracing_open_generic,
9129 	.read = trace_options_core_read,
9130 	.write = trace_options_core_write,
9131 	.llseek = generic_file_llseek,
9132 };
9133 
9134 struct dentry *trace_create_file(const char *name,
9135 				 umode_t mode,
9136 				 struct dentry *parent,
9137 				 void *data,
9138 				 const struct file_operations *fops)
9139 {
9140 	struct dentry *ret;
9141 
9142 	ret = tracefs_create_file(name, mode, parent, data, fops);
9143 	if (!ret)
9144 		pr_warn("Could not create tracefs '%s' entry\n", name);
9145 
9146 	return ret;
9147 }
9148 
9149 
9150 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9151 {
9152 	struct dentry *d_tracer;
9153 
9154 	if (tr->options)
9155 		return tr->options;
9156 
9157 	d_tracer = tracing_get_dentry(tr);
9158 	if (IS_ERR(d_tracer))
9159 		return NULL;
9160 
9161 	tr->options = tracefs_create_dir("options", d_tracer);
9162 	if (!tr->options) {
9163 		pr_warn("Could not create tracefs directory 'options'\n");
9164 		return NULL;
9165 	}
9166 
9167 	return tr->options;
9168 }
9169 
9170 static void
9171 create_trace_option_file(struct trace_array *tr,
9172 			 struct trace_option_dentry *topt,
9173 			 struct tracer_flags *flags,
9174 			 struct tracer_opt *opt)
9175 {
9176 	struct dentry *t_options;
9177 
9178 	t_options = trace_options_init_dentry(tr);
9179 	if (!t_options)
9180 		return;
9181 
9182 	topt->flags = flags;
9183 	topt->opt = opt;
9184 	topt->tr = tr;
9185 
9186 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9187 					t_options, topt, &trace_options_fops);
9188 
9189 }
9190 
9191 static void
9192 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9193 {
9194 	struct trace_option_dentry *topts;
9195 	struct trace_options *tr_topts;
9196 	struct tracer_flags *flags;
9197 	struct tracer_opt *opts;
9198 	int cnt;
9199 	int i;
9200 
9201 	if (!tracer)
9202 		return;
9203 
9204 	flags = tracer->flags;
9205 
9206 	if (!flags || !flags->opts)
9207 		return;
9208 
9209 	/*
9210 	 * If this is an instance, only create flags for tracers
9211 	 * the instance may have.
9212 	 */
9213 	if (!trace_ok_for_array(tracer, tr))
9214 		return;
9215 
9216 	for (i = 0; i < tr->nr_topts; i++) {
9217 		/* Make sure there are no duplicate flags. */
9218 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9219 			return;
9220 	}
9221 
9222 	opts = flags->opts;
9223 
9224 	for (cnt = 0; opts[cnt].name; cnt++)
9225 		;
9226 
9227 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9228 	if (!topts)
9229 		return;
9230 
9231 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9232 			    GFP_KERNEL);
9233 	if (!tr_topts) {
9234 		kfree(topts);
9235 		return;
9236 	}
9237 
9238 	tr->topts = tr_topts;
9239 	tr->topts[tr->nr_topts].tracer = tracer;
9240 	tr->topts[tr->nr_topts].topts = topts;
9241 	tr->nr_topts++;
9242 
9243 	for (cnt = 0; opts[cnt].name; cnt++) {
9244 		create_trace_option_file(tr, &topts[cnt], flags,
9245 					 &opts[cnt]);
9246 		MEM_FAIL(topts[cnt].entry == NULL,
9247 			  "Failed to create trace option: %s",
9248 			  opts[cnt].name);
9249 	}
9250 }
9251 
9252 static struct dentry *
9253 create_trace_option_core_file(struct trace_array *tr,
9254 			      const char *option, long index)
9255 {
9256 	struct dentry *t_options;
9257 
9258 	t_options = trace_options_init_dentry(tr);
9259 	if (!t_options)
9260 		return NULL;
9261 
9262 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9263 				 (void *)&tr->trace_flags_index[index],
9264 				 &trace_options_core_fops);
9265 }
9266 
9267 static void create_trace_options_dir(struct trace_array *tr)
9268 {
9269 	struct dentry *t_options;
9270 	bool top_level = tr == &global_trace;
9271 	int i;
9272 
9273 	t_options = trace_options_init_dentry(tr);
9274 	if (!t_options)
9275 		return;
9276 
9277 	for (i = 0; trace_options[i]; i++) {
9278 		if (top_level ||
9279 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9280 			create_trace_option_core_file(tr, trace_options[i], i);
9281 	}
9282 }
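/*
 * Each file created above holds a single boolean, e.g. (assuming
 * tracefs is mounted at /sys/kernel/tracing):
 *
 *	# cat options/sym-offset
 *	0
 *	# echo 1 > options/sym-offset
 */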
9283 
9284 static ssize_t
9285 rb_simple_read(struct file *filp, char __user *ubuf,
9286 	       size_t cnt, loff_t *ppos)
9287 {
9288 	struct trace_array *tr = filp->private_data;
9289 	char buf[64];
9290 	int r;
9291 
9292 	r = tracer_tracing_is_on(tr);
9293 	r = sprintf(buf, "%d\n", r);
9294 
9295 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9296 }
9297 
9298 static ssize_t
9299 rb_simple_write(struct file *filp, const char __user *ubuf,
9300 		size_t cnt, loff_t *ppos)
9301 {
9302 	struct trace_array *tr = filp->private_data;
9303 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9304 	unsigned long val;
9305 	int ret;
9306 
9307 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9308 	if (ret)
9309 		return ret;
9310 
9311 	if (buffer) {
9312 		mutex_lock(&trace_types_lock);
9313 		if (!!val == tracer_tracing_is_on(tr)) {
9314 			val = 0; /* do nothing */
9315 		} else if (val) {
9316 			tracer_tracing_on(tr);
9317 			if (tr->current_trace->start)
9318 				tr->current_trace->start(tr);
9319 		} else {
9320 			tracer_tracing_off(tr);
9321 			if (tr->current_trace->stop)
9322 				tr->current_trace->stop(tr);
9323 			/* Wake up any waiters */
9324 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9325 		}
9326 		mutex_unlock(&trace_types_lock);
9327 	}
9328 
9329 	(*ppos)++;
9330 
9331 	return cnt;
9332 }
9333 
9334 static const struct file_operations rb_simple_fops = {
9335 	.open		= tracing_open_generic_tr,
9336 	.read		= rb_simple_read,
9337 	.write		= rb_simple_write,
9338 	.release	= tracing_release_generic_tr,
9339 	.llseek		= default_llseek,
9340 };
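/*
 * These fops back the per-instance tracing_on file, e.g.:
 *
 *	# echo 0 > tracing_on	# stop recording into the ring buffer
 *	# echo 1 > tracing_on	# resume recording
 */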
9341 
9342 static ssize_t
9343 buffer_percent_read(struct file *filp, char __user *ubuf,
9344 		    size_t cnt, loff_t *ppos)
9345 {
9346 	struct trace_array *tr = filp->private_data;
9347 	char buf[64];
9348 	int r;
9349 
9350 	r = tr->buffer_percent;
9351 	r = sprintf(buf, "%d\n", r);
9352 
9353 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9354 }
9355 
9356 static ssize_t
9357 buffer_percent_write(struct file *filp, const char __user *ubuf,
9358 		     size_t cnt, loff_t *ppos)
9359 {
9360 	struct trace_array *tr = filp->private_data;
9361 	unsigned long val;
9362 	int ret;
9363 
9364 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9365 	if (ret)
9366 		return ret;
9367 
9368 	if (val > 100)
9369 		return -EINVAL;
9370 
9371 	tr->buffer_percent = val;
9372 
9373 	(*ppos)++;
9374 
9375 	return cnt;
9376 }
9377 
9378 static const struct file_operations buffer_percent_fops = {
9379 	.open		= tracing_open_generic_tr,
9380 	.read		= buffer_percent_read,
9381 	.write		= buffer_percent_write,
9382 	.release	= tracing_release_generic_tr,
9383 	.llseek		= default_llseek,
9384 };
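/*
 * The buffer_percent file controls when blocked readers are woken:
 * 0 wakes them on any new data, 100 only when the buffer is full, and
 * anything in between once that percentage of the buffer is filled,
 * e.g.:
 *
 *	# echo 50 > buffer_percent
 */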
9385 
9386 static ssize_t
9387 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9388 {
9389 	struct trace_array *tr = filp->private_data;
9390 	size_t size;
9391 	char buf[64];
9392 	int order;
9393 	int r;
9394 
9395 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9396 	size = (PAGE_SIZE << order) / 1024;
9397 
9398 	r = sprintf(buf, "%zd\n", size);
9399 
9400 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9401 }
9402 
9403 static ssize_t
9404 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9405 			 size_t cnt, loff_t *ppos)
9406 {
9407 	struct trace_array *tr = filp->private_data;
9408 	unsigned long val;
9409 	int old_order;
9410 	int order;
9411 	int pages;
9412 	int ret;
9413 
9414 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9415 	if (ret)
9416 		return ret;
9417 
9418 	val *= 1024; /* value passed in is in KB */
9419 
9420 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9421 	order = fls(pages - 1);
9422 
9423 	/* limit between 1 and 128 system pages */
9424 	if (order < 0 || order > 7)
9425 		return -EINVAL;
9426 
9427 	/* Do not allow tracing while changing the order of the ring buffer */
9428 	tracing_stop_tr(tr);
9429 
9430 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9431 	if (old_order == order)
9432 		goto out;
9433 
9434 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9435 	if (ret)
9436 		goto out;
9437 
9438 #ifdef CONFIG_TRACER_MAX_TRACE
9439 
9440 	if (!tr->allocated_snapshot)
9441 		goto out_max;
9442 
9443 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9444 	if (ret) {
9445 		/* Put back the old order */
9446 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9447 		if (WARN_ON_ONCE(cnt)) {
9448 			/*
9449 			 * AARGH! We are left with different orders!
9450 			 * The max buffer is our "snapshot" buffer.
9451 			 * When a tracer needs a snapshot (one of the
9452 			 * latency tracers), it swaps the max buffer
9453 			 * with the saved snapshot. We succeeded in updating
9454 			 * the order of the main buffer, but failed to update
9455 			 * the order of the max buffer. And when we tried to
9456 			 * reset the main buffer to its original order, we
9457 			 * failed there too. This is very unlikely to
9458 			 * happen, but if it does, warn and kill all
9459 			 * tracing.
9460 			 */
9461 			tracing_disabled = 1;
9462 		}
9463 		goto out;
9464 	}
9465  out_max:
9466 #endif
9467 	(*ppos)++;
9468  out:
9469 	if (ret)
9470 		cnt = ret;
9471 	tracing_start_tr(tr);
9472 	return cnt;
9473 }
9474 
9475 static const struct file_operations buffer_subbuf_size_fops = {
9476 	.open		= tracing_open_generic_tr,
9477 	.read		= buffer_subbuf_size_read,
9478 	.write		= buffer_subbuf_size_write,
9479 	.release	= tracing_release_generic_tr,
9480 	.llseek		= default_llseek,
9481 };
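/*
 * A usage sketch for buffer_subbuf_size_kb, which reads and sets the ring
 * buffer sub-buffer size in kilobytes (paths assume tracefs at
 * /sys/kernel/tracing and a 4K PAGE_SIZE):
 *
 *	cat /sys/kernel/tracing/buffer_subbuf_size_kb		# "4" by default
 *	echo 64 > /sys/kernel/tracing/buffer_subbuf_size_kb	# 16-page sub-buffers
 *
 * Requested sizes are rounded up to the next power-of-two number of pages,
 * and anything larger than 128 pages (512K with 4K pages) is rejected.
 */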
9482 
9483 static struct dentry *trace_instance_dir;
9484 
9485 static void
9486 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9487 
9488 static int
9489 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9490 {
9491 	enum ring_buffer_flags rb_flags;
9492 
9493 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9494 
9495 	buf->tr = tr;
9496 
9497 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9498 	if (!buf->buffer)
9499 		return -ENOMEM;
9500 
9501 	buf->data = alloc_percpu(struct trace_array_cpu);
9502 	if (!buf->data) {
9503 		ring_buffer_free(buf->buffer);
9504 		buf->buffer = NULL;
9505 		return -ENOMEM;
9506 	}
9507 
9508 	/* Allocate the first page for all buffers */
9509 	set_buffer_entries(&tr->array_buffer,
9510 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9511 
9512 	return 0;
9513 }
9514 
9515 static void free_trace_buffer(struct array_buffer *buf)
9516 {
9517 	if (buf->buffer) {
9518 		ring_buffer_free(buf->buffer);
9519 		buf->buffer = NULL;
9520 		free_percpu(buf->data);
9521 		buf->data = NULL;
9522 	}
9523 }
9524 
9525 static int allocate_trace_buffers(struct trace_array *tr, int size)
9526 {
9527 	int ret;
9528 
9529 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9530 	if (ret)
9531 		return ret;
9532 
9533 #ifdef CONFIG_TRACER_MAX_TRACE
9534 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9535 				    allocate_snapshot ? size : 1);
9536 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9537 		free_trace_buffer(&tr->array_buffer);
9538 		return -ENOMEM;
9539 	}
9540 	tr->allocated_snapshot = allocate_snapshot;
9541 
9542 	allocate_snapshot = false;
9543 #endif
9544 
9545 	return 0;
9546 }
9547 
9548 static void free_trace_buffers(struct trace_array *tr)
9549 {
9550 	if (!tr)
9551 		return;
9552 
9553 	free_trace_buffer(&tr->array_buffer);
9554 
9555 #ifdef CONFIG_TRACER_MAX_TRACE
9556 	free_trace_buffer(&tr->max_buffer);
9557 #endif
9558 }
9559 
9560 static void init_trace_flags_index(struct trace_array *tr)
9561 {
9562 	int i;
9563 
9564 	/* Used by the trace options files */
9565 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9566 		tr->trace_flags_index[i] = i;
9567 }
9568 
9569 static void __update_tracer_options(struct trace_array *tr)
9570 {
9571 	struct tracer *t;
9572 
9573 	for (t = trace_types; t; t = t->next)
9574 		add_tracer_options(tr, t);
9575 }
9576 
9577 static void update_tracer_options(struct trace_array *tr)
9578 {
9579 	mutex_lock(&trace_types_lock);
9580 	tracer_options_updated = true;
9581 	__update_tracer_options(tr);
9582 	mutex_unlock(&trace_types_lock);
9583 }
9584 
9585 /* Must have trace_types_lock held */
9586 struct trace_array *trace_array_find(const char *instance)
9587 {
9588 	struct trace_array *tr, *found = NULL;
9589 
9590 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9591 		if (tr->name && strcmp(tr->name, instance) == 0) {
9592 			found = tr;
9593 			break;
9594 		}
9595 	}
9596 
9597 	return found;
9598 }
9599 
9600 struct trace_array *trace_array_find_get(const char *instance)
9601 {
9602 	struct trace_array *tr;
9603 
9604 	mutex_lock(&trace_types_lock);
9605 	tr = trace_array_find(instance);
9606 	if (tr)
9607 		tr->ref++;
9608 	mutex_unlock(&trace_types_lock);
9609 
9610 	return tr;
9611 }
9612 
9613 static int trace_array_create_dir(struct trace_array *tr)
9614 {
9615 	int ret;
9616 
9617 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9618 	if (!tr->dir)
9619 		return -EINVAL;
9620 
9621 	ret = event_trace_add_tracer(tr->dir, tr);
9622 	if (ret) {
9623 		tracefs_remove(tr->dir);
9624 		return ret;
9625 	}
9626 
9627 	init_tracer_tracefs(tr, tr->dir);
9628 	__update_tracer_options(tr);
9629 
9630 	return ret;
9631 }
9632 
9633 static struct trace_array *
9634 trace_array_create_systems(const char *name, const char *systems)
9635 {
9636 	struct trace_array *tr;
9637 	int ret;
9638 
9639 	ret = -ENOMEM;
9640 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9641 	if (!tr)
9642 		return ERR_PTR(ret);
9643 
9644 	tr->name = kstrdup(name, GFP_KERNEL);
9645 	if (!tr->name)
9646 		goto out_free_tr;
9647 
9648 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9649 		goto out_free_tr;
9650 
9651 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9652 		goto out_free_tr;
9653 
9654 	if (systems) {
9655 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9656 		if (!tr->system_names)
9657 			goto out_free_tr;
9658 	}
9659 
9660 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9661 
9662 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9663 
9664 	raw_spin_lock_init(&tr->start_lock);
9665 
9666 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9667 
9668 	tr->current_trace = &nop_trace;
9669 
9670 	INIT_LIST_HEAD(&tr->systems);
9671 	INIT_LIST_HEAD(&tr->events);
9672 	INIT_LIST_HEAD(&tr->hist_vars);
9673 	INIT_LIST_HEAD(&tr->err_log);
9674 
9675 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9676 		goto out_free_tr;
9677 
9678 	/* The ring buffer is expanded by default */
9679 	trace_set_ring_buffer_expanded(tr);
9680 
9681 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9682 		goto out_free_tr;
9683 
9684 	ftrace_init_trace_array(tr);
9685 
9686 	init_trace_flags_index(tr);
9687 
9688 	if (trace_instance_dir) {
9689 		ret = trace_array_create_dir(tr);
9690 		if (ret)
9691 			goto out_free_tr;
9692 	} else
9693 		__trace_early_add_events(tr);
9694 
9695 	list_add(&tr->list, &ftrace_trace_arrays);
9696 
9697 	tr->ref++;
9698 
9699 	return tr;
9700 
9701  out_free_tr:
9702 	ftrace_free_ftrace_ops(tr);
9703 	free_trace_buffers(tr);
9704 	free_cpumask_var(tr->pipe_cpumask);
9705 	free_cpumask_var(tr->tracing_cpumask);
9706 	kfree_const(tr->system_names);
9707 	kfree(tr->name);
9708 	kfree(tr);
9709 
9710 	return ERR_PTR(ret);
9711 }
9712 
9713 static struct trace_array *trace_array_create(const char *name)
9714 {
9715 	return trace_array_create_systems(name, NULL);
9716 }
9717 
9718 static int instance_mkdir(const char *name)
9719 {
9720 	struct trace_array *tr;
9721 	int ret;
9722 
9723 	mutex_lock(&event_mutex);
9724 	mutex_lock(&trace_types_lock);
9725 
9726 	ret = -EEXIST;
9727 	if (trace_array_find(name))
9728 		goto out_unlock;
9729 
9730 	tr = trace_array_create(name);
9731 
9732 	ret = PTR_ERR_OR_ZERO(tr);
9733 
9734 out_unlock:
9735 	mutex_unlock(&trace_types_lock);
9736 	mutex_unlock(&event_mutex);
9737 	return ret;
9738 }
9739 
9740 /**
9741  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9742  * @name: The name of the trace array to be looked up/created.
9743  * @systems: A list of systems to create event directories for (NULL for all)
9744  *
9745  * Returns a pointer to the trace array with the given name,
9746  * or NULL if it cannot be created.
9747  *
9748  * NOTE: This function increments the reference counter associated with the
9749  * trace array returned. This makes sure it cannot be freed while in use.
9750  * Use trace_array_put() once the trace array is no longer needed.
9751  * If the trace_array is to be freed, trace_array_destroy() needs to
9752  * be called after the trace_array_put(), or simply let user space delete
9753  * it from the tracefs instances directory. But until the
9754  * trace_array_put() is called, user space cannot delete it.
9755  *
9756  */
9757 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9758 {
9759 	struct trace_array *tr;
9760 
9761 	mutex_lock(&event_mutex);
9762 	mutex_lock(&trace_types_lock);
9763 
9764 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9765 		if (tr->name && strcmp(tr->name, name) == 0)
9766 			goto out_unlock;
9767 	}
9768 
9769 	tr = trace_array_create_systems(name, systems);
9770 
9771 	if (IS_ERR(tr))
9772 		tr = NULL;
9773 out_unlock:
9774 	if (tr)
9775 		tr->ref++;
9776 
9777 	mutex_unlock(&trace_types_lock);
9778 	mutex_unlock(&event_mutex);
9779 	return tr;
9780 }
9781 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
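/*
 * A minimal sketch of how a kernel module is expected to use the instance
 * API exported above (the instance name is only illustrative):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example-instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	... use it, e.g. with trace_array_printk() or
 *	trace_array_set_clr_event(), which are exported separately ...
 *
 *	trace_array_put(tr);
 *	and, only if the module owns the instance and wants it gone:
 *	trace_array_destroy(tr);
 */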
9782 
9783 static int __remove_instance(struct trace_array *tr)
9784 {
9785 	int i;
9786 
9787 	/* Reference counter for a newly created trace array = 1. */
9788 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9789 		return -EBUSY;
9790 
9791 	list_del(&tr->list);
9792 
9793 	/* Disable all the flags that were enabled coming in */
9794 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9795 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9796 			set_tracer_flag(tr, 1 << i, 0);
9797 	}
9798 
9799 	tracing_set_nop(tr);
9800 	clear_ftrace_function_probes(tr);
9801 	event_trace_del_tracer(tr);
9802 	ftrace_clear_pids(tr);
9803 	ftrace_destroy_function_files(tr);
9804 	tracefs_remove(tr->dir);
9805 	free_percpu(tr->last_func_repeats);
9806 	free_trace_buffers(tr);
9807 	clear_tracing_err_log(tr);
9808 
9809 	for (i = 0; i < tr->nr_topts; i++) {
9810 		kfree(tr->topts[i].topts);
9811 	}
9812 	kfree(tr->topts);
9813 
9814 	free_cpumask_var(tr->pipe_cpumask);
9815 	free_cpumask_var(tr->tracing_cpumask);
9816 	kfree_const(tr->system_names);
9817 	kfree(tr->name);
9818 	kfree(tr);
9819 
9820 	return 0;
9821 }
9822 
9823 int trace_array_destroy(struct trace_array *this_tr)
9824 {
9825 	struct trace_array *tr;
9826 	int ret;
9827 
9828 	if (!this_tr)
9829 		return -EINVAL;
9830 
9831 	mutex_lock(&event_mutex);
9832 	mutex_lock(&trace_types_lock);
9833 
9834 	ret = -ENODEV;
9835 
9836 	/* Make sure the trace array exists before destroying it. */
9837 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9838 		if (tr == this_tr) {
9839 			ret = __remove_instance(tr);
9840 			break;
9841 		}
9842 	}
9843 
9844 	mutex_unlock(&trace_types_lock);
9845 	mutex_unlock(&event_mutex);
9846 
9847 	return ret;
9848 }
9849 EXPORT_SYMBOL_GPL(trace_array_destroy);
9850 
9851 static int instance_rmdir(const char *name)
9852 {
9853 	struct trace_array *tr;
9854 	int ret;
9855 
9856 	mutex_lock(&event_mutex);
9857 	mutex_lock(&trace_types_lock);
9858 
9859 	ret = -ENODEV;
9860 	tr = trace_array_find(name);
9861 	if (tr)
9862 		ret = __remove_instance(tr);
9863 
9864 	mutex_unlock(&trace_types_lock);
9865 	mutex_unlock(&event_mutex);
9866 
9867 	return ret;
9868 }
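/*
 * From user space the two callbacks above are reached with plain mkdir/rmdir
 * in the instances directory (assuming tracefs at /sys/kernel/tracing):
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance still has users (an extra
 * reference or active trace readers), matching __remove_instance() above.
 */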
9869 
9870 static __init void create_trace_instances(struct dentry *d_tracer)
9871 {
9872 	struct trace_array *tr;
9873 
9874 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9875 							 instance_mkdir,
9876 							 instance_rmdir);
9877 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9878 		return;
9879 
9880 	mutex_lock(&event_mutex);
9881 	mutex_lock(&trace_types_lock);
9882 
9883 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9884 		if (!tr->name)
9885 			continue;
9886 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9887 			     "Failed to create instance directory\n"))
9888 			break;
9889 	}
9890 
9891 	mutex_unlock(&trace_types_lock);
9892 	mutex_unlock(&event_mutex);
9893 }
9894 
9895 static void
9896 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9897 {
9898 	int cpu;
9899 
9900 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9901 			tr, &show_traces_fops);
9902 
9903 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9904 			tr, &set_tracer_fops);
9905 
9906 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9907 			  tr, &tracing_cpumask_fops);
9908 
9909 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9910 			  tr, &tracing_iter_fops);
9911 
9912 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9913 			  tr, &tracing_fops);
9914 
9915 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9916 			  tr, &tracing_pipe_fops);
9917 
9918 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9919 			  tr, &tracing_entries_fops);
9920 
9921 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9922 			  tr, &tracing_total_entries_fops);
9923 
9924 	trace_create_file("free_buffer", 0200, d_tracer,
9925 			  tr, &tracing_free_buffer_fops);
9926 
9927 	trace_create_file("trace_marker", 0220, d_tracer,
9928 			  tr, &tracing_mark_fops);
9929 
9930 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9931 
9932 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9933 			  tr, &tracing_mark_raw_fops);
9934 
9935 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9936 			  &trace_clock_fops);
9937 
9938 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9939 			  tr, &rb_simple_fops);
9940 
9941 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9942 			  &trace_time_stamp_mode_fops);
9943 
9944 	tr->buffer_percent = 50;
9945 
9946 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9947 			tr, &buffer_percent_fops);
9948 
9949 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9950 			  tr, &buffer_subbuf_size_fops);
9951 
9952 	create_trace_options_dir(tr);
9953 
9954 #ifdef CONFIG_TRACER_MAX_TRACE
9955 	trace_create_maxlat_file(tr, d_tracer);
9956 #endif
9957 
9958 	if (ftrace_create_function_files(tr, d_tracer))
9959 		MEM_FAIL(1, "Could not allocate function filter files");
9960 
9961 #ifdef CONFIG_TRACER_SNAPSHOT
9962 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9963 			  tr, &snapshot_fops);
9964 #endif
9965 
9966 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9967 			  tr, &tracing_err_log_fops);
9968 
9969 	for_each_tracing_cpu(cpu)
9970 		tracing_init_tracefs_percpu(tr, cpu);
9971 
9972 	ftrace_init_tracefs(tr, d_tracer);
9973 }
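/*
 * Note that init_tracer_tracefs() runs for every instance directory as well,
 * so e.g. /sys/kernel/tracing/instances/foo/ gets its own trace, trace_pipe,
 * tracing_on, buffer_size_kb, ... files that operate only on that instance's
 * ring buffer.
 */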
9974 
9975 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9976 {
9977 	struct vfsmount *mnt;
9978 	struct file_system_type *type;
9979 
9980 	/*
9981 	 * To maintain backward compatibility for tools that mount
9982 	 * debugfs to get to the tracing facility, tracefs is automatically
9983 	 * mounted to the debugfs/tracing directory.
9984 	 */
9985 	type = get_fs_type("tracefs");
9986 	if (!type)
9987 		return NULL;
9988 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9989 	put_filesystem(type);
9990 	if (IS_ERR(mnt))
9991 		return NULL;
9992 	mntget(mnt);
9993 
9994 	return mnt;
9995 }
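/*
 * With this automount in place, legacy tooling that only knows about debugfs
 * keeps working; assuming debugfs is mounted at /sys/kernel/debug, both of
 * these show the same tracefs instance:
 *
 *	ls /sys/kernel/debug/tracing/	# automounted tracefs
 *	ls /sys/kernel/tracing/		# native tracefs mount point
 */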
9996 
9997 /**
9998  * tracing_init_dentry - initialize top level trace array
9999  *
10000  * This is called when creating files or directories in the tracing
10001  * directory. It is called via fs_initcall() by any of the boot up code.
10002  * It returns 0 once the top level tracing directory is available, or a negative error code.
10003  */
10004 int tracing_init_dentry(void)
10005 {
10006 	struct trace_array *tr = &global_trace;
10007 
10008 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10009 		pr_warn("Tracing disabled due to lockdown\n");
10010 		return -EPERM;
10011 	}
10012 
10013 	/* The top level trace array uses NULL as parent */
10014 	if (tr->dir)
10015 		return 0;
10016 
10017 	if (WARN_ON(!tracefs_initialized()))
10018 		return -ENODEV;
10019 
10020 	/*
10021 	 * As there may still be users that expect the tracing
10022 	 * files to exist in debugfs/tracing, we must automount
10023 	 * the tracefs file system there, so older tools still
10024 	 * work with the newer kernel.
10025 	 */
10026 	tr->dir = debugfs_create_automount("tracing", NULL,
10027 					   trace_automount, NULL);
10028 
10029 	return 0;
10030 }
10031 
10032 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10033 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10034 
10035 static struct workqueue_struct *eval_map_wq __initdata;
10036 static struct work_struct eval_map_work __initdata;
10037 static struct work_struct tracerfs_init_work __initdata;
10038 
10039 static void __init eval_map_work_func(struct work_struct *work)
10040 {
10041 	int len;
10042 
10043 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10044 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10045 }
10046 
10047 static int __init trace_eval_init(void)
10048 {
10049 	INIT_WORK(&eval_map_work, eval_map_work_func);
10050 
10051 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10052 	if (!eval_map_wq) {
10053 		pr_err("Unable to allocate eval_map_wq\n");
10054 		/* Do work here */
10055 		eval_map_work_func(&eval_map_work);
10056 		return -ENOMEM;
10057 	}
10058 
10059 	queue_work(eval_map_wq, &eval_map_work);
10060 	return 0;
10061 }
10062 
10063 subsys_initcall(trace_eval_init);
10064 
10065 static int __init trace_eval_sync(void)
10066 {
10067 	/* Make sure the eval map updates are finished */
10068 	if (eval_map_wq)
10069 		destroy_workqueue(eval_map_wq);
10070 	return 0;
10071 }
10072 
10073 late_initcall_sync(trace_eval_sync);
10074 
10075 
10076 #ifdef CONFIG_MODULES
10077 static void trace_module_add_evals(struct module *mod)
10078 {
10079 	if (!mod->num_trace_evals)
10080 		return;
10081 
10082 	/*
10083 	 * Modules with a bad taint do not have events created;
10084 	 * do not bother with their eval maps either.
10085 	 */
10086 	if (trace_module_has_bad_taint(mod))
10087 		return;
10088 
10089 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10090 }
10091 
10092 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10093 static void trace_module_remove_evals(struct module *mod)
10094 {
10095 	union trace_eval_map_item *map;
10096 	union trace_eval_map_item **last = &trace_eval_maps;
10097 
10098 	if (!mod->num_trace_evals)
10099 		return;
10100 
10101 	mutex_lock(&trace_eval_mutex);
10102 
10103 	map = trace_eval_maps;
10104 
10105 	while (map) {
10106 		if (map->head.mod == mod)
10107 			break;
10108 		map = trace_eval_jmp_to_tail(map);
10109 		last = &map->tail.next;
10110 		map = map->tail.next;
10111 	}
10112 	if (!map)
10113 		goto out;
10114 
10115 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10116 	kfree(map);
10117  out:
10118 	mutex_unlock(&trace_eval_mutex);
10119 }
10120 #else
10121 static inline void trace_module_remove_evals(struct module *mod) { }
10122 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10123 
10124 static int trace_module_notify(struct notifier_block *self,
10125 			       unsigned long val, void *data)
10126 {
10127 	struct module *mod = data;
10128 
10129 	switch (val) {
10130 	case MODULE_STATE_COMING:
10131 		trace_module_add_evals(mod);
10132 		break;
10133 	case MODULE_STATE_GOING:
10134 		trace_module_remove_evals(mod);
10135 		break;
10136 	}
10137 
10138 	return NOTIFY_OK;
10139 }
10140 
10141 static struct notifier_block trace_module_nb = {
10142 	.notifier_call = trace_module_notify,
10143 	.priority = 0,
10144 };
10145 #endif /* CONFIG_MODULES */
10146 
10147 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10148 {
10149 
10150 	event_trace_init();
10151 
10152 	init_tracer_tracefs(&global_trace, NULL);
10153 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10154 
10155 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10156 			&global_trace, &tracing_thresh_fops);
10157 
10158 	trace_create_file("README", TRACE_MODE_READ, NULL,
10159 			NULL, &tracing_readme_fops);
10160 
10161 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10162 			NULL, &tracing_saved_cmdlines_fops);
10163 
10164 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10165 			  NULL, &tracing_saved_cmdlines_size_fops);
10166 
10167 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10168 			NULL, &tracing_saved_tgids_fops);
10169 
10170 	trace_create_eval_file(NULL);
10171 
10172 #ifdef CONFIG_MODULES
10173 	register_module_notifier(&trace_module_nb);
10174 #endif
10175 
10176 #ifdef CONFIG_DYNAMIC_FTRACE
10177 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10178 			NULL, &tracing_dyn_info_fops);
10179 #endif
10180 
10181 	create_trace_instances(NULL);
10182 
10183 	update_tracer_options(&global_trace);
10184 }
10185 
10186 static __init int tracer_init_tracefs(void)
10187 {
10188 	int ret;
10189 
10190 	trace_access_lock_init();
10191 
10192 	ret = tracing_init_dentry();
10193 	if (ret)
10194 		return 0;
10195 
10196 	if (eval_map_wq) {
10197 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10198 		queue_work(eval_map_wq, &tracerfs_init_work);
10199 	} else {
10200 		tracer_init_tracefs_work_func(NULL);
10201 	}
10202 
10203 	rv_init_interface();
10204 
10205 	return 0;
10206 }
10207 
10208 fs_initcall(tracer_init_tracefs);
10209 
10210 static int trace_die_panic_handler(struct notifier_block *self,
10211 				unsigned long ev, void *unused);
10212 
10213 static struct notifier_block trace_panic_notifier = {
10214 	.notifier_call = trace_die_panic_handler,
10215 	.priority = INT_MAX - 1,
10216 };
10217 
10218 static struct notifier_block trace_die_notifier = {
10219 	.notifier_call = trace_die_panic_handler,
10220 	.priority = INT_MAX - 1,
10221 };
10222 
10223 /*
10224  * The idea is to execute the following die/panic callback early, in order
10225  * to avoid showing irrelevant information in the trace (like other panic
10226  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10227  * warnings get disabled (to prevent potential log flooding).
10228  */
10229 static int trace_die_panic_handler(struct notifier_block *self,
10230 				unsigned long ev, void *unused)
10231 {
10232 	if (!ftrace_dump_on_oops)
10233 		return NOTIFY_DONE;
10234 
10235 	/* The die notifier requires DIE_OOPS to trigger */
10236 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10237 		return NOTIFY_DONE;
10238 
10239 	ftrace_dump(ftrace_dump_on_oops);
10240 
10241 	return NOTIFY_DONE;
10242 }
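/*
 * The dump itself is opt-in: it can be requested on the kernel command line
 * with "ftrace_dump_on_oops" (all CPUs) or "ftrace_dump_on_oops=orig_cpu"
 * (only the CPU that oopsed), or at run time via the
 * kernel.ftrace_dump_on_oops sysctl.
 */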
10243 
10244 /*
10245  * printk output is capped at a maximum of 1024 characters; we really
10246  * don't need it that big, as nothing should print 1000 characters anyway.
10247  */
10248 #define TRACE_MAX_PRINT		1000
10249 
10250 /*
10251  * Define here KERN_TRACE so that we have one place to modify
10252  * it if we decide to change what log level the ftrace dump
10253  * should be at.
10254  */
10255 #define KERN_TRACE		KERN_EMERG
10256 
10257 void
10258 trace_printk_seq(struct trace_seq *s)
10259 {
10260 	/* Probably should print a warning here. */
10261 	if (s->seq.len >= TRACE_MAX_PRINT)
10262 		s->seq.len = TRACE_MAX_PRINT;
10263 
10264 	/*
10265 	 * More paranoid code. Although the buffer size is set to
10266 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10267 	 * an extra layer of protection.
10268 	 */
10269 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10270 		s->seq.len = s->seq.size - 1;
10271 
10272 	/* Should already be NUL-terminated, but we are paranoid. */
10273 	s->buffer[s->seq.len] = 0;
10274 
10275 	printk(KERN_TRACE "%s", s->buffer);
10276 
10277 	trace_seq_init(s);
10278 }
10279 
10280 void trace_init_global_iter(struct trace_iterator *iter)
10281 {
10282 	iter->tr = &global_trace;
10283 	iter->trace = iter->tr->current_trace;
10284 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10285 	iter->array_buffer = &global_trace.array_buffer;
10286 
10287 	if (iter->trace && iter->trace->open)
10288 		iter->trace->open(iter);
10289 
10290 	/* Annotate start of buffers if we had overruns */
10291 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10292 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10293 
10294 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10295 	if (trace_clocks[iter->tr->clock_id].in_ns)
10296 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10297 
10298 	/* Cannot use kmalloc for iter.temp and iter.fmt (this may run from the panic/NMI dump path) */
10299 	iter->temp = static_temp_buf;
10300 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10301 	iter->fmt = static_fmt_buf;
10302 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10303 }
10304 
10305 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10306 {
10307 	/* use static because iter can be a bit big for the stack */
10308 	static struct trace_iterator iter;
10309 	static atomic_t dump_running;
10310 	struct trace_array *tr = &global_trace;
10311 	unsigned int old_userobj;
10312 	unsigned long flags;
10313 	int cnt = 0, cpu;
10314 
10315 	/* Only allow one dump user at a time. */
10316 	if (atomic_inc_return(&dump_running) != 1) {
10317 		atomic_dec(&dump_running);
10318 		return;
10319 	}
10320 
10321 	/*
10322 	 * Always turn off tracing when we dump.
10323 	 * We don't need to show trace output of what happens
10324 	 * between multiple crashes.
10325 	 *
10326 	 * If the user does a sysrq-z, then they can re-enable
10327 	 * tracing with echo 1 > tracing_on.
10328 	 */
10329 	tracing_off();
10330 
10331 	local_irq_save(flags);
10332 
10333 	/* Simulate the iterator */
10334 	trace_init_global_iter(&iter);
10335 
10336 	for_each_tracing_cpu(cpu) {
10337 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10338 	}
10339 
10340 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10341 
10342 	/* don't look at user memory in panic mode */
10343 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10344 
10345 	switch (oops_dump_mode) {
10346 	case DUMP_ALL:
10347 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10348 		break;
10349 	case DUMP_ORIG:
10350 		iter.cpu_file = raw_smp_processor_id();
10351 		break;
10352 	case DUMP_NONE:
10353 		goto out_enable;
10354 	default:
10355 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10356 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10357 	}
10358 
10359 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10360 
10361 	/* Did function tracer already get disabled? */
10362 	if (ftrace_is_dead()) {
10363 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10364 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10365 	}
10366 
10367 	/*
10368 	 * We need to stop all tracing on all CPUs to read
10369 	 * the next buffer. This is a bit expensive, but it is
10370 	 * not done often. We print everything we can read,
10371 	 * and then release the locks again.
10372 	 */
10373 
10374 	while (!trace_empty(&iter)) {
10375 
10376 		if (!cnt)
10377 			printk(KERN_TRACE "---------------------------------\n");
10378 
10379 		cnt++;
10380 
10381 		trace_iterator_reset(&iter);
10382 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10383 
10384 		if (trace_find_next_entry_inc(&iter) != NULL) {
10385 			int ret;
10386 
10387 			ret = print_trace_line(&iter);
10388 			if (ret != TRACE_TYPE_NO_CONSUME)
10389 				trace_consume(&iter);
10390 		}
10391 		touch_nmi_watchdog();
10392 
10393 		trace_printk_seq(&iter.seq);
10394 	}
10395 
10396 	if (!cnt)
10397 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10398 	else
10399 		printk(KERN_TRACE "---------------------------------\n");
10400 
10401  out_enable:
10402 	tr->trace_flags |= old_userobj;
10403 
10404 	for_each_tracing_cpu(cpu) {
10405 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10406 	}
10407 	atomic_dec(&dump_running);
10408 	local_irq_restore(flags);
10409 }
10410 EXPORT_SYMBOL_GPL(ftrace_dump);
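/*
 * Besides the oops/panic path above, the dump can be triggered manually on a
 * SysRq-enabled kernel with "echo z > /proc/sysrq-trigger", which is handy
 * for grabbing the buffer of a live but misbehaving system.
 */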
10411 
10412 #define WRITE_BUFSIZE  4096
10413 
10414 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10415 				size_t count, loff_t *ppos,
10416 				int (*createfn)(const char *))
10417 {
10418 	char *kbuf, *buf, *tmp;
10419 	int ret = 0;
10420 	size_t done = 0;
10421 	size_t size;
10422 
10423 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10424 	if (!kbuf)
10425 		return -ENOMEM;
10426 
10427 	while (done < count) {
10428 		size = count - done;
10429 
10430 		if (size >= WRITE_BUFSIZE)
10431 			size = WRITE_BUFSIZE - 1;
10432 
10433 		if (copy_from_user(kbuf, buffer + done, size)) {
10434 			ret = -EFAULT;
10435 			goto out;
10436 		}
10437 		kbuf[size] = '\0';
10438 		buf = kbuf;
10439 		do {
10440 			tmp = strchr(buf, '\n');
10441 			if (tmp) {
10442 				*tmp = '\0';
10443 				size = tmp - buf + 1;
10444 			} else {
10445 				size = strlen(buf);
10446 				if (done + size < count) {
10447 					if (buf != kbuf)
10448 						break;
10449 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10450 					pr_warn("Line length is too long: Should be less than %d\n",
10451 						WRITE_BUFSIZE - 2);
10452 					ret = -EINVAL;
10453 					goto out;
10454 				}
10455 			}
10456 			done += size;
10457 
10458 			/* Remove comments */
10459 			tmp = strchr(buf, '#');
10460 
10461 			if (tmp)
10462 				*tmp = '\0';
10463 
10464 			ret = createfn(buf);
10465 			if (ret)
10466 				goto out;
10467 			buf += size;
10468 
10469 		} while (done < count);
10470 	}
10471 	ret = done;
10472 
10473 out:
10474 	kfree(kbuf);
10475 
10476 	return ret;
10477 }
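/*
 * Write handlers such as those for kprobe_events and uprobe_events feed user
 * writes through this helper, so one write may carry several commands, one
 * per line, with '#' starting a comment. For example (the probed symbol is
 * only illustrative):
 *
 *	printf 'p:myprobe do_sys_open\n-:myprobe\n' \
 *		>> /sys/kernel/tracing/kprobe_events
 */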
10478 
10479 #ifdef CONFIG_TRACER_MAX_TRACE
10480 __init static bool tr_needs_alloc_snapshot(const char *name)
10481 {
10482 	char *test;
10483 	int len = strlen(name);
10484 	bool ret;
10485 
10486 	if (!boot_snapshot_index)
10487 		return false;
10488 
10489 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10490 	    boot_snapshot_info[len] == '\t')
10491 		return true;
10492 
10493 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10494 	if (!test)
10495 		return false;
10496 
10497 	sprintf(test, "\t%s\t", name);
10498 	ret = strstr(boot_snapshot_info, test) != NULL;
10499 	kfree(test);
10500 	return ret;
10501 }
10502 
10503 __init static void do_allocate_snapshot(const char *name)
10504 {
10505 	if (!tr_needs_alloc_snapshot(name))
10506 		return;
10507 
10508 	/*
10509 	 * When allocate_snapshot is set, the next call to
10510 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10511 	 * will allocate the snapshot buffer. That will also clear
10512 	 * this flag.
10513 	 */
10514 	allocate_snapshot = true;
10515 }
10516 #else
10517 static inline void do_allocate_snapshot(const char *name) { }
10518 #endif
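/*
 * boot_snapshot_info used above is filled from the "ftrace_boot_snapshot"
 * kernel command line parameter. For example, booting with
 *
 *	trace_instance=foo ftrace_boot_snapshot=foo
 *
 * allocates a snapshot buffer for the boot-created "foo" instance, while a
 * bare "ftrace_boot_snapshot" applies to the top level buffer instead.
 */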
10519 
10520 __init static void enable_instances(void)
10521 {
10522 	struct trace_array *tr;
10523 	char *curr_str;
10524 	char *str;
10525 	char *tok;
10526 
10527 	/* A tab is always appended */
10528 	boot_instance_info[boot_instance_index - 1] = '\0';
10529 	str = boot_instance_info;
10530 
10531 	while ((curr_str = strsep(&str, "\t"))) {
10532 
10533 		tok = strsep(&curr_str, ",");
10534 
10535 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10536 			do_allocate_snapshot(tok);
10537 
10538 		tr = trace_array_get_by_name(tok, NULL);
10539 		if (!tr) {
10540 			pr_warn("Failed to create instance buffer %s\n", curr_str);
10541 			continue;
10542 		}
10543 		/* Allow user space to delete it */
10544 		trace_array_put(tr);
10545 
10546 		while ((tok = strsep(&curr_str, ","))) {
10547 			early_enable_events(tr, tok, true);
10548 		}
10549 	}
10550 }
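/*
 * boot_instance_info is filled from the "trace_instance" kernel command line
 * parameter: the first comma-separated token is the instance name, and any
 * following tokens are events to enable in it. For example:
 *
 *	trace_instance=foo,sched:sched_switch,irq_handler_entry
 *
 * creates the "foo" instance at boot and enables those two events in it.
 */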
10551 
10552 __init static int tracer_alloc_buffers(void)
10553 {
10554 	int ring_buf_size;
10555 	int ret = -ENOMEM;
10556 
10557 
10558 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10559 		pr_warn("Tracing disabled due to lockdown\n");
10560 		return -EPERM;
10561 	}
10562 
10563 	/*
10564 	 * Make sure we don't accidentally add more trace options
10565 	 * than we have bits for.
10566 	 */
10567 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10568 
10569 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10570 		goto out;
10571 
10572 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10573 		goto out_free_buffer_mask;
10574 
10575 	/* Only allocate trace_printk buffers if a trace_printk exists */
10576 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10577 		/* Must be called before global_trace.buffer is allocated */
10578 		trace_printk_init_buffers();
10579 
10580 	/* To save memory, keep the ring buffer at its minimum size */
10581 	if (global_trace.ring_buffer_expanded)
10582 		ring_buf_size = trace_buf_size;
10583 	else
10584 		ring_buf_size = 1;
10585 
10586 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10587 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10588 
10589 	raw_spin_lock_init(&global_trace.start_lock);
10590 
10591 	/*
10592 	 * The prepare callback allocates some memory for the ring buffer. We
10593 	 * don't free the buffer if the CPU goes down. If we were to free
10594 	 * the buffer, then the user would lose any trace that was in the
10595 	 * buffer. The memory will be removed once the "instance" is removed.
10596 	 */
10597 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10598 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10599 				      NULL);
10600 	if (ret < 0)
10601 		goto out_free_cpumask;
10602 	/* Used for event triggers */
10603 	ret = -ENOMEM;
10604 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10605 	if (!temp_buffer)
10606 		goto out_rm_hp_state;
10607 
10608 	if (trace_create_savedcmd() < 0)
10609 		goto out_free_temp_buffer;
10610 
10611 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10612 		goto out_free_savedcmd;
10613 
10614 	/* TODO: make the number of buffers hot pluggable with CPUS */
10615 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10616 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10617 		goto out_free_pipe_cpumask;
10618 	}
10619 	if (global_trace.buffer_disabled)
10620 		tracing_off();
10621 
10622 	if (trace_boot_clock) {
10623 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10624 		if (ret < 0)
10625 			pr_warn("Trace clock %s not defined, going back to default\n",
10626 				trace_boot_clock);
10627 	}
10628 
10629 	/*
10630 	 * register_tracer() might reference current_trace, so it
10631 	 * needs to be set before we register anything. This is
10632 	 * just a bootstrap of current_trace anyway.
10633 	 */
10634 	global_trace.current_trace = &nop_trace;
10635 
10636 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10637 
10638 	ftrace_init_global_array_ops(&global_trace);
10639 
10640 	init_trace_flags_index(&global_trace);
10641 
10642 	register_tracer(&nop_trace);
10643 
10644 	/* Function tracing may start here (via kernel command line) */
10645 	init_function_trace();
10646 
10647 	/* All seems OK, enable tracing */
10648 	tracing_disabled = 0;
10649 
10650 	atomic_notifier_chain_register(&panic_notifier_list,
10651 				       &trace_panic_notifier);
10652 
10653 	register_die_notifier(&trace_die_notifier);
10654 
10655 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10656 
10657 	INIT_LIST_HEAD(&global_trace.systems);
10658 	INIT_LIST_HEAD(&global_trace.events);
10659 	INIT_LIST_HEAD(&global_trace.hist_vars);
10660 	INIT_LIST_HEAD(&global_trace.err_log);
10661 	list_add(&global_trace.list, &ftrace_trace_arrays);
10662 
10663 	apply_trace_boot_options();
10664 
10665 	register_snapshot_cmd();
10666 
10667 	test_can_verify();
10668 
10669 	return 0;
10670 
10671 out_free_pipe_cpumask:
10672 	free_cpumask_var(global_trace.pipe_cpumask);
10673 out_free_savedcmd:
10674 	free_saved_cmdlines_buffer(savedcmd);
10675 out_free_temp_buffer:
10676 	ring_buffer_free(temp_buffer);
10677 out_rm_hp_state:
10678 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10679 out_free_cpumask:
10680 	free_cpumask_var(global_trace.tracing_cpumask);
10681 out_free_buffer_mask:
10682 	free_cpumask_var(tracing_buffer_mask);
10683 out:
10684 	return ret;
10685 }
10686 
10687 void __init ftrace_boot_snapshot(void)
10688 {
10689 #ifdef CONFIG_TRACER_MAX_TRACE
10690 	struct trace_array *tr;
10691 
10692 	if (!snapshot_at_boot)
10693 		return;
10694 
10695 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10696 		if (!tr->allocated_snapshot)
10697 			continue;
10698 
10699 		tracing_snapshot_instance(tr);
10700 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10701 	}
10702 #endif
10703 }
10704 
10705 void __init early_trace_init(void)
10706 {
10707 	if (tracepoint_printk) {
10708 		tracepoint_print_iter =
10709 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10710 		if (MEM_FAIL(!tracepoint_print_iter,
10711 			     "Failed to allocate trace iterator\n"))
10712 			tracepoint_printk = 0;
10713 		else
10714 			static_key_enable(&tracepoint_printk_key.key);
10715 	}
10716 	tracer_alloc_buffers();
10717 
10718 	init_events();
10719 }
10720 
10721 void __init trace_init(void)
10722 {
10723 	trace_event_init();
10724 
10725 	if (boot_instance_index)
10726 		enable_instances();
10727 }
10728 
10729 __init static void clear_boot_tracer(void)
10730 {
10731 	/*
10732 	 * The name of the default bootup tracer lives in an init-section
10733 	 * buffer. This function is called at late init. If we did not
10734 	 * find the boot tracer, then clear it out, to prevent
10735 	 * later registration from accessing the buffer that is
10736 	 * about to be freed.
10737 	 */
10738 	if (!default_bootup_tracer)
10739 		return;
10740 
10741 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10742 	       default_bootup_tracer);
10743 	default_bootup_tracer = NULL;
10744 }
10745 
10746 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10747 __init static void tracing_set_default_clock(void)
10748 {
10749 	/* sched_clock_stable() is determined in late_initcall */
10750 	if (!trace_boot_clock && !sched_clock_stable()) {
10751 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10752 			pr_warn("Can not set tracing clock due to lockdown\n");
10753 			return;
10754 		}
10755 
10756 		printk(KERN_WARNING
10757 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10758 		       "If you want to keep using the local clock, then add:\n"
10759 		       "  \"trace_clock=local\"\n"
10760 		       "on the kernel command line\n");
10761 		tracing_set_clock(&global_trace, "global");
10762 	}
10763 }
10764 #else
10765 static inline void tracing_set_default_clock(void) { }
10766 #endif
10767 
10768 __init static int late_trace_init(void)
10769 {
10770 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10771 		static_key_disable(&tracepoint_printk_key.key);
10772 		tracepoint_printk = 0;
10773 	}
10774 
10775 	tracing_set_default_clock();
10776 	clear_boot_tracer();
10777 	return 0;
10778 }
10779 
10780 late_initcall_sync(late_trace_init);
10781