xref: /linux-6.15/kernel/trace/trace.c (revision 403726d8)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
6  * Copyright (C) 2008 Ingo Molnar <[email protected]>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <[email protected]>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
 59  * A selftest will peek into the ring-buffer to count the
 60  * entries inserted during the selftest, although some concurrent
 61  * insertions into the ring-buffer, such as trace_printk, could occur
 62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
 110  * capturing traces that lead to crashes and outputting them to a
 111  * serial console.
112  *
 113  * It is off by default. You can enable it either by specifying
 114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
 115  * /proc/sys/kernel/ftrace_dump_on_oops.
 116  * Set it to 1 to dump the buffers of all CPUs.
 117  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118  */
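/*
 * Editorial example (not part of the original source), based on the
 * description above and on set_ftrace_dump_on_oops() below:
 *
 *	ftrace_dump_on_oops		(boot parameter: dump all CPU buffers)
 *	ftrace_dump_on_oops=orig_cpu	(dump only the oopsing CPU's buffer)
 *
 * or at run time:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */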
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * than "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
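/*
 * Illustrative layout (editorial sketch, not in the original source): a
 * saved set of N eval maps is laid out as
 *
 *	[ head | map 0 | map 1 | ... | map N-1 | tail ]
 *
 * where head.length == N, head.mod is the owning module (or NULL if
 * built in), and tail.next points to the next saved array (or NULL).
 */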
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE		100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 	default_bootup_tracer = bootup_tracer_buf;
173 	/* We are using ftrace early, expand it */
174 	ring_buffer_expanded = true;
175 	return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181 	if (*str++ != '=' || !*str) {
182 		ftrace_dump_on_oops = DUMP_ALL;
183 		return 1;
184 	}
185 
186 	if (!strcmp("orig_cpu", str)) {
187 		ftrace_dump_on_oops = DUMP_ORIG;
 188 		return 1;
 189 	}
 190 
 191 	return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 		__disable_trace_on_warning = 1;
199 	return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205 	allocate_snapshot = true;
206 	/* We also need the main ring buffer expanded */
207 	ring_buffer_expanded = true;
208 	return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 	return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 	trace_boot_clock = trace_boot_clock_buf;
229 	return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 		tracepoint_printk = 1;
237 	return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243 	nsec += 500;
244 	do_div(nsec, 1000);
245 	return nsec;
246 }
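/*
 * Worked example (editorial note): the +500 rounds to the nearest
 * microsecond, so ns2usecs(1499) == 1 and ns2usecs(2500) == 3.
 */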
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS						\
250 	(FUNCTION_DEFAULT_FLAGS |					\
251 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
252 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
253 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
254 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
258 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269 	.trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276 	struct trace_array *tr;
277 	int ret = -ENODEV;
278 
279 	mutex_lock(&trace_types_lock);
280 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 		if (tr == this_tr) {
282 			tr->ref++;
283 			ret = 0;
284 			break;
285 		}
286 	}
287 	mutex_unlock(&trace_types_lock);
288 
289 	return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294 	WARN_ON(!this_tr->ref);
295 	this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300 	mutex_lock(&trace_types_lock);
301 	__trace_array_put(this_tr);
302 	mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 			      struct ring_buffer *buffer,
307 			      struct ring_buffer_event *event)
308 {
309 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 	    !filter_match_preds(call->filter, rec)) {
311 		__trace_event_discard_commit(buffer, event);
312 		return 1;
313 	}
314 
315 	return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320 	vfree(pid_list->pids);
321 	kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
 329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334 	/*
335 	 * If pid_max changed after filtered_pids was created, we
336 	 * by default ignore all pids greater than the previous pid_max.
337 	 */
338 	if (search_pid >= filtered_pids->pid_max)
339 		return false;
340 
341 	return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356 	/*
357 	 * Return false, because if filtered_pids does not exist,
358 	 * all pids are good to trace.
359 	 */
360 	if (!filtered_pids)
361 		return false;
362 
363 	return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
 367  * trace_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 				  struct task_struct *self,
380 				  struct task_struct *task)
381 {
382 	if (!pid_list)
383 		return;
384 
385 	/* For forks, we only add if the forking task is listed */
386 	if (self) {
387 		if (!trace_find_filtered_pid(pid_list, self->pid))
388 			return;
389 	}
390 
391 	/* Sorry, but we don't support pid_max changing after setting */
392 	if (task->pid >= pid_list->pid_max)
393 		return;
394 
395 	/* "self" is set for forks, and NULL for exits */
396 	if (self)
397 		set_bit(task->pid, pid_list->pids);
398 	else
399 		clear_bit(task->pid, pid_list->pids);
400 }
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
 405  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416 	unsigned long pid = (unsigned long)v;
417 
418 	(*pos)++;
419 
 420 	/* pid already is +1 of the actual previous bit */
421 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423 	/* Return pid + 1 to allow zero to be represented */
424 	if (pid < pid_list->pid_max)
425 		return (void *)(pid + 1);
426 
427 	return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443 	unsigned long pid;
444 	loff_t l = 0;
445 
446 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 	if (pid >= pid_list->pid_max)
448 		return NULL;
449 
450 	/* Return pid + 1 so that zero can be the exit value */
451 	for (pid++; pid && l < *pos;
452 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 		;
454 	return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467 	unsigned long pid = (unsigned long)v - 1;
468 
469 	seq_printf(m, "%lu\n", pid);
470 	return 0;
471 }
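/*
 * Editorial illustration of the "+1" convention used by the seq_file
 * helpers above: if the lowest set bit in the pid_list is pid 0,
 * trace_pid_start()/trace_pid_next() return (void *)1 and
 * trace_pid_show() prints "0". Returning the raw pid would make pid 0
 * indistinguishable from NULL, which seq_file treats as end of
 * iteration.
 */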
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE		127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477 		    struct trace_pid_list **new_pid_list,
478 		    const char __user *ubuf, size_t cnt)
479 {
480 	struct trace_pid_list *pid_list;
481 	struct trace_parser parser;
482 	unsigned long val;
483 	int nr_pids = 0;
484 	ssize_t read = 0;
485 	ssize_t ret = 0;
486 	loff_t pos;
487 	pid_t pid;
488 
489 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 		return -ENOMEM;
491 
 492 	/*
 493 	 * Always create a new array. The write is an all-or-nothing
 494 	 * operation: a new array is always created when the user adds
 495 	 * new pids, and if the operation fails, the current list is
 496 	 * not modified.
 497 	 */
498 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 	if (!pid_list)
500 		return -ENOMEM;
501 
502 	pid_list->pid_max = READ_ONCE(pid_max);
503 
504 	/* Only truncating will shrink pid_max */
505 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 		pid_list->pid_max = filtered_pids->pid_max;
507 
508 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 	if (!pid_list->pids) {
510 		kfree(pid_list);
511 		return -ENOMEM;
512 	}
513 
514 	if (filtered_pids) {
515 		/* copy the current bits to the new max */
516 		for_each_set_bit(pid, filtered_pids->pids,
517 				 filtered_pids->pid_max) {
518 			set_bit(pid, pid_list->pids);
519 			nr_pids++;
520 		}
521 	}
522 
523 	while (cnt > 0) {
524 
525 		pos = 0;
526 
527 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
528 		if (ret < 0 || !trace_parser_loaded(&parser))
529 			break;
530 
531 		read += ret;
532 		ubuf += ret;
533 		cnt -= ret;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
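/*
 * Example behaviour (editorial sketch): a write of "123 456\n" through a
 * pid filter file (e.g. tracefs set_event_pid, which uses this helper)
 * builds a fresh bitmap with bits 123 and 456 set, after copying over
 * any pids already in @filtered_pids; the new list is handed back via
 * @new_pid_list, and on a parse error the old list is left untouched.
 */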
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
 612  * This number is purposely set to a low value of 16384.
 613  * If a dump on oops happens, it is much better not to have
 614  * to wait for all that output. In any case, this can be
 615  * configured at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
 630  * Serialize access to the ring buffer.
 631  *
 632  * The ring buffer serializes readers, but that is only low-level protection.
 633  * The validity of the events (returned by ring_buffer_peek() etc.)
 634  * is not protected by the ring buffer.
 635  *
 636  * The content of events may become garbage if we allow other processes
 637  * to consume these events concurrently:
 638  *   A) the page of the consumed events may become a normal page
 639  *      (not a reader page) in the ring buffer, and this page will be
 640  *      rewritten by the event producer.
 641  *   B) the page of the consumed events may become a page for splice_read,
 642  *      and this page will be returned to the system.
 643  *
 644  * These primitives allow multiple processes to access different cpu
 645  * ring buffers concurrently.
 646  *
 647  * These primitives don't distinguish read-only from read-consume access.
 648  * Multiple read-only accesses are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
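/*
 * Typical usage of the helpers above (editorial sketch):
 *
 *	trace_access_lock(cpu_file);
 *	... read or consume events from the selected cpu buffer
 *	    (or from all of them when cpu_file == RING_BUFFER_ALL_CPUS) ...
 *	trace_access_unlock(cpu_file);
 *
 * On SMP this takes all_cpu_access_lock for write in the "all CPUs"
 * case, and for read plus the per-cpu mutex otherwise.
 */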
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff), that just want to
767 	 * know if the ring buffer has been disabled, but it can handle
768 	 * races of where it gets disabled but we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:	   The address of the caller
 859  * @str:   The constant string to write to the buffer
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, snapshot can not be used when the tracer uses it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id(), cond_data);
924 	local_irq_restore(flags);
925 }
926 
927 void tracing_snapshot_instance(struct trace_array *tr)
928 {
929 	tracing_snapshot_instance_cond(tr, NULL);
930 }
931 
932 /**
933  * tracing_snapshot - take a snapshot of the current buffer.
934  *
935  * This causes a swap between the snapshot buffer and the current live
936  * tracing buffer. You can use this to take snapshots of the live
937  * trace when some condition is triggered, but continue to trace.
938  *
 939  * Note, make sure to allocate the snapshot either by calling
 940  * tracing_snapshot_alloc(), or by doing it manually
941  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
942  *
943  * If the snapshot buffer is not allocated, it will stop tracing.
944  * Basically making a permanent snapshot.
945  */
946 void tracing_snapshot(void)
947 {
948 	struct trace_array *tr = &global_trace;
949 
950 	tracing_snapshot_instance(tr);
951 }
952 EXPORT_SYMBOL_GPL(tracing_snapshot);
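/*
 * Example in-kernel usage (editorial sketch; the condition name is
 * hypothetical):
 *
 *	tracing_alloc_snapshot();	// may sleep; sets up the spare buffer
 *	...
 *	if (some_condition)		// any caller-defined trigger
 *		tracing_snapshot();	// swap the live buffer with the snapshot
 */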
953 
954 /**
955  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
956  * @tr:		The tracing instance to snapshot
957  * @cond_data:	The data to be tested conditionally, and possibly saved
958  *
 959  * This is the same as tracing_snapshot() except that the snapshot is
 960  * conditional: the snapshot only happens if the trace array's
 961  * cond_snapshot.update() implementation, when passed @cond_data,
 962  * returns true. In other words, the update() operation uses the
 963  * cond_data to decide whether the snapshot should be taken, and if
 964  * it was, update() has presumably saved the data along with the
 965  * snapshot.
966  */
967 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
968 {
969 	tracing_snapshot_instance_cond(tr, cond_data);
970 }
971 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
972 
973 /**
974  * tracing_snapshot_cond_data - get the user data associated with a snapshot
975  * @tr:		The tracing instance
976  *
977  * When the user enables a conditional snapshot using
978  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
979  * with the snapshot.  This accessor is used to retrieve it.
980  *
981  * Should not be called from cond_snapshot.update(), since it takes
982  * the tr->max_lock lock, which the code calling
983  * cond_snapshot.update() has already done.
984  *
985  * Returns the cond_data associated with the trace array's snapshot.
986  */
987 void *tracing_cond_snapshot_data(struct trace_array *tr)
988 {
989 	void *cond_data = NULL;
990 
991 	arch_spin_lock(&tr->max_lock);
992 
993 	if (tr->cond_snapshot)
994 		cond_data = tr->cond_snapshot->cond_data;
995 
996 	arch_spin_unlock(&tr->max_lock);
997 
998 	return cond_data;
999 }
1000 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1001 
1002 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1003 					struct trace_buffer *size_buf, int cpu_id);
1004 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1005 
1006 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1007 {
1008 	int ret;
1009 
1010 	if (!tr->allocated_snapshot) {
1011 
1012 		/* allocate spare buffer */
1013 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1014 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1015 		if (ret < 0)
1016 			return ret;
1017 
1018 		tr->allocated_snapshot = true;
1019 	}
1020 
1021 	return 0;
1022 }
1023 
1024 static void free_snapshot(struct trace_array *tr)
1025 {
1026 	/*
1027 	 * We don't free the ring buffer. Instead, we resize it because
1028 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1029 	 * we want to preserve it.
1030 	 */
1031 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1032 	set_buffer_entries(&tr->max_buffer, 1);
1033 	tracing_reset_online_cpus(&tr->max_buffer);
1034 	tr->allocated_snapshot = false;
1035 }
1036 
1037 /**
1038  * tracing_alloc_snapshot - allocate snapshot buffer.
1039  *
1040  * This only allocates the snapshot buffer if it isn't already
1041  * allocated - it doesn't also take a snapshot.
1042  *
1043  * This is meant to be used in cases where the snapshot buffer needs
1044  * to be set up for events that can't sleep but need to be able to
1045  * trigger a snapshot.
1046  */
1047 int tracing_alloc_snapshot(void)
1048 {
1049 	struct trace_array *tr = &global_trace;
1050 	int ret;
1051 
1052 	ret = tracing_alloc_snapshot_instance(tr);
1053 	WARN_ON(ret < 0);
1054 
1055 	return ret;
1056 }
1057 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1058 
1059 /**
1060  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1061  *
1062  * This is similar to tracing_snapshot(), but it will allocate the
1063  * snapshot buffer if it isn't already allocated. Use this only
1064  * where it is safe to sleep, as the allocation may sleep.
1065  *
1066  * This causes a swap between the snapshot buffer and the current live
1067  * tracing buffer. You can use this to take snapshots of the live
1068  * trace when some condition is triggered, but continue to trace.
1069  */
1070 void tracing_snapshot_alloc(void)
1071 {
1072 	int ret;
1073 
1074 	ret = tracing_alloc_snapshot();
1075 	if (ret < 0)
1076 		return;
1077 
1078 	tracing_snapshot();
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1081 
1082 /**
1083  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1084  * @tr:		The tracing instance
1085  * @cond_data:	User data to associate with the snapshot
1086  * @update:	Implementation of the cond_snapshot update function
1087  *
1088  * Check whether the conditional snapshot for the given instance has
1089  * already been enabled, or if the current tracer is already using a
1090  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1091  * save the cond_data and update function inside.
1092  *
1093  * Returns 0 if successful, error otherwise.
1094  */
1095 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1096 				 cond_update_fn_t update)
1097 {
1098 	struct cond_snapshot *cond_snapshot;
1099 	int ret = 0;
1100 
1101 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1102 	if (!cond_snapshot)
1103 		return -ENOMEM;
1104 
1105 	cond_snapshot->cond_data = cond_data;
1106 	cond_snapshot->update = update;
1107 
1108 	mutex_lock(&trace_types_lock);
1109 
1110 	ret = tracing_alloc_snapshot_instance(tr);
1111 	if (ret)
1112 		goto fail_unlock;
1113 
1114 	if (tr->current_trace->use_max_tr) {
1115 		ret = -EBUSY;
1116 		goto fail_unlock;
1117 	}
1118 
1119 	if (tr->cond_snapshot) {
1120 		ret = -EBUSY;
1121 		goto fail_unlock;
1122 	}
1123 
1124 	arch_spin_lock(&tr->max_lock);
1125 	tr->cond_snapshot = cond_snapshot;
1126 	arch_spin_unlock(&tr->max_lock);
1127 
1128 	mutex_unlock(&trace_types_lock);
1129 
1130 	return ret;
1131 
1132  fail_unlock:
1133 	mutex_unlock(&trace_types_lock);
1134 	kfree(cond_snapshot);
1135 	return ret;
1136 }
1137 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1138 
1139 /**
1140  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1141  * @tr:		The tracing instance
1142  *
1143  * Check whether the conditional snapshot for the given instance is
1144  * enabled; if so, free the cond_snapshot associated with it,
1145  * otherwise return -EINVAL.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_disable(struct trace_array *tr)
1150 {
1151 	int ret = 0;
1152 
1153 	arch_spin_lock(&tr->max_lock);
1154 
1155 	if (!tr->cond_snapshot)
1156 		ret = -EINVAL;
1157 	else {
1158 		kfree(tr->cond_snapshot);
1159 		tr->cond_snapshot = NULL;
1160 	}
1161 
1162 	arch_spin_unlock(&tr->max_lock);
1163 
1164 	return ret;
1165 }
1166 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1167 #else
1168 void tracing_snapshot(void)
1169 {
1170 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1171 }
1172 EXPORT_SYMBOL_GPL(tracing_snapshot);
1173 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1174 {
1175 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1176 }
1177 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1178 int tracing_alloc_snapshot(void)
1179 {
1180 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1181 	return -ENODEV;
1182 }
1183 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1184 void tracing_snapshot_alloc(void)
1185 {
1186 	/* Give warning */
1187 	tracing_snapshot();
1188 }
1189 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1190 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 {
1192 	return NULL;
1193 }
1194 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1195 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1196 {
1197 	return -ENODEV;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1200 int tracing_snapshot_cond_disable(struct trace_array *tr)
1201 {
1202 	return false;
1203 }
1204 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1205 #endif /* CONFIG_TRACER_SNAPSHOT */
1206 
1207 void tracer_tracing_off(struct trace_array *tr)
1208 {
1209 	if (tr->trace_buffer.buffer)
1210 		ring_buffer_record_off(tr->trace_buffer.buffer);
1211 	/*
1212 	 * This flag is looked at when buffers haven't been allocated
1213 	 * yet, or by some tracers (like irqsoff), that just want to
1214 	 * know if the ring buffer has been disabled, but it can handle
1215 	 * races of where it gets disabled but we still do a record.
1216 	 * As the check is in the fast path of the tracers, it is more
1217 	 * important to be fast than accurate.
1218 	 */
1219 	tr->buffer_disabled = 1;
1220 	/* Make the flag seen by readers */
1221 	smp_wmb();
1222 }
1223 
1224 /**
1225  * tracing_off - turn off tracing buffers
1226  *
1227  * This function stops the tracing buffers from recording data.
1228  * It does not disable any overhead the tracers themselves may
1229  * be causing. This function simply causes all recording to
1230  * the ring buffers to fail.
1231  */
1232 void tracing_off(void)
1233 {
1234 	tracer_tracing_off(&global_trace);
1235 }
1236 EXPORT_SYMBOL_GPL(tracing_off);
1237 
1238 void disable_trace_on_warning(void)
1239 {
1240 	if (__disable_trace_on_warning)
1241 		tracing_off();
1242 }
1243 
1244 /**
1245  * tracer_tracing_is_on - show real state of ring buffer enabled
1246  * @tr : the trace array to know if ring buffer is enabled
1247  *
1248  * Shows real state of the ring buffer if it is enabled or not.
1249  */
1250 bool tracer_tracing_is_on(struct trace_array *tr)
1251 {
1252 	if (tr->trace_buffer.buffer)
1253 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1254 	return !tr->buffer_disabled;
1255 }
1256 
1257 /**
1258  * tracing_is_on - show state of ring buffers enabled
1259  */
1260 int tracing_is_on(void)
1261 {
1262 	return tracer_tracing_is_on(&global_trace);
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_is_on);
1265 
1266 static int __init set_buf_size(char *str)
1267 {
1268 	unsigned long buf_size;
1269 
1270 	if (!str)
1271 		return 0;
1272 	buf_size = memparse(str, &str);
1273 	/* nr_entries can not be zero */
1274 	if (buf_size == 0)
1275 		return 0;
1276 	trace_buf_size = buf_size;
1277 	return 1;
1278 }
1279 __setup("trace_buf_size=", set_buf_size);
1280 
1281 static int __init set_tracing_thresh(char *str)
1282 {
1283 	unsigned long threshold;
1284 	int ret;
1285 
1286 	if (!str)
1287 		return 0;
1288 	ret = kstrtoul(str, 0, &threshold);
1289 	if (ret < 0)
1290 		return 0;
1291 	tracing_thresh = threshold * 1000;
1292 	return 1;
1293 }
1294 __setup("tracing_thresh=", set_tracing_thresh);
1295 
1296 unsigned long nsecs_to_usecs(unsigned long nsecs)
1297 {
1298 	return nsecs / 1000;
1299 }
1300 
1301 /*
1302  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1303  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1304  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1305  * of strings in the order that the evals (enum) were defined.
1306  */
1307 #undef C
1308 #define C(a, b) b
1309 
1310 /* These must match the bit positions in trace_iterator_flags */
1311 static const char *trace_options[] = {
1312 	TRACE_FLAGS
1313 	NULL
1314 };
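/*
 * Illustrative expansion (editorial example): if TRACE_FLAGS contained
 *
 *	C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"),
 *
 * then with "C(a, b) b" defined above, trace_options[] becomes
 *
 *	{ "print-parent", "sym-offset", ..., NULL };
 *
 * i.e. just the strings, in the order the enum values were defined.
 */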
1315 
1316 static struct {
1317 	u64 (*func)(void);
1318 	const char *name;
1319 	int in_ns;		/* is this clock in nanoseconds? */
1320 } trace_clocks[] = {
1321 	{ trace_clock_local,		"local",	1 },
1322 	{ trace_clock_global,		"global",	1 },
1323 	{ trace_clock_counter,		"counter",	0 },
1324 	{ trace_clock_jiffies,		"uptime",	0 },
1325 	{ trace_clock,			"perf",		1 },
1326 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1327 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1328 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1329 	ARCH_TRACE_CLOCKS
1330 };
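/*
 * Editorial note: these names are what the tracefs "trace_clock" file
 * accepts, e.g. "echo global > trace_clock" selects trace_clock_global()
 * for timestamping events; the in_ns field records whether the clock's
 * values are in nanoseconds.
 */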
1331 
1332 bool trace_clock_in_ns(struct trace_array *tr)
1333 {
1334 	if (trace_clocks[tr->clock_id].in_ns)
1335 		return true;
1336 
1337 	return false;
1338 }
1339 
1340 /*
1341  * trace_parser_get_init - gets the buffer for trace parser
1342  */
1343 int trace_parser_get_init(struct trace_parser *parser, int size)
1344 {
1345 	memset(parser, 0, sizeof(*parser));
1346 
1347 	parser->buffer = kmalloc(size, GFP_KERNEL);
1348 	if (!parser->buffer)
1349 		return 1;
1350 
1351 	parser->size = size;
1352 	return 0;
1353 }
1354 
1355 /*
1356  * trace_parser_put - frees the buffer for trace parser
1357  */
1358 void trace_parser_put(struct trace_parser *parser)
1359 {
1360 	kfree(parser->buffer);
1361 	parser->buffer = NULL;
1362 }
1363 
1364 /*
1365  * trace_get_user - reads the user input string separated by space
1366  * (matched by isspace(ch))
1367  *
1368  * For each string found, the 'struct trace_parser' is updated,
1369  * and the function returns.
1370  *
1371  * Returns number of bytes read.
1372  *
1373  * See kernel/trace/trace.h for 'struct trace_parser' details.
1374  */
1375 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1376 	size_t cnt, loff_t *ppos)
1377 {
1378 	char ch;
1379 	size_t read = 0;
1380 	ssize_t ret;
1381 
1382 	if (!*ppos)
1383 		trace_parser_clear(parser);
1384 
1385 	ret = get_user(ch, ubuf++);
1386 	if (ret)
1387 		goto out;
1388 
1389 	read++;
1390 	cnt--;
1391 
1392 	/*
1393 	 * If the parser is not finished with the last write,
1394 	 * continue reading the user input without skipping spaces.
1395 	 */
1396 	if (!parser->cont) {
1397 		/* skip white space */
1398 		while (cnt && isspace(ch)) {
1399 			ret = get_user(ch, ubuf++);
1400 			if (ret)
1401 				goto out;
1402 			read++;
1403 			cnt--;
1404 		}
1405 
1406 		parser->idx = 0;
1407 
1408 		/* only spaces were written */
1409 		if (isspace(ch) || !ch) {
1410 			*ppos += read;
1411 			ret = read;
1412 			goto out;
1413 		}
1414 	}
1415 
1416 	/* read the non-space input */
1417 	while (cnt && !isspace(ch) && ch) {
1418 		if (parser->idx < parser->size - 1)
1419 			parser->buffer[parser->idx++] = ch;
1420 		else {
1421 			ret = -EINVAL;
1422 			goto out;
1423 		}
1424 		ret = get_user(ch, ubuf++);
1425 		if (ret)
1426 			goto out;
1427 		read++;
1428 		cnt--;
1429 	}
1430 
1431 	/* We either got finished input or we have to wait for another call. */
1432 	if (isspace(ch) || !ch) {
1433 		parser->buffer[parser->idx] = 0;
1434 		parser->cont = false;
1435 	} else if (parser->idx < parser->size - 1) {
1436 		parser->cont = true;
1437 		parser->buffer[parser->idx++] = ch;
1438 		/* Make sure the parsed string always terminates with '\0'. */
1439 		parser->buffer[parser->idx] = 0;
1440 	} else {
1441 		ret = -EINVAL;
1442 		goto out;
1443 	}
1444 
1445 	*ppos += read;
1446 	ret = read;
1447 
1448 out:
1449 	return ret;
1450 }
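/*
 * Worked example (editorial sketch): given user input "foo bar\n", the
 * first call fills parser->buffer with "foo" and returns the number of
 * bytes consumed (including the delimiting space); calling it again on
 * the remaining bytes, as trace_pid_write() does in its loop, yields
 * "bar".
 */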
1451 
1452 /* TODO add a seq_buf_to_buffer() */
1453 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1454 {
1455 	int len;
1456 
1457 	if (trace_seq_used(s) <= s->seq.readpos)
1458 		return -EBUSY;
1459 
1460 	len = trace_seq_used(s) - s->seq.readpos;
1461 	if (cnt > len)
1462 		cnt = len;
1463 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1464 
1465 	s->seq.readpos += cnt;
1466 	return cnt;
1467 }
1468 
1469 unsigned long __read_mostly	tracing_thresh;
1470 
1471 #ifdef CONFIG_TRACER_MAX_TRACE
1472 /*
1473  * Copy the new maximum trace into the separate maximum-trace
1474  * structure. (this way the maximum trace is permanently saved,
1475  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1476  */
1477 static void
1478 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1479 {
1480 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1481 	struct trace_buffer *max_buf = &tr->max_buffer;
1482 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1483 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1484 
1485 	max_buf->cpu = cpu;
1486 	max_buf->time_start = data->preempt_timestamp;
1487 
1488 	max_data->saved_latency = tr->max_latency;
1489 	max_data->critical_start = data->critical_start;
1490 	max_data->critical_end = data->critical_end;
1491 
1492 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1493 	max_data->pid = tsk->pid;
1494 	/*
1495 	 * If tsk == current, then use current_uid(), as that does not use
1496 	 * RCU. The irq tracer can be called out of RCU scope.
1497 	 */
1498 	if (tsk == current)
1499 		max_data->uid = current_uid();
1500 	else
1501 		max_data->uid = task_uid(tsk);
1502 
1503 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1504 	max_data->policy = tsk->policy;
1505 	max_data->rt_priority = tsk->rt_priority;
1506 
1507 	/* record this tasks comm */
1508 	tracing_record_cmdline(tsk);
1509 }
1510 
1511 /**
1512  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1513  * @tr: tracer
1514  * @tsk: the task with the latency
1515  * @cpu: The cpu that initiated the trace.
1516  * @cond_data: User data associated with a conditional snapshot
1517  *
1518  * Flip the buffers between the @tr and the max_tr and record information
1519  * about which task was the cause of this latency.
1520  */
1521 void
1522 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1523 	      void *cond_data)
1524 {
1525 	if (tr->stop_count)
1526 		return;
1527 
1528 	WARN_ON_ONCE(!irqs_disabled());
1529 
1530 	if (!tr->allocated_snapshot) {
1531 		/* Only the nop tracer should hit this when disabling */
1532 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1533 		return;
1534 	}
1535 
1536 	arch_spin_lock(&tr->max_lock);
1537 
1538 	/* Inherit the recordable setting from trace_buffer */
1539 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1540 		ring_buffer_record_on(tr->max_buffer.buffer);
1541 	else
1542 		ring_buffer_record_off(tr->max_buffer.buffer);
1543 
1544 #ifdef CONFIG_TRACER_SNAPSHOT
1545 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1546 		goto out_unlock;
1547 #endif
1548 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1549 
1550 	__update_max_tr(tr, tsk, cpu);
1551 
1552  out_unlock:
1553 	arch_spin_unlock(&tr->max_lock);
1554 }
1555 
1556 /**
1557  * update_max_tr_single - only copy one trace over, and reset the rest
1558  * @tr: tracer
1559  * @tsk: task with the latency
1560  * @cpu: the cpu of the buffer to copy.
1561  *
1562  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1563  */
1564 void
1565 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1566 {
1567 	int ret;
1568 
1569 	if (tr->stop_count)
1570 		return;
1571 
1572 	WARN_ON_ONCE(!irqs_disabled());
1573 	if (!tr->allocated_snapshot) {
1574 		/* Only the nop tracer should hit this when disabling */
1575 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1576 		return;
1577 	}
1578 
1579 	arch_spin_lock(&tr->max_lock);
1580 
1581 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1582 
1583 	if (ret == -EBUSY) {
1584 		/*
1585 		 * We failed to swap the buffer due to a commit taking
1586 		 * place on this CPU. We fail to record, but we reset
1587 		 * the max trace buffer (no one writes directly to it)
1588 		 * and flag that it failed.
1589 		 */
1590 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1591 			"Failed to swap buffers due to commit in progress\n");
1592 	}
1593 
1594 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1595 
1596 	__update_max_tr(tr, tsk, cpu);
1597 	arch_spin_unlock(&tr->max_lock);
1598 }
1599 #endif /* CONFIG_TRACER_MAX_TRACE */
1600 
1601 static int wait_on_pipe(struct trace_iterator *iter, int full)
1602 {
1603 	/* Iterators are static, they should be filled or empty */
1604 	if (trace_buffer_iter(iter, iter->cpu_file))
1605 		return 0;
1606 
1607 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1608 				full);
1609 }
1610 
1611 #ifdef CONFIG_FTRACE_STARTUP_TEST
1612 static bool selftests_can_run;
1613 
1614 struct trace_selftests {
1615 	struct list_head		list;
1616 	struct tracer			*type;
1617 };
1618 
1619 static LIST_HEAD(postponed_selftests);
1620 
1621 static int save_selftest(struct tracer *type)
1622 {
1623 	struct trace_selftests *selftest;
1624 
1625 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1626 	if (!selftest)
1627 		return -ENOMEM;
1628 
1629 	selftest->type = type;
1630 	list_add(&selftest->list, &postponed_selftests);
1631 	return 0;
1632 }
1633 
1634 static int run_tracer_selftest(struct tracer *type)
1635 {
1636 	struct trace_array *tr = &global_trace;
1637 	struct tracer *saved_tracer = tr->current_trace;
1638 	int ret;
1639 
1640 	if (!type->selftest || tracing_selftest_disabled)
1641 		return 0;
1642 
1643 	/*
1644 	 * If a tracer registers early in boot up (before scheduling is
1645 	 * initialized and such), then do not run its selftest yet.
1646 	 * Instead, run it a little later in the boot process.
1647 	 */
1648 	if (!selftests_can_run)
1649 		return save_selftest(type);
1650 
1651 	/*
1652 	 * Run a selftest on this tracer.
1653 	 * Here we reset the trace buffer, and set the current
1654 	 * tracer to be this tracer. The tracer can then run some
1655 	 * internal tracing to verify that everything is in order.
1656 	 * If we fail, we do not register this tracer.
1657 	 */
1658 	tracing_reset_online_cpus(&tr->trace_buffer);
1659 
1660 	tr->current_trace = type;
1661 
1662 #ifdef CONFIG_TRACER_MAX_TRACE
1663 	if (type->use_max_tr) {
1664 		/* If we expanded the buffers, make sure the max is expanded too */
1665 		if (ring_buffer_expanded)
1666 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1667 					   RING_BUFFER_ALL_CPUS);
1668 		tr->allocated_snapshot = true;
1669 	}
1670 #endif
1671 
1672 	/* the test is responsible for initializing and enabling */
1673 	pr_info("Testing tracer %s: ", type->name);
1674 	ret = type->selftest(type, tr);
1675 	/* the test is responsible for resetting too */
1676 	tr->current_trace = saved_tracer;
1677 	if (ret) {
1678 		printk(KERN_CONT "FAILED!\n");
1679 		/* Add the warning after printing 'FAILED' */
1680 		WARN_ON(1);
1681 		return -1;
1682 	}
1683 	/* Only reset on passing, to avoid touching corrupted buffers */
1684 	tracing_reset_online_cpus(&tr->trace_buffer);
1685 
1686 #ifdef CONFIG_TRACER_MAX_TRACE
1687 	if (type->use_max_tr) {
1688 		tr->allocated_snapshot = false;
1689 
1690 		/* Shrink the max buffer again */
1691 		if (ring_buffer_expanded)
1692 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1693 					   RING_BUFFER_ALL_CPUS);
1694 	}
1695 #endif
1696 
1697 	printk(KERN_CONT "PASSED\n");
1698 	return 0;
1699 }
1700 
1701 static __init int init_trace_selftests(void)
1702 {
1703 	struct trace_selftests *p, *n;
1704 	struct tracer *t, **last;
1705 	int ret;
1706 
1707 	selftests_can_run = true;
1708 
1709 	mutex_lock(&trace_types_lock);
1710 
1711 	if (list_empty(&postponed_selftests))
1712 		goto out;
1713 
1714 	pr_info("Running postponed tracer tests:\n");
1715 
1716 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1717 		ret = run_tracer_selftest(p->type);
1718 		/* If the test fails, then warn and remove from available_tracers */
1719 		if (ret < 0) {
1720 			WARN(1, "tracer: %s failed selftest, disabling\n",
1721 			     p->type->name);
1722 			last = &trace_types;
1723 			for (t = trace_types; t; t = t->next) {
1724 				if (t == p->type) {
1725 					*last = t->next;
1726 					break;
1727 				}
1728 				last = &t->next;
1729 			}
1730 		}
1731 		list_del(&p->list);
1732 		kfree(p);
1733 	}
1734 
1735  out:
1736 	mutex_unlock(&trace_types_lock);
1737 
1738 	return 0;
1739 }
1740 core_initcall(init_trace_selftests);
1741 #else
1742 static inline int run_tracer_selftest(struct tracer *type)
1743 {
1744 	return 0;
1745 }
1746 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1747 
1748 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1749 
1750 static void __init apply_trace_boot_options(void);
1751 
1752 /**
1753  * register_tracer - register a tracer with the ftrace system.
1754  * @type: the plugin for the tracer
1755  *
1756  * Register a new plugin tracer.
1757  */
1758 int __init register_tracer(struct tracer *type)
1759 {
1760 	struct tracer *t;
1761 	int ret = 0;
1762 
1763 	if (!type->name) {
1764 		pr_info("Tracer must have a name\n");
1765 		return -1;
1766 	}
1767 
1768 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1769 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1770 		return -1;
1771 	}
1772 
1773 	mutex_lock(&trace_types_lock);
1774 
1775 	tracing_selftest_running = true;
1776 
1777 	for (t = trace_types; t; t = t->next) {
1778 		if (strcmp(type->name, t->name) == 0) {
1779 			/* already found */
1780 			pr_info("Tracer %s already registered\n",
1781 				type->name);
1782 			ret = -1;
1783 			goto out;
1784 		}
1785 	}
1786 
1787 	if (!type->set_flag)
1788 		type->set_flag = &dummy_set_flag;
1789 	if (!type->flags) {
1790 		/* allocate a dummy tracer_flags */
1791 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1792 		if (!type->flags) {
1793 			ret = -ENOMEM;
1794 			goto out;
1795 		}
1796 		type->flags->val = 0;
1797 		type->flags->opts = dummy_tracer_opt;
1798 	} else
1799 		if (!type->flags->opts)
1800 			type->flags->opts = dummy_tracer_opt;
1801 
1802 	/* store the tracer for __set_tracer_option */
1803 	type->flags->trace = type;
1804 
1805 	ret = run_tracer_selftest(type);
1806 	if (ret < 0)
1807 		goto out;
1808 
1809 	type->next = trace_types;
1810 	trace_types = type;
1811 	add_tracer_options(&global_trace, type);
1812 
1813  out:
1814 	tracing_selftest_running = false;
1815 	mutex_unlock(&trace_types_lock);
1816 
1817 	if (ret || !default_bootup_tracer)
1818 		goto out_unlock;
1819 
1820 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1821 		goto out_unlock;
1822 
1823 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1824 	/* Do we want this tracer to start on bootup? */
1825 	tracing_set_tracer(&global_trace, type->name);
1826 	default_bootup_tracer = NULL;
1827 
1828 	apply_trace_boot_options();
1829 
1830 	/* disable other selftests, since this will break it. */
1831 	tracing_selftest_disabled = true;
1832 #ifdef CONFIG_FTRACE_STARTUP_TEST
1833 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1834 	       type->name);
1835 #endif
1836 
1837  out_unlock:
1838 	return ret;
1839 }
1840 
1841 void tracing_reset(struct trace_buffer *buf, int cpu)
1842 {
1843 	struct ring_buffer *buffer = buf->buffer;
1844 
1845 	if (!buffer)
1846 		return;
1847 
1848 	ring_buffer_record_disable(buffer);
1849 
1850 	/* Make sure all commits have finished */
1851 	synchronize_rcu();
1852 	ring_buffer_reset_cpu(buffer, cpu);
1853 
1854 	ring_buffer_record_enable(buffer);
1855 }
1856 
1857 void tracing_reset_online_cpus(struct trace_buffer *buf)
1858 {
1859 	struct ring_buffer *buffer = buf->buffer;
1860 	int cpu;
1861 
1862 	if (!buffer)
1863 		return;
1864 
1865 	ring_buffer_record_disable(buffer);
1866 
1867 	/* Make sure all commits have finished */
1868 	synchronize_rcu();
1869 
1870 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1871 
1872 	for_each_online_cpu(cpu)
1873 		ring_buffer_reset_cpu(buffer, cpu);
1874 
1875 	ring_buffer_record_enable(buffer);
1876 }
1877 
1878 /* Must have trace_types_lock held */
1879 void tracing_reset_all_online_cpus(void)
1880 {
1881 	struct trace_array *tr;
1882 
1883 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1884 		if (!tr->clear_trace)
1885 			continue;
1886 		tr->clear_trace = false;
1887 		tracing_reset_online_cpus(&tr->trace_buffer);
1888 #ifdef CONFIG_TRACER_MAX_TRACE
1889 		tracing_reset_online_cpus(&tr->max_buffer);
1890 #endif
1891 	}
1892 }
1893 
1894 static int *tgid_map;
1895 
1896 #define SAVED_CMDLINES_DEFAULT 128
1897 #define NO_CMDLINE_MAP UINT_MAX
1898 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1899 struct saved_cmdlines_buffer {
1900 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1901 	unsigned *map_cmdline_to_pid;
1902 	unsigned cmdline_num;
1903 	int cmdline_idx;
1904 	char *saved_cmdlines;
1905 };
1906 static struct saved_cmdlines_buffer *savedcmd;
1907 
1908 /* temporarily disable recording */
1909 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1910 
1911 static inline char *get_saved_cmdlines(int idx)
1912 {
1913 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1914 }
1915 
1916 static inline void set_cmdline(int idx, const char *cmdline)
1917 {
1918 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1919 }
1920 
1921 static int allocate_cmdlines_buffer(unsigned int val,
1922 				    struct saved_cmdlines_buffer *s)
1923 {
1924 	s->map_cmdline_to_pid = kmalloc_array(val,
1925 					      sizeof(*s->map_cmdline_to_pid),
1926 					      GFP_KERNEL);
1927 	if (!s->map_cmdline_to_pid)
1928 		return -ENOMEM;
1929 
1930 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1931 	if (!s->saved_cmdlines) {
1932 		kfree(s->map_cmdline_to_pid);
1933 		return -ENOMEM;
1934 	}
1935 
1936 	s->cmdline_idx = 0;
1937 	s->cmdline_num = val;
1938 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1939 	       sizeof(s->map_pid_to_cmdline));
1940 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1941 	       val * sizeof(*s->map_cmdline_to_pid));
1942 
1943 	return 0;
1944 }
1945 
1946 static int trace_create_savedcmd(void)
1947 {
1948 	int ret;
1949 
1950 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1951 	if (!savedcmd)
1952 		return -ENOMEM;
1953 
1954 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1955 	if (ret < 0) {
1956 		kfree(savedcmd);
1957 		savedcmd = NULL;
1958 		return -ENOMEM;
1959 	}
1960 
1961 	return 0;
1962 }
1963 
1964 int is_tracing_stopped(void)
1965 {
1966 	return global_trace.stop_count;
1967 }
1968 
1969 /**
1970  * tracing_start - quick start of the tracer
1971  *
1972  * If tracing is enabled but was stopped by tracing_stop,
1973  * this will start the tracer back up.
1974  */
1975 void tracing_start(void)
1976 {
1977 	struct ring_buffer *buffer;
1978 	unsigned long flags;
1979 
1980 	if (tracing_disabled)
1981 		return;
1982 
1983 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1984 	if (--global_trace.stop_count) {
1985 		if (global_trace.stop_count < 0) {
1986 			/* Someone screwed up their debugging */
1987 			WARN_ON_ONCE(1);
1988 			global_trace.stop_count = 0;
1989 		}
1990 		goto out;
1991 	}
1992 
1993 	/* Prevent the buffers from switching */
1994 	arch_spin_lock(&global_trace.max_lock);
1995 
1996 	buffer = global_trace.trace_buffer.buffer;
1997 	if (buffer)
1998 		ring_buffer_record_enable(buffer);
1999 
2000 #ifdef CONFIG_TRACER_MAX_TRACE
2001 	buffer = global_trace.max_buffer.buffer;
2002 	if (buffer)
2003 		ring_buffer_record_enable(buffer);
2004 #endif
2005 
2006 	arch_spin_unlock(&global_trace.max_lock);
2007 
2008  out:
2009 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2010 }
2011 
2012 static void tracing_start_tr(struct trace_array *tr)
2013 {
2014 	struct ring_buffer *buffer;
2015 	unsigned long flags;
2016 
2017 	if (tracing_disabled)
2018 		return;
2019 
2020 	/* If global, we need to also start the max tracer */
2021 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2022 		return tracing_start();
2023 
2024 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2025 
2026 	if (--tr->stop_count) {
2027 		if (tr->stop_count < 0) {
2028 			/* Someone screwed up their debugging */
2029 			WARN_ON_ONCE(1);
2030 			tr->stop_count = 0;
2031 		}
2032 		goto out;
2033 	}
2034 
2035 	buffer = tr->trace_buffer.buffer;
2036 	if (buffer)
2037 		ring_buffer_record_enable(buffer);
2038 
2039  out:
2040 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2041 }
2042 
2043 /**
2044  * tracing_stop - quick stop of the tracer
2045  *
2046  * Light weight way to stop tracing. Use in conjunction with
2047  * tracing_start.
2048  */
2049 void tracing_stop(void)
2050 {
2051 	struct ring_buffer *buffer;
2052 	unsigned long flags;
2053 
2054 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2055 	if (global_trace.stop_count++)
2056 		goto out;
2057 
2058 	/* Prevent the buffers from switching */
2059 	arch_spin_lock(&global_trace.max_lock);
2060 
2061 	buffer = global_trace.trace_buffer.buffer;
2062 	if (buffer)
2063 		ring_buffer_record_disable(buffer);
2064 
2065 #ifdef CONFIG_TRACER_MAX_TRACE
2066 	buffer = global_trace.max_buffer.buffer;
2067 	if (buffer)
2068 		ring_buffer_record_disable(buffer);
2069 #endif
2070 
2071 	arch_spin_unlock(&global_trace.max_lock);
2072 
2073  out:
2074 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2075 }
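
/*
 * Illustrative sketch: a hypothetical caller (example_quiet_section() is
 * not a real kernel symbol) that pauses global tracing around a noisy
 * section.  tracing_stop() and tracing_start() nest through stop_count,
 * so every stop must be matched by exactly one start.
 */
static void __maybe_unused example_quiet_section(void)
{
	tracing_stop();		/* bumps stop_count, disables recording */

	/* ... work whose events should stay out of the buffer ... */

	tracing_start();	/* drops stop_count, re-enables recording */
}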
2076 
2077 static void tracing_stop_tr(struct trace_array *tr)
2078 {
2079 	struct ring_buffer *buffer;
2080 	unsigned long flags;
2081 
2082 	/* If global, we need to also stop the max tracer */
2083 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2084 		return tracing_stop();
2085 
2086 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2087 	if (tr->stop_count++)
2088 		goto out;
2089 
2090 	buffer = tr->trace_buffer.buffer;
2091 	if (buffer)
2092 		ring_buffer_record_disable(buffer);
2093 
2094  out:
2095 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2096 }
2097 
2098 static int trace_save_cmdline(struct task_struct *tsk)
2099 {
2100 	unsigned pid, idx;
2101 
2102 	/* treat recording of idle task as a success */
2103 	if (!tsk->pid)
2104 		return 1;
2105 
2106 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2107 		return 0;
2108 
2109 	/*
2110 	 * It's not the end of the world if we don't get
2111 	 * the lock, but we also don't want to spin
2112 	 * nor do we want to disable interrupts,
2113 	 * so if we miss here, then better luck next time.
2114 	 */
2115 	if (!arch_spin_trylock(&trace_cmdline_lock))
2116 		return 0;
2117 
2118 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2119 	if (idx == NO_CMDLINE_MAP) {
2120 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2121 
2122 		/*
2123 		 * Check whether the cmdline buffer at idx has a pid
2124 		 * mapped. We are going to overwrite that entry so we
2125 		 * need to clear the map_pid_to_cmdline. Otherwise we
2126 		 * would read the new comm for the old pid.
2127 		 */
2128 		pid = savedcmd->map_cmdline_to_pid[idx];
2129 		if (pid != NO_CMDLINE_MAP)
2130 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2131 
2132 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2133 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2134 
2135 		savedcmd->cmdline_idx = idx;
2136 	}
2137 
2138 	set_cmdline(idx, tsk->comm);
2139 
2140 	arch_spin_unlock(&trace_cmdline_lock);
2141 
2142 	return 1;
2143 }
2144 
2145 static void __trace_find_cmdline(int pid, char comm[])
2146 {
2147 	unsigned map;
2148 
2149 	if (!pid) {
2150 		strcpy(comm, "<idle>");
2151 		return;
2152 	}
2153 
2154 	if (WARN_ON_ONCE(pid < 0)) {
2155 		strcpy(comm, "<XXX>");
2156 		return;
2157 	}
2158 
2159 	if (pid > PID_MAX_DEFAULT) {
2160 		strcpy(comm, "<...>");
2161 		return;
2162 	}
2163 
2164 	map = savedcmd->map_pid_to_cmdline[pid];
2165 	if (map != NO_CMDLINE_MAP)
2166 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2167 	else
2168 		strcpy(comm, "<...>");
2169 }
2170 
2171 void trace_find_cmdline(int pid, char comm[])
2172 {
2173 	preempt_disable();
2174 	arch_spin_lock(&trace_cmdline_lock);
2175 
2176 	__trace_find_cmdline(pid, comm);
2177 
2178 	arch_spin_unlock(&trace_cmdline_lock);
2179 	preempt_enable();
2180 }
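
/*
 * Illustrative sketch: a hypothetical output helper (example_print_comm()
 * is not a real kernel symbol) that resolves the cached comm for a pid
 * recorded in an event.  The destination buffer must be at least
 * TASK_COMM_LEN bytes, which is as much as trace_find_cmdline() copies.
 */
static void __maybe_unused example_print_comm(struct trace_seq *s, int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	trace_seq_printf(s, "%s-%d", comm, pid);
}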
2181 
2182 int trace_find_tgid(int pid)
2183 {
2184 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2185 		return 0;
2186 
2187 	return tgid_map[pid];
2188 }
2189 
2190 static int trace_save_tgid(struct task_struct *tsk)
2191 {
2192 	/* treat recording of idle task as a success */
2193 	if (!tsk->pid)
2194 		return 1;
2195 
2196 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2197 		return 0;
2198 
2199 	tgid_map[tsk->pid] = tsk->tgid;
2200 	return 1;
2201 }
2202 
2203 static bool tracing_record_taskinfo_skip(int flags)
2204 {
2205 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2206 		return true;
2207 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2208 		return true;
2209 	if (!__this_cpu_read(trace_taskinfo_save))
2210 		return true;
2211 	return false;
2212 }
2213 
2214 /**
2215  * tracing_record_taskinfo - record the task info of a task
2216  *
2217  * @task:  task to record
2218  * @flags: TRACE_RECORD_CMDLINE for recording comm
2219  *         TRACE_RECORD_TGID for recording tgid
2220  */
2221 void tracing_record_taskinfo(struct task_struct *task, int flags)
2222 {
2223 	bool done;
2224 
2225 	if (tracing_record_taskinfo_skip(flags))
2226 		return;
2227 
2228 	/*
2229 	 * Record as much task information as possible. If some fail, continue
2230 	 * to try to record the others.
2231 	 */
2232 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2233 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2234 
2235 	/* If recording any information failed, retry again soon. */
2236 	if (!done)
2237 		return;
2238 
2239 	__this_cpu_write(trace_taskinfo_save, false);
2240 }
2241 
2242 /**
2243  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2244  *
2245  * @prev:  previous task during sched_switch
2246  * @next:  next task during sched_switch
2247  * @flags: TRACE_RECORD_CMDLINE for recording comm
2248  *         TRACE_RECORD_TGID for recording tgid
2249  */
2250 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2251 					  struct task_struct *next, int flags)
2252 {
2253 	bool done;
2254 
2255 	if (tracing_record_taskinfo_skip(flags))
2256 		return;
2257 
2258 	/*
2259 	 * Record as much task information as possible. If some fail, continue
2260 	 * to try to record the others.
2261 	 */
2262 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2263 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2264 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2265 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2266 
2267 	/* If recording any information failed, retry again soon. */
2268 	if (!done)
2269 		return;
2270 
2271 	__this_cpu_write(trace_taskinfo_save, false);
2272 }
2273 
2274 /* Helpers to record a specific task information */
2275 void tracing_record_cmdline(struct task_struct *task)
2276 {
2277 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2278 }
2279 
2280 void tracing_record_tgid(struct task_struct *task)
2281 {
2282 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2283 }
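
/*
 * Illustrative sketch: a hypothetical tracer hook (example_record_current()
 * is not a real kernel symbol) that caches both the comm and the tgid of
 * the current task before emitting events that only store its pid.
 */
static void __maybe_unused example_record_current(void)
{
	tracing_record_taskinfo(current, TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
}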
2284 
2285 /*
2286  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2287  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2288  * simplifies those functions and keeps them in sync.
2289  */
2290 enum print_line_t trace_handle_return(struct trace_seq *s)
2291 {
2292 	return trace_seq_has_overflowed(s) ?
2293 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2294 }
2295 EXPORT_SYMBOL_GPL(trace_handle_return);
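
/*
 * Illustrative sketch: a hypothetical event output callback
 * (example_trace_output() is not a real kernel symbol) showing the usual
 * pattern: write into the trace_seq, then let trace_handle_return() pick
 * the return value based on whether the seq overflowed.
 */
static enum print_line_t __maybe_unused
example_trace_output(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example: cpu=%d ts=%llu\n", iter->cpu, iter->ts);

	/* TRACE_TYPE_PARTIAL_LINE if the seq overflowed, HANDLED otherwise */
	return trace_handle_return(s);
}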
2296 
2297 void
2298 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2299 			     int pc)
2300 {
2301 	struct task_struct *tsk = current;
2302 
2303 	entry->preempt_count		= pc & 0xff;
2304 	entry->pid			= (tsk) ? tsk->pid : 0;
2305 	entry->flags =
2306 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2307 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2308 #else
2309 		TRACE_FLAG_IRQS_NOSUPPORT |
2310 #endif
2311 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2312 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2313 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2314 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2315 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2316 }
2317 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2318 
2319 struct ring_buffer_event *
2320 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2321 			  int type,
2322 			  unsigned long len,
2323 			  unsigned long flags, int pc)
2324 {
2325 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2326 }
2327 
2328 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2329 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2330 static int trace_buffered_event_ref;
2331 
2332 /**
2333  * trace_buffered_event_enable - enable buffering events
2334  *
2335  * When events are being filtered, it is quicker to write the event
2336  * data into a temporary buffer if there is a good chance that the
2337  * event will not be committed. Discarding an event from the ring
2338  * buffer is not as fast as committing one, and is much slower than
2339  * copying the data into a committed event.
2340  *
2341  * When an event is to be filtered, allocate per-CPU buffers to
2342  * write the event data into. If the event is filtered it is simply
2343  * dropped; otherwise, the entire data is committed to the ring
2344  * buffer in one shot.
2345  */
2346 void trace_buffered_event_enable(void)
2347 {
2348 	struct ring_buffer_event *event;
2349 	struct page *page;
2350 	int cpu;
2351 
2352 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2353 
2354 	if (trace_buffered_event_ref++)
2355 		return;
2356 
2357 	for_each_tracing_cpu(cpu) {
2358 		page = alloc_pages_node(cpu_to_node(cpu),
2359 					GFP_KERNEL | __GFP_NORETRY, 0);
2360 		if (!page)
2361 			goto failed;
2362 
2363 		event = page_address(page);
2364 		memset(event, 0, sizeof(*event));
2365 
2366 		per_cpu(trace_buffered_event, cpu) = event;
2367 
2368 		preempt_disable();
2369 		if (cpu == smp_processor_id() &&
2370 		    this_cpu_read(trace_buffered_event) !=
2371 		    per_cpu(trace_buffered_event, cpu))
2372 			WARN_ON_ONCE(1);
2373 		preempt_enable();
2374 	}
2375 
2376 	return;
2377  failed:
2378 	trace_buffered_event_disable();
2379 }
2380 
2381 static void enable_trace_buffered_event(void *data)
2382 {
2383 	/* Probably not needed, but do it anyway */
2384 	smp_rmb();
2385 	this_cpu_dec(trace_buffered_event_cnt);
2386 }
2387 
2388 static void disable_trace_buffered_event(void *data)
2389 {
2390 	this_cpu_inc(trace_buffered_event_cnt);
2391 }
2392 
2393 /**
2394  * trace_buffered_event_disable - disable buffering events
2395  *
2396  * When a filter is removed, it is faster to not use the buffered
2397  * events, and to commit directly into the ring buffer. Free up
2398  * the temp buffers when there are no more users. This requires
2399  * special synchronization with current events.
2400  */
2401 void trace_buffered_event_disable(void)
2402 {
2403 	int cpu;
2404 
2405 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2406 
2407 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2408 		return;
2409 
2410 	if (--trace_buffered_event_ref)
2411 		return;
2412 
2413 	preempt_disable();
2414 	/* For each CPU, set the buffer as used. */
2415 	smp_call_function_many(tracing_buffer_mask,
2416 			       disable_trace_buffered_event, NULL, 1);
2417 	preempt_enable();
2418 
2419 	/* Wait for all current users to finish */
2420 	synchronize_rcu();
2421 
2422 	for_each_tracing_cpu(cpu) {
2423 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2424 		per_cpu(trace_buffered_event, cpu) = NULL;
2425 	}
2426 	/*
2427 	 * Make sure trace_buffered_event is NULL before clearing
2428 	 * trace_buffered_event_cnt.
2429 	 */
2430 	smp_wmb();
2431 
2432 	preempt_disable();
2433 	/* Do the work on each cpu */
2434 	smp_call_function_many(tracing_buffer_mask,
2435 			       enable_trace_buffered_event, NULL, 1);
2436 	preempt_enable();
2437 }
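
/*
 * Illustrative sketch: a hypothetical filter setup path
 * (example_toggle_buffered_events() is not a real kernel symbol).  The
 * buffered-event state is reference counted, so enable/disable calls must
 * be balanced and made with event_mutex held, as both functions check.
 */
static void __maybe_unused example_toggle_buffered_events(bool enable)
{
	mutex_lock(&event_mutex);
	if (enable)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}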
2438 
2439 static struct ring_buffer *temp_buffer;
2440 
2441 struct ring_buffer_event *
2442 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2443 			  struct trace_event_file *trace_file,
2444 			  int type, unsigned long len,
2445 			  unsigned long flags, int pc)
2446 {
2447 	struct ring_buffer_event *entry;
2448 	int val;
2449 
2450 	*current_rb = trace_file->tr->trace_buffer.buffer;
2451 
2452 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2453 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2454 	    (entry = this_cpu_read(trace_buffered_event))) {
2455 		/* Try to use the per cpu buffer first */
2456 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2457 		if (val == 1) {
2458 			trace_event_setup(entry, type, flags, pc);
2459 			entry->array[0] = len;
2460 			return entry;
2461 		}
2462 		this_cpu_dec(trace_buffered_event_cnt);
2463 	}
2464 
2465 	entry = __trace_buffer_lock_reserve(*current_rb,
2466 					    type, len, flags, pc);
2467 	/*
2468 	 * If tracing is off, but we have triggers enabled
2469 	 * we still need to look at the event data. Use the temp_buffer
2470 	 * to store the trace event for the trigger to use. It's recursion
2471 	 * safe and will not be recorded anywhere.
2472 	 */
2473 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2474 		*current_rb = temp_buffer;
2475 		entry = __trace_buffer_lock_reserve(*current_rb,
2476 						    type, len, flags, pc);
2477 	}
2478 	return entry;
2479 }
2480 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2481 
2482 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2483 static DEFINE_MUTEX(tracepoint_printk_mutex);
2484 
2485 static void output_printk(struct trace_event_buffer *fbuffer)
2486 {
2487 	struct trace_event_call *event_call;
2488 	struct trace_event *event;
2489 	unsigned long flags;
2490 	struct trace_iterator *iter = tracepoint_print_iter;
2491 
2492 	/* We should never get here if iter is NULL */
2493 	if (WARN_ON_ONCE(!iter))
2494 		return;
2495 
2496 	event_call = fbuffer->trace_file->event_call;
2497 	if (!event_call || !event_call->event.funcs ||
2498 	    !event_call->event.funcs->trace)
2499 		return;
2500 
2501 	event = &fbuffer->trace_file->event_call->event;
2502 
2503 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2504 	trace_seq_init(&iter->seq);
2505 	iter->ent = fbuffer->entry;
2506 	event_call->event.funcs->trace(iter, 0, event);
2507 	trace_seq_putc(&iter->seq, 0);
2508 	printk("%s", iter->seq.buffer);
2509 
2510 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2511 }
2512 
2513 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2514 			     void __user *buffer, size_t *lenp,
2515 			     loff_t *ppos)
2516 {
2517 	int save_tracepoint_printk;
2518 	int ret;
2519 
2520 	mutex_lock(&tracepoint_printk_mutex);
2521 	save_tracepoint_printk = tracepoint_printk;
2522 
2523 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2524 
2525 	/*
2526 	 * This will force exiting early, as tracepoint_printk
2527 	 * is always zero when tracepoint_print_iter is not allocated.
2528 	 */
2529 	if (!tracepoint_print_iter)
2530 		tracepoint_printk = 0;
2531 
2532 	if (save_tracepoint_printk == tracepoint_printk)
2533 		goto out;
2534 
2535 	if (tracepoint_printk)
2536 		static_key_enable(&tracepoint_printk_key.key);
2537 	else
2538 		static_key_disable(&tracepoint_printk_key.key);
2539 
2540  out:
2541 	mutex_unlock(&tracepoint_printk_mutex);
2542 
2543 	return ret;
2544 }
2545 
2546 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2547 {
2548 	if (static_key_false(&tracepoint_printk_key.key))
2549 		output_printk(fbuffer);
2550 
2551 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2552 				    fbuffer->event, fbuffer->entry,
2553 				    fbuffer->flags, fbuffer->pc);
2554 }
2555 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2556 
2557 /*
2558  * Skip 3:
2559  *
2560  *   trace_buffer_unlock_commit_regs()
2561  *   trace_event_buffer_commit()
2562  *   trace_event_raw_event_xxx()
2563  */
2564 # define STACK_SKIP 3
2565 
2566 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2567 				     struct ring_buffer *buffer,
2568 				     struct ring_buffer_event *event,
2569 				     unsigned long flags, int pc,
2570 				     struct pt_regs *regs)
2571 {
2572 	__buffer_unlock_commit(buffer, event);
2573 
2574 	/*
2575 	 * If regs is not set, then skip the internal tracing functions
2576 	 * (STACK_SKIP of them). Note, we can still get here via blktrace,
2577 	 * wakeup tracer and mmiotrace, but that's ok if they lose a
2578 	 * function or two. They are not that meaningful.
2579 	 */
2580 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2581 	ftrace_trace_userstack(buffer, flags, pc);
2582 }
2583 
2584 /*
2585  * Similar to trace_buffer_unlock_commit_regs() but does not dump the stack.
2586  */
2587 void
2588 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2589 				   struct ring_buffer_event *event)
2590 {
2591 	__buffer_unlock_commit(buffer, event);
2592 }
2593 
2594 static void
2595 trace_process_export(struct trace_export *export,
2596 	       struct ring_buffer_event *event)
2597 {
2598 	struct trace_entry *entry;
2599 	unsigned int size = 0;
2600 
2601 	entry = ring_buffer_event_data(event);
2602 	size = ring_buffer_event_length(event);
2603 	export->write(export, entry, size);
2604 }
2605 
2606 static DEFINE_MUTEX(ftrace_export_lock);
2607 
2608 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2609 
2610 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2611 
2612 static inline void ftrace_exports_enable(void)
2613 {
2614 	static_branch_enable(&ftrace_exports_enabled);
2615 }
2616 
2617 static inline void ftrace_exports_disable(void)
2618 {
2619 	static_branch_disable(&ftrace_exports_enabled);
2620 }
2621 
2622 static void ftrace_exports(struct ring_buffer_event *event)
2623 {
2624 	struct trace_export *export;
2625 
2626 	preempt_disable_notrace();
2627 
2628 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2629 	while (export) {
2630 		trace_process_export(export, event);
2631 		export = rcu_dereference_raw_notrace(export->next);
2632 	}
2633 
2634 	preempt_enable_notrace();
2635 }
2636 
2637 static inline void
2638 add_trace_export(struct trace_export **list, struct trace_export *export)
2639 {
2640 	rcu_assign_pointer(export->next, *list);
2641 	/*
2642 	 * We are adding export to the list, but another
2643 	 * CPU might be walking that list. We need to make sure
2644 	 * the export->next pointer is valid before another CPU sees
2645 	 * the export pointer added to the list.
2646 	 */
2647 	rcu_assign_pointer(*list, export);
2648 }
2649 
2650 static inline int
2651 rm_trace_export(struct trace_export **list, struct trace_export *export)
2652 {
2653 	struct trace_export **p;
2654 
2655 	for (p = list; *p != NULL; p = &(*p)->next)
2656 		if (*p == export)
2657 			break;
2658 
2659 	if (*p != export)
2660 		return -1;
2661 
2662 	rcu_assign_pointer(*p, (*p)->next);
2663 
2664 	return 0;
2665 }
2666 
2667 static inline void
2668 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2669 {
2670 	if (*list == NULL)
2671 		ftrace_exports_enable();
2672 
2673 	add_trace_export(list, export);
2674 }
2675 
2676 static inline int
2677 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2678 {
2679 	int ret;
2680 
2681 	ret = rm_trace_export(list, export);
2682 	if (*list == NULL)
2683 		ftrace_exports_disable();
2684 
2685 	return ret;
2686 }
2687 
2688 int register_ftrace_export(struct trace_export *export)
2689 {
2690 	if (WARN_ON_ONCE(!export->write))
2691 		return -1;
2692 
2693 	mutex_lock(&ftrace_export_lock);
2694 
2695 	add_ftrace_export(&ftrace_exports_list, export);
2696 
2697 	mutex_unlock(&ftrace_export_lock);
2698 
2699 	return 0;
2700 }
2701 EXPORT_SYMBOL_GPL(register_ftrace_export);
2702 
2703 int unregister_ftrace_export(struct trace_export *export)
2704 {
2705 	int ret;
2706 
2707 	mutex_lock(&ftrace_export_lock);
2708 
2709 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2710 
2711 	mutex_unlock(&ftrace_export_lock);
2712 
2713 	return ret;
2714 }
2715 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
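
/*
 * Illustrative sketch: a hypothetical export module (the example_export*
 * names are not real kernel symbols) that mirrors trace entries to another
 * sink.  The ->write() callback is written to match the way
 * trace_process_export() invokes it above.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Forward the raw trace entry to some other transport (omitted). */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __maybe_unused example_export_init(void)
{
	return register_ftrace_export(&example_export);
}

static void __maybe_unused example_export_exit(void)
{
	unregister_ftrace_export(&example_export);
}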
2716 
2717 void
2718 trace_function(struct trace_array *tr,
2719 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2720 	       int pc)
2721 {
2722 	struct trace_event_call *call = &event_function;
2723 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2724 	struct ring_buffer_event *event;
2725 	struct ftrace_entry *entry;
2726 
2727 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2728 					    flags, pc);
2729 	if (!event)
2730 		return;
2731 	entry	= ring_buffer_event_data(event);
2732 	entry->ip			= ip;
2733 	entry->parent_ip		= parent_ip;
2734 
2735 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2736 		if (static_branch_unlikely(&ftrace_exports_enabled))
2737 			ftrace_exports(event);
2738 		__buffer_unlock_commit(buffer, event);
2739 	}
2740 }
2741 
2742 #ifdef CONFIG_STACKTRACE
2743 
2744 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2745 struct ftrace_stack {
2746 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2747 };
2748 
2749 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2750 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2751 
2752 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2753 				 unsigned long flags,
2754 				 int skip, int pc, struct pt_regs *regs)
2755 {
2756 	struct trace_event_call *call = &event_kernel_stack;
2757 	struct ring_buffer_event *event;
2758 	struct stack_entry *entry;
2759 	struct stack_trace trace;
2760 	int use_stack;
2761 	int size = FTRACE_STACK_ENTRIES;
2762 
2763 	trace.nr_entries	= 0;
2764 	trace.skip		= skip;
2765 
2766 	/*
2767 	 * Add one, for this function and the call to save_stack_trace().
2768 	 * If regs is set, then these functions will not be in the way.
2769 	 */
2770 #ifndef CONFIG_UNWINDER_ORC
2771 	if (!regs)
2772 		trace.skip++;
2773 #endif
2774 
2775 	/*
2776 	 * Since events can happen in NMIs, there's no safe way to
2777 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2778 	 * or NMI comes in, it will just have to use the default
2779 	 * FTRACE_STACK_SIZE.
2780 	 */
2781 	preempt_disable_notrace();
2782 
2783 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2784 	/*
2785 	 * We don't need any atomic variables, just a barrier.
2786 	 * If an interrupt comes in, we don't care, because it would
2787 	 * have exited and put the counter back to what we want.
2788 	 * We just need a barrier to keep gcc from moving things
2789 	 * around.
2790 	 */
2791 	barrier();
2792 	if (use_stack == 1) {
2793 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2794 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2795 
2796 		if (regs)
2797 			save_stack_trace_regs(regs, &trace);
2798 		else
2799 			save_stack_trace(&trace);
2800 
2801 		if (trace.nr_entries > size)
2802 			size = trace.nr_entries;
2803 	} else
2804 		/* From now on, use_stack is a boolean */
2805 		use_stack = 0;
2806 
2807 	size *= sizeof(unsigned long);
2808 
2809 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2810 					    sizeof(*entry) + size, flags, pc);
2811 	if (!event)
2812 		goto out;
2813 	entry = ring_buffer_event_data(event);
2814 
2815 	memset(&entry->caller, 0, size);
2816 
2817 	if (use_stack)
2818 		memcpy(&entry->caller, trace.entries,
2819 		       trace.nr_entries * sizeof(unsigned long));
2820 	else {
2821 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2822 		trace.entries		= entry->caller;
2823 		if (regs)
2824 			save_stack_trace_regs(regs, &trace);
2825 		else
2826 			save_stack_trace(&trace);
2827 	}
2828 
2829 	entry->size = trace.nr_entries;
2830 
2831 	if (!call_filter_check_discard(call, entry, buffer, event))
2832 		__buffer_unlock_commit(buffer, event);
2833 
2834  out:
2835 	/* Again, don't let gcc optimize things here */
2836 	barrier();
2837 	__this_cpu_dec(ftrace_stack_reserve);
2838 	preempt_enable_notrace();
2839 
2840 }
2841 
2842 static inline void ftrace_trace_stack(struct trace_array *tr,
2843 				      struct ring_buffer *buffer,
2844 				      unsigned long flags,
2845 				      int skip, int pc, struct pt_regs *regs)
2846 {
2847 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2848 		return;
2849 
2850 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2851 }
2852 
2853 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2854 		   int pc)
2855 {
2856 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2857 
2858 	if (rcu_is_watching()) {
2859 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2860 		return;
2861 	}
2862 
2863 	/*
2864 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2865 	 * but if the above rcu_is_watching() failed, then the NMI
2866 	 * triggered someplace critical, and rcu_irq_enter() should
2867 	 * not be called from NMI.
2868 	 */
2869 	if (unlikely(in_nmi()))
2870 		return;
2871 
2872 	rcu_irq_enter_irqson();
2873 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2874 	rcu_irq_exit_irqson();
2875 }
2876 
2877 /**
2878  * trace_dump_stack - record a stack back trace in the trace buffer
2879  * @skip: Number of functions to skip (helper handlers)
2880  */
2881 void trace_dump_stack(int skip)
2882 {
2883 	unsigned long flags;
2884 
2885 	if (tracing_disabled || tracing_selftest_running)
2886 		return;
2887 
2888 	local_save_flags(flags);
2889 
2890 #ifndef CONFIG_UNWINDER_ORC
2891 	/* Skip 1 to skip this function. */
2892 	skip++;
2893 #endif
2894 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2895 			     flags, skip, preempt_count(), NULL);
2896 }
2897 EXPORT_SYMBOL_GPL(trace_dump_stack);
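
/*
 * Illustrative sketch: a hypothetical debug hook (example_mark_path() is
 * not a real kernel symbol) that drops a kernel stack back trace into the
 * trace buffer at an interesting point.  Passing 0 skips no extra frames,
 * so the caller itself shows up in the trace.
 */
static void __maybe_unused example_mark_path(void)
{
	trace_dump_stack(0);
}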
2898 
2899 static DEFINE_PER_CPU(int, user_stack_count);
2900 
2901 void
2902 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2903 {
2904 	struct trace_event_call *call = &event_user_stack;
2905 	struct ring_buffer_event *event;
2906 	struct userstack_entry *entry;
2907 	struct stack_trace trace;
2908 
2909 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2910 		return;
2911 
2912 	/*
2913 	 * NMIs can not handle page faults, even with fixups.
2914 	 * Saving the user stack can (and often does) fault.
2915 	 */
2916 	if (unlikely(in_nmi()))
2917 		return;
2918 
2919 	/*
2920 	 * Prevent recursion, since the user stack tracing may
2921 	 * trigger other kernel events.
2922 	 */
2923 	preempt_disable();
2924 	if (__this_cpu_read(user_stack_count))
2925 		goto out;
2926 
2927 	__this_cpu_inc(user_stack_count);
2928 
2929 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2930 					    sizeof(*entry), flags, pc);
2931 	if (!event)
2932 		goto out_drop_count;
2933 	entry	= ring_buffer_event_data(event);
2934 
2935 	entry->tgid		= current->tgid;
2936 	memset(&entry->caller, 0, sizeof(entry->caller));
2937 
2938 	trace.nr_entries	= 0;
2939 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2940 	trace.skip		= 0;
2941 	trace.entries		= entry->caller;
2942 
2943 	save_stack_trace_user(&trace);
2944 	if (!call_filter_check_discard(call, entry, buffer, event))
2945 		__buffer_unlock_commit(buffer, event);
2946 
2947  out_drop_count:
2948 	__this_cpu_dec(user_stack_count);
2949  out:
2950 	preempt_enable();
2951 }
2952 
2953 #ifdef UNUSED
2954 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2955 {
2956 	ftrace_trace_userstack(tr, flags, preempt_count());
2957 }
2958 #endif /* UNUSED */
2959 
2960 #endif /* CONFIG_STACKTRACE */
2961 
2962 /* created for use with alloc_percpu */
2963 struct trace_buffer_struct {
2964 	int nesting;
2965 	char buffer[4][TRACE_BUF_SIZE];
2966 };
2967 
2968 static struct trace_buffer_struct *trace_percpu_buffer;
2969 
2970 /*
2971  * This allows for lockless recording.  If we're nested too deeply, then
2972  * this returns NULL.
2973  */
2974 static char *get_trace_buf(void)
2975 {
2976 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2977 
2978 	if (!buffer || buffer->nesting >= 4)
2979 		return NULL;
2980 
2981 	buffer->nesting++;
2982 
2983 	/* Interrupts must see nesting incremented before we use the buffer */
2984 	barrier();
2985 	return &buffer->buffer[buffer->nesting][0];
2986 }
2987 
2988 static void put_trace_buf(void)
2989 {
2990 	/* Don't let the decrement of nesting leak before this */
2991 	barrier();
2992 	this_cpu_dec(trace_percpu_buffer->nesting);
2993 }
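
/*
 * Illustrative sketch: a hypothetical user of the per-CPU scratch buffers
 * (example_scratch_write() is not a real kernel symbol).  A successful
 * get_trace_buf() must always be paired with put_trace_buf(), and the
 * caller must keep preemption disabled so it stays on the CPU whose
 * buffer and nesting count it took.
 */
static int __maybe_unused example_scratch_write(const char *msg)
{
	char *buf;
	int len;

	preempt_disable_notrace();
	buf = get_trace_buf();
	if (!buf) {
		/* all four nesting slots on this CPU are in use */
		preempt_enable_notrace();
		return 0;
	}

	len = snprintf(buf, TRACE_BUF_SIZE, "%s", msg);

	/* ... hand buf off to whatever consumes it ... */

	put_trace_buf();
	preempt_enable_notrace();
	return len;
}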
2994 
2995 static int alloc_percpu_trace_buffer(void)
2996 {
2997 	struct trace_buffer_struct *buffers;
2998 
2999 	buffers = alloc_percpu(struct trace_buffer_struct);
3000 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3001 		return -ENOMEM;
3002 
3003 	trace_percpu_buffer = buffers;
3004 	return 0;
3005 }
3006 
3007 static int buffers_allocated;
3008 
3009 void trace_printk_init_buffers(void)
3010 {
3011 	if (buffers_allocated)
3012 		return;
3013 
3014 	if (alloc_percpu_trace_buffer())
3015 		return;
3016 
3017 	/* trace_printk() is for debug use only. Don't use it in production. */
3018 
3019 	pr_warn("\n");
3020 	pr_warn("**********************************************************\n");
3021 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3022 	pr_warn("**                                                      **\n");
3023 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3024 	pr_warn("**                                                      **\n");
3025 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3026 	pr_warn("** unsafe for production use.                           **\n");
3027 	pr_warn("**                                                      **\n");
3028 	pr_warn("** If you see this message and you are not debugging    **\n");
3029 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3030 	pr_warn("**                                                      **\n");
3031 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3032 	pr_warn("**********************************************************\n");
3033 
3034 	/* Expand the buffers to set size */
3035 	tracing_update_buffers();
3036 
3037 	buffers_allocated = 1;
3038 
3039 	/*
3040 	 * trace_printk_init_buffers() can be called by modules.
3041 	 * If that happens, then we need to start cmdline recording
3042 	 * directly here. If the global_trace.trace_buffer.buffer is
3043 	 * already allocated here, then this was called by module code.
3044 	 */
3045 	if (global_trace.trace_buffer.buffer)
3046 		tracing_start_cmdline_record();
3047 }
3048 
3049 void trace_printk_start_comm(void)
3050 {
3051 	/* Start tracing comms if trace printk is set */
3052 	if (!buffers_allocated)
3053 		return;
3054 	tracing_start_cmdline_record();
3055 }
3056 
3057 static void trace_printk_start_stop_comm(int enabled)
3058 {
3059 	if (!buffers_allocated)
3060 		return;
3061 
3062 	if (enabled)
3063 		tracing_start_cmdline_record();
3064 	else
3065 		tracing_stop_cmdline_record();
3066 }
3067 
3068 /**
3069  * trace_vbprintk - write a binary message to the tracing buffer
3070  * @ip: the address of the caller; @fmt and @args describe the message
3071  */
3072 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3073 {
3074 	struct trace_event_call *call = &event_bprint;
3075 	struct ring_buffer_event *event;
3076 	struct ring_buffer *buffer;
3077 	struct trace_array *tr = &global_trace;
3078 	struct bprint_entry *entry;
3079 	unsigned long flags;
3080 	char *tbuffer;
3081 	int len = 0, size, pc;
3082 
3083 	if (unlikely(tracing_selftest_running || tracing_disabled))
3084 		return 0;
3085 
3086 	/* Don't pollute graph traces with trace_vprintk internals */
3087 	pause_graph_tracing();
3088 
3089 	pc = preempt_count();
3090 	preempt_disable_notrace();
3091 
3092 	tbuffer = get_trace_buf();
3093 	if (!tbuffer) {
3094 		len = 0;
3095 		goto out_nobuffer;
3096 	}
3097 
3098 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3099 
3100 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3101 		goto out;
3102 
3103 	local_save_flags(flags);
3104 	size = sizeof(*entry) + sizeof(u32) * len;
3105 	buffer = tr->trace_buffer.buffer;
3106 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3107 					    flags, pc);
3108 	if (!event)
3109 		goto out;
3110 	entry = ring_buffer_event_data(event);
3111 	entry->ip			= ip;
3112 	entry->fmt			= fmt;
3113 
3114 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3115 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3116 		__buffer_unlock_commit(buffer, event);
3117 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3118 	}
3119 
3120 out:
3121 	put_trace_buf();
3122 
3123 out_nobuffer:
3124 	preempt_enable_notrace();
3125 	unpause_graph_tracing();
3126 
3127 	return len;
3128 }
3129 EXPORT_SYMBOL_GPL(trace_vbprintk);
3130 
3131 __printf(3, 0)
3132 static int
3133 __trace_array_vprintk(struct ring_buffer *buffer,
3134 		      unsigned long ip, const char *fmt, va_list args)
3135 {
3136 	struct trace_event_call *call = &event_print;
3137 	struct ring_buffer_event *event;
3138 	int len = 0, size, pc;
3139 	struct print_entry *entry;
3140 	unsigned long flags;
3141 	char *tbuffer;
3142 
3143 	if (tracing_disabled || tracing_selftest_running)
3144 		return 0;
3145 
3146 	/* Don't pollute graph traces with trace_vprintk internals */
3147 	pause_graph_tracing();
3148 
3149 	pc = preempt_count();
3150 	preempt_disable_notrace();
3151 
3153 	tbuffer = get_trace_buf();
3154 	if (!tbuffer) {
3155 		len = 0;
3156 		goto out_nobuffer;
3157 	}
3158 
3159 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3160 
3161 	local_save_flags(flags);
3162 	size = sizeof(*entry) + len + 1;
3163 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3164 					    flags, pc);
3165 	if (!event)
3166 		goto out;
3167 	entry = ring_buffer_event_data(event);
3168 	entry->ip = ip;
3169 
3170 	memcpy(&entry->buf, tbuffer, len + 1);
3171 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3172 		__buffer_unlock_commit(buffer, event);
3173 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3174 	}
3175 
3176 out:
3177 	put_trace_buf();
3178 
3179 out_nobuffer:
3180 	preempt_enable_notrace();
3181 	unpause_graph_tracing();
3182 
3183 	return len;
3184 }
3185 
3186 __printf(3, 0)
3187 int trace_array_vprintk(struct trace_array *tr,
3188 			unsigned long ip, const char *fmt, va_list args)
3189 {
3190 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3191 }
3192 
3193 __printf(3, 0)
3194 int trace_array_printk(struct trace_array *tr,
3195 		       unsigned long ip, const char *fmt, ...)
3196 {
3197 	int ret;
3198 	va_list ap;
3199 
3200 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3201 		return 0;
3202 
3203 	va_start(ap, fmt);
3204 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3205 	va_end(ap);
3206 	return ret;
3207 }
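
/*
 * Illustrative sketch: a hypothetical caller (example_instance_printk() is
 * not a real kernel symbol) that already owns a trace_array instance and
 * writes a formatted message into that instance's buffer, tagged with the
 * caller's instruction pointer.
 */
static void __maybe_unused example_instance_printk(struct trace_array *tr,
						   int value)
{
	trace_array_printk(tr, _THIS_IP_, "example value: %d\n", value);
}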
3208 
3209 __printf(3, 4)
3210 int trace_array_printk_buf(struct ring_buffer *buffer,
3211 			   unsigned long ip, const char *fmt, ...)
3212 {
3213 	int ret;
3214 	va_list ap;
3215 
3216 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3217 		return 0;
3218 
3219 	va_start(ap, fmt);
3220 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3221 	va_end(ap);
3222 	return ret;
3223 }
3224 
3225 __printf(2, 0)
3226 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3227 {
3228 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3229 }
3230 EXPORT_SYMBOL_GPL(trace_vprintk);
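
/*
 * Illustrative sketch: a hypothetical varargs wrapper
 * (example_trace_printf() is not a real kernel symbol) showing how a
 * trace_printk()-style helper funnels into trace_vprintk(), which writes
 * into global_trace.
 */
__printf(2, 3)
static int __maybe_unused
example_trace_printf(unsigned long ip, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = trace_vprintk(ip, fmt, ap);
	va_end(ap);

	return ret;
}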
3231 
3232 static void trace_iterator_increment(struct trace_iterator *iter)
3233 {
3234 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3235 
3236 	iter->idx++;
3237 	if (buf_iter)
3238 		ring_buffer_read(buf_iter, NULL);
3239 }
3240 
3241 static struct trace_entry *
3242 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3243 		unsigned long *lost_events)
3244 {
3245 	struct ring_buffer_event *event;
3246 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3247 
3248 	if (buf_iter)
3249 		event = ring_buffer_iter_peek(buf_iter, ts);
3250 	else
3251 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3252 					 lost_events);
3253 
3254 	if (event) {
3255 		iter->ent_size = ring_buffer_event_length(event);
3256 		return ring_buffer_event_data(event);
3257 	}
3258 	iter->ent_size = 0;
3259 	return NULL;
3260 }
3261 
3262 static struct trace_entry *
3263 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3264 		  unsigned long *missing_events, u64 *ent_ts)
3265 {
3266 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3267 	struct trace_entry *ent, *next = NULL;
3268 	unsigned long lost_events = 0, next_lost = 0;
3269 	int cpu_file = iter->cpu_file;
3270 	u64 next_ts = 0, ts;
3271 	int next_cpu = -1;
3272 	int next_size = 0;
3273 	int cpu;
3274 
3275 	/*
3276 	 * If we are in a per_cpu trace file, don't bother iterating over
3277 	 * all CPUs; peek at that CPU directly.
3278 	 */
3279 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3280 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3281 			return NULL;
3282 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3283 		if (ent_cpu)
3284 			*ent_cpu = cpu_file;
3285 
3286 		return ent;
3287 	}
3288 
3289 	for_each_tracing_cpu(cpu) {
3290 
3291 		if (ring_buffer_empty_cpu(buffer, cpu))
3292 			continue;
3293 
3294 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3295 
3296 		/*
3297 		 * Pick the entry with the smallest timestamp:
3298 		 */
3299 		if (ent && (!next || ts < next_ts)) {
3300 			next = ent;
3301 			next_cpu = cpu;
3302 			next_ts = ts;
3303 			next_lost = lost_events;
3304 			next_size = iter->ent_size;
3305 		}
3306 	}
3307 
3308 	iter->ent_size = next_size;
3309 
3310 	if (ent_cpu)
3311 		*ent_cpu = next_cpu;
3312 
3313 	if (ent_ts)
3314 		*ent_ts = next_ts;
3315 
3316 	if (missing_events)
3317 		*missing_events = next_lost;
3318 
3319 	return next;
3320 }
3321 
3322 /* Find the next real entry, without updating the iterator itself */
3323 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3324 					  int *ent_cpu, u64 *ent_ts)
3325 {
3326 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3327 }
3328 
3329 /* Find the next real entry, and increment the iterator to the next entry */
3330 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3331 {
3332 	iter->ent = __find_next_entry(iter, &iter->cpu,
3333 				      &iter->lost_events, &iter->ts);
3334 
3335 	if (iter->ent)
3336 		trace_iterator_increment(iter);
3337 
3338 	return iter->ent ? iter : NULL;
3339 }
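
/*
 * Illustrative sketch: a hypothetical consumer loop (example_walk_entries()
 * is not a real kernel symbol) that walks all remaining entries of an
 * already set up iterator; the caller is assumed to hold the usual
 * trace_event_read_lock()/trace_access_lock() protection.
 */
static void __maybe_unused example_walk_entries(struct trace_iterator *iter)
{
	while (trace_find_next_entry_inc(iter)) {
		/* iter->ent, iter->cpu and iter->ts describe this entry */
		;
	}
}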
3340 
3341 static void trace_consume(struct trace_iterator *iter)
3342 {
3343 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3344 			    &iter->lost_events);
3345 }
3346 
3347 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3348 {
3349 	struct trace_iterator *iter = m->private;
3350 	int i = (int)*pos;
3351 	void *ent;
3352 
3353 	WARN_ON_ONCE(iter->leftover);
3354 
3355 	(*pos)++;
3356 
3357 	/* can't go backwards */
3358 	if (iter->idx > i)
3359 		return NULL;
3360 
3361 	if (iter->idx < 0)
3362 		ent = trace_find_next_entry_inc(iter);
3363 	else
3364 		ent = iter;
3365 
3366 	while (ent && iter->idx < i)
3367 		ent = trace_find_next_entry_inc(iter);
3368 
3369 	iter->pos = *pos;
3370 
3371 	return ent;
3372 }
3373 
3374 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3375 {
3376 	struct ring_buffer_event *event;
3377 	struct ring_buffer_iter *buf_iter;
3378 	unsigned long entries = 0;
3379 	u64 ts;
3380 
3381 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3382 
3383 	buf_iter = trace_buffer_iter(iter, cpu);
3384 	if (!buf_iter)
3385 		return;
3386 
3387 	ring_buffer_iter_reset(buf_iter);
3388 
3389 	/*
3390 	 * With the max latency tracers, we could have the case
3391 	 * where a reset never took place on a CPU. This is evident
3392 	 * when the timestamp is before the start of the buffer.
3393 	 */
3394 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3395 		if (ts >= iter->trace_buffer->time_start)
3396 			break;
3397 		entries++;
3398 		ring_buffer_read(buf_iter, NULL);
3399 	}
3400 
3401 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3402 }
3403 
3404 /*
3405  * The current tracer is copied to avoid holding a global
3406  * lock all around.
3407  */
3408 static void *s_start(struct seq_file *m, loff_t *pos)
3409 {
3410 	struct trace_iterator *iter = m->private;
3411 	struct trace_array *tr = iter->tr;
3412 	int cpu_file = iter->cpu_file;
3413 	void *p = NULL;
3414 	loff_t l = 0;
3415 	int cpu;
3416 
3417 	/*
3418 	 * Copy the tracer to avoid using a global lock all around.
3419 	 * iter->trace is a copy of current_trace, so the name pointer
3420 	 * may be compared instead of using strcmp(), as iter->trace->name
3421 	 * will point to the same string as current_trace->name.
3422 	 */
3423 	mutex_lock(&trace_types_lock);
3424 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3425 		*iter->trace = *tr->current_trace;
3426 	mutex_unlock(&trace_types_lock);
3427 
3428 #ifdef CONFIG_TRACER_MAX_TRACE
3429 	if (iter->snapshot && iter->trace->use_max_tr)
3430 		return ERR_PTR(-EBUSY);
3431 #endif
3432 
3433 	if (!iter->snapshot)
3434 		atomic_inc(&trace_record_taskinfo_disabled);
3435 
3436 	if (*pos != iter->pos) {
3437 		iter->ent = NULL;
3438 		iter->cpu = 0;
3439 		iter->idx = -1;
3440 
3441 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3442 			for_each_tracing_cpu(cpu)
3443 				tracing_iter_reset(iter, cpu);
3444 		} else
3445 			tracing_iter_reset(iter, cpu_file);
3446 
3447 		iter->leftover = 0;
3448 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3449 			;
3450 
3451 	} else {
3452 		/*
3453 		 * If we overflowed the seq_file before, then we want
3454 		 * to just reuse the trace_seq buffer again.
3455 		 */
3456 		if (iter->leftover)
3457 			p = iter;
3458 		else {
3459 			l = *pos - 1;
3460 			p = s_next(m, p, &l);
3461 		}
3462 	}
3463 
3464 	trace_event_read_lock();
3465 	trace_access_lock(cpu_file);
3466 	return p;
3467 }
3468 
3469 static void s_stop(struct seq_file *m, void *p)
3470 {
3471 	struct trace_iterator *iter = m->private;
3472 
3473 #ifdef CONFIG_TRACER_MAX_TRACE
3474 	if (iter->snapshot && iter->trace->use_max_tr)
3475 		return;
3476 #endif
3477 
3478 	if (!iter->snapshot)
3479 		atomic_dec(&trace_record_taskinfo_disabled);
3480 
3481 	trace_access_unlock(iter->cpu_file);
3482 	trace_event_read_unlock();
3483 }
3484 
3485 static void
3486 get_total_entries(struct trace_buffer *buf,
3487 		  unsigned long *total, unsigned long *entries)
3488 {
3489 	unsigned long count;
3490 	int cpu;
3491 
3492 	*total = 0;
3493 	*entries = 0;
3494 
3495 	for_each_tracing_cpu(cpu) {
3496 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3497 		/*
3498 		 * If this buffer has skipped entries, then we hold all
3499 		 * entries for the trace and we need to ignore the
3500 		 * ones before the time stamp.
3501 		 */
3502 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3503 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3504 			/* total is the same as the entries */
3505 			*total += count;
3506 		} else
3507 			*total += count +
3508 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3509 		*entries += count;
3510 	}
3511 }
3512 
3513 static void print_lat_help_header(struct seq_file *m)
3514 {
3515 	seq_puts(m, "#                  _------=> CPU#            \n"
3516 		    "#                 / _-----=> irqs-off        \n"
3517 		    "#                | / _----=> need-resched    \n"
3518 		    "#                || / _---=> hardirq/softirq \n"
3519 		    "#                ||| / _--=> preempt-depth   \n"
3520 		    "#                |||| /     delay            \n"
3521 		    "#  cmd     pid   ||||| time  |   caller      \n"
3522 		    "#     \\   /      |||||  \\    |   /         \n");
3523 }
3524 
3525 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3526 {
3527 	unsigned long total;
3528 	unsigned long entries;
3529 
3530 	get_total_entries(buf, &total, &entries);
3531 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3532 		   entries, total, num_online_cpus());
3533 	seq_puts(m, "#\n");
3534 }
3535 
3536 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3537 				   unsigned int flags)
3538 {
3539 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3540 
3541 	print_event_info(buf, m);
3542 
3543 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3544 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3545 }
3546 
3547 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3548 				       unsigned int flags)
3549 {
3550 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3551 	const char tgid_space[] = "          ";
3552 	const char space[] = "  ";
3553 
3554 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3555 		   tgid ? tgid_space : space);
3556 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3557 		   tgid ? tgid_space : space);
3558 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3559 		   tgid ? tgid_space : space);
3560 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3561 		   tgid ? tgid_space : space);
3562 	seq_printf(m, "#                          %s||| /     delay\n",
3563 		   tgid ? tgid_space : space);
3564 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3565 		   tgid ? "   TGID   " : space);
3566 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3567 		   tgid ? "     |    " : space);
3568 }
3569 
3570 void
3571 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3572 {
3573 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3574 	struct trace_buffer *buf = iter->trace_buffer;
3575 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3576 	struct tracer *type = iter->trace;
3577 	unsigned long entries;
3578 	unsigned long total;
3579 	const char *name = "preemption";
3580 
3581 	name = type->name;
3582 
3583 	get_total_entries(buf, &total, &entries);
3584 
3585 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3586 		   name, UTS_RELEASE);
3587 	seq_puts(m, "# -----------------------------------"
3588 		 "---------------------------------\n");
3589 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3590 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3591 		   nsecs_to_usecs(data->saved_latency),
3592 		   entries,
3593 		   total,
3594 		   buf->cpu,
3595 #if defined(CONFIG_PREEMPT_NONE)
3596 		   "server",
3597 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3598 		   "desktop",
3599 #elif defined(CONFIG_PREEMPT)
3600 		   "preempt",
3601 #else
3602 		   "unknown",
3603 #endif
3604 		   /* These are reserved for later use */
3605 		   0, 0, 0, 0);
3606 #ifdef CONFIG_SMP
3607 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3608 #else
3609 	seq_puts(m, ")\n");
3610 #endif
3611 	seq_puts(m, "#    -----------------\n");
3612 	seq_printf(m, "#    | task: %.16s-%d "
3613 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3614 		   data->comm, data->pid,
3615 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3616 		   data->policy, data->rt_priority);
3617 	seq_puts(m, "#    -----------------\n");
3618 
3619 	if (data->critical_start) {
3620 		seq_puts(m, "#  => started at: ");
3621 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3622 		trace_print_seq(m, &iter->seq);
3623 		seq_puts(m, "\n#  => ended at:   ");
3624 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3625 		trace_print_seq(m, &iter->seq);
3626 		seq_puts(m, "\n#\n");
3627 	}
3628 
3629 	seq_puts(m, "#\n");
3630 }
3631 
3632 static void test_cpu_buff_start(struct trace_iterator *iter)
3633 {
3634 	struct trace_seq *s = &iter->seq;
3635 	struct trace_array *tr = iter->tr;
3636 
3637 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3638 		return;
3639 
3640 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3641 		return;
3642 
3643 	if (cpumask_available(iter->started) &&
3644 	    cpumask_test_cpu(iter->cpu, iter->started))
3645 		return;
3646 
3647 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3648 		return;
3649 
3650 	if (cpumask_available(iter->started))
3651 		cpumask_set_cpu(iter->cpu, iter->started);
3652 
3653 	/* Don't print started cpu buffer for the first entry of the trace */
3654 	if (iter->idx > 1)
3655 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3656 				iter->cpu);
3657 }
3658 
3659 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3660 {
3661 	struct trace_array *tr = iter->tr;
3662 	struct trace_seq *s = &iter->seq;
3663 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3664 	struct trace_entry *entry;
3665 	struct trace_event *event;
3666 
3667 	entry = iter->ent;
3668 
3669 	test_cpu_buff_start(iter);
3670 
3671 	event = ftrace_find_event(entry->type);
3672 
3673 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3674 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3675 			trace_print_lat_context(iter);
3676 		else
3677 			trace_print_context(iter);
3678 	}
3679 
3680 	if (trace_seq_has_overflowed(s))
3681 		return TRACE_TYPE_PARTIAL_LINE;
3682 
3683 	if (event)
3684 		return event->funcs->trace(iter, sym_flags, event);
3685 
3686 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3687 
3688 	return trace_handle_return(s);
3689 }
3690 
3691 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3692 {
3693 	struct trace_array *tr = iter->tr;
3694 	struct trace_seq *s = &iter->seq;
3695 	struct trace_entry *entry;
3696 	struct trace_event *event;
3697 
3698 	entry = iter->ent;
3699 
3700 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3701 		trace_seq_printf(s, "%d %d %llu ",
3702 				 entry->pid, iter->cpu, iter->ts);
3703 
3704 	if (trace_seq_has_overflowed(s))
3705 		return TRACE_TYPE_PARTIAL_LINE;
3706 
3707 	event = ftrace_find_event(entry->type);
3708 	if (event)
3709 		return event->funcs->raw(iter, 0, event);
3710 
3711 	trace_seq_printf(s, "%d ?\n", entry->type);
3712 
3713 	return trace_handle_return(s);
3714 }
3715 
3716 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3717 {
3718 	struct trace_array *tr = iter->tr;
3719 	struct trace_seq *s = &iter->seq;
3720 	unsigned char newline = '\n';
3721 	struct trace_entry *entry;
3722 	struct trace_event *event;
3723 
3724 	entry = iter->ent;
3725 
3726 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3727 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3728 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3729 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3730 		if (trace_seq_has_overflowed(s))
3731 			return TRACE_TYPE_PARTIAL_LINE;
3732 	}
3733 
3734 	event = ftrace_find_event(entry->type);
3735 	if (event) {
3736 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3737 		if (ret != TRACE_TYPE_HANDLED)
3738 			return ret;
3739 	}
3740 
3741 	SEQ_PUT_FIELD(s, newline);
3742 
3743 	return trace_handle_return(s);
3744 }
3745 
3746 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3747 {
3748 	struct trace_array *tr = iter->tr;
3749 	struct trace_seq *s = &iter->seq;
3750 	struct trace_entry *entry;
3751 	struct trace_event *event;
3752 
3753 	entry = iter->ent;
3754 
3755 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3756 		SEQ_PUT_FIELD(s, entry->pid);
3757 		SEQ_PUT_FIELD(s, iter->cpu);
3758 		SEQ_PUT_FIELD(s, iter->ts);
3759 		if (trace_seq_has_overflowed(s))
3760 			return TRACE_TYPE_PARTIAL_LINE;
3761 	}
3762 
3763 	event = ftrace_find_event(entry->type);
3764 	return event ? event->funcs->binary(iter, 0, event) :
3765 		TRACE_TYPE_HANDLED;
3766 }
3767 
3768 int trace_empty(struct trace_iterator *iter)
3769 {
3770 	struct ring_buffer_iter *buf_iter;
3771 	int cpu;
3772 
3773 	/* If we are looking at one CPU buffer, only check that one */
3774 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3775 		cpu = iter->cpu_file;
3776 		buf_iter = trace_buffer_iter(iter, cpu);
3777 		if (buf_iter) {
3778 			if (!ring_buffer_iter_empty(buf_iter))
3779 				return 0;
3780 		} else {
3781 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3782 				return 0;
3783 		}
3784 		return 1;
3785 	}
3786 
3787 	for_each_tracing_cpu(cpu) {
3788 		buf_iter = trace_buffer_iter(iter, cpu);
3789 		if (buf_iter) {
3790 			if (!ring_buffer_iter_empty(buf_iter))
3791 				return 0;
3792 		} else {
3793 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3794 				return 0;
3795 		}
3796 	}
3797 
3798 	return 1;
3799 }
3800 
3801 /*  Called with trace_event_read_lock() held. */
3802 enum print_line_t print_trace_line(struct trace_iterator *iter)
3803 {
3804 	struct trace_array *tr = iter->tr;
3805 	unsigned long trace_flags = tr->trace_flags;
3806 	enum print_line_t ret;
3807 
3808 	if (iter->lost_events) {
3809 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3810 				 iter->cpu, iter->lost_events);
3811 		if (trace_seq_has_overflowed(&iter->seq))
3812 			return TRACE_TYPE_PARTIAL_LINE;
3813 	}
3814 
3815 	if (iter->trace && iter->trace->print_line) {
3816 		ret = iter->trace->print_line(iter);
3817 		if (ret != TRACE_TYPE_UNHANDLED)
3818 			return ret;
3819 	}
3820 
3821 	if (iter->ent->type == TRACE_BPUTS &&
3822 			trace_flags & TRACE_ITER_PRINTK &&
3823 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3824 		return trace_print_bputs_msg_only(iter);
3825 
3826 	if (iter->ent->type == TRACE_BPRINT &&
3827 			trace_flags & TRACE_ITER_PRINTK &&
3828 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3829 		return trace_print_bprintk_msg_only(iter);
3830 
3831 	if (iter->ent->type == TRACE_PRINT &&
3832 			trace_flags & TRACE_ITER_PRINTK &&
3833 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3834 		return trace_print_printk_msg_only(iter);
3835 
3836 	if (trace_flags & TRACE_ITER_BIN)
3837 		return print_bin_fmt(iter);
3838 
3839 	if (trace_flags & TRACE_ITER_HEX)
3840 		return print_hex_fmt(iter);
3841 
3842 	if (trace_flags & TRACE_ITER_RAW)
3843 		return print_raw_fmt(iter);
3844 
3845 	return print_trace_fmt(iter);
3846 }
3847 
3848 void trace_latency_header(struct seq_file *m)
3849 {
3850 	struct trace_iterator *iter = m->private;
3851 	struct trace_array *tr = iter->tr;
3852 
3853 	/* print nothing if the buffers are empty */
3854 	if (trace_empty(iter))
3855 		return;
3856 
3857 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3858 		print_trace_header(m, iter);
3859 
3860 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3861 		print_lat_help_header(m);
3862 }
3863 
3864 void trace_default_header(struct seq_file *m)
3865 {
3866 	struct trace_iterator *iter = m->private;
3867 	struct trace_array *tr = iter->tr;
3868 	unsigned long trace_flags = tr->trace_flags;
3869 
3870 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3871 		return;
3872 
3873 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3874 		/* print nothing if the buffers are empty */
3875 		if (trace_empty(iter))
3876 			return;
3877 		print_trace_header(m, iter);
3878 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3879 			print_lat_help_header(m);
3880 	} else {
3881 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3882 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3883 				print_func_help_header_irq(iter->trace_buffer,
3884 							   m, trace_flags);
3885 			else
3886 				print_func_help_header(iter->trace_buffer, m,
3887 						       trace_flags);
3888 		}
3889 	}
3890 }
3891 
3892 static void test_ftrace_alive(struct seq_file *m)
3893 {
3894 	if (!ftrace_is_dead())
3895 		return;
3896 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3897 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3898 }
3899 
3900 #ifdef CONFIG_TRACER_MAX_TRACE
3901 static void show_snapshot_main_help(struct seq_file *m)
3902 {
3903 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3904 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3905 		    "#                      Takes a snapshot of the main buffer.\n"
3906 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3907 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3908 		    "#                       is not a '0' or '1')\n");
3909 }
3910 
3911 static void show_snapshot_percpu_help(struct seq_file *m)
3912 {
3913 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3914 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3915 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3916 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3917 #else
3918 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3919 		    "#                     Must use main snapshot file to allocate.\n");
3920 #endif
3921 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3922 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3923 		    "#                       is not a '0' or '1')\n");
3924 }
3925 
3926 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3927 {
3928 	if (iter->tr->allocated_snapshot)
3929 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3930 	else
3931 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3932 
3933 	seq_puts(m, "# Snapshot commands:\n");
3934 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3935 		show_snapshot_main_help(m);
3936 	else
3937 		show_snapshot_percpu_help(m);
3938 }
3939 #else
3940 /* Should never be called */
3941 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3942 #endif
3943 
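/*
 * seq_file ->show() callback for the "trace" file.  With no current
 * entry it prints the tracer name, the header and, for an empty
 * snapshot, the snapshot help text; if the previous call overflowed
 * the seq_file buffer it flushes the leftover data first; otherwise
 * it formats the current entry via print_trace_line().
 */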
3944 static int s_show(struct seq_file *m, void *v)
3945 {
3946 	struct trace_iterator *iter = v;
3947 	int ret;
3948 
3949 	if (iter->ent == NULL) {
3950 		if (iter->tr) {
3951 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3952 			seq_puts(m, "#\n");
3953 			test_ftrace_alive(m);
3954 		}
3955 		if (iter->snapshot && trace_empty(iter))
3956 			print_snapshot_help(m, iter);
3957 		else if (iter->trace && iter->trace->print_header)
3958 			iter->trace->print_header(m);
3959 		else
3960 			trace_default_header(m);
3961 
3962 	} else if (iter->leftover) {
3963 		/*
3964 		 * If we filled the seq_file buffer earlier, we
3965 		 * want to just show it now.
3966 		 */
3967 		ret = trace_print_seq(m, &iter->seq);
3968 
3969 		/* ret should this time be zero, but you never know */
3970 		iter->leftover = ret;
3971 
3972 	} else {
3973 		print_trace_line(iter);
3974 		ret = trace_print_seq(m, &iter->seq);
3975 		/*
3976 		 * If we overflow the seq_file buffer, then it will
3977 		 * ask us for this data again at start up.
3978 		 * Use that instead.
3979 		 *  ret is 0 if seq_file write succeeded.
3980 		 *        -1 otherwise.
3981 		 */
3982 		iter->leftover = ret;
3983 	}
3984 
3985 	return 0;
3986 }
3987 
3988 /*
3989  * Should be used after trace_array_get(); trace_types_lock
3990  * ensures that i_cdev has already been initialized.
3991  */
3992 static inline int tracing_get_cpu(struct inode *inode)
3993 {
3994 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3995 		return (long)inode->i_cdev - 1;
3996 	return RING_BUFFER_ALL_CPUS;
3997 }
3998 
3999 static const struct seq_operations tracer_seq_ops = {
4000 	.start		= s_start,
4001 	.next		= s_next,
4002 	.stop		= s_stop,
4003 	.show		= s_show,
4004 };
4005 
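/*
 * Build the iterator backing the "trace" and "snapshot" files: copy
 * the current tracer so concurrent tracer changes cannot affect the
 * reader, pick the main or the max buffer, stop tracing unless the
 * snapshot is being read, and prepare a ring buffer iterator for the
 * requested CPU(s).  Returns an ERR_PTR() on failure.
 */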
4006 static struct trace_iterator *
4007 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4008 {
4009 	struct trace_array *tr = inode->i_private;
4010 	struct trace_iterator *iter;
4011 	int cpu;
4012 
4013 	if (tracing_disabled)
4014 		return ERR_PTR(-ENODEV);
4015 
4016 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4017 	if (!iter)
4018 		return ERR_PTR(-ENOMEM);
4019 
4020 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4021 				    GFP_KERNEL);
4022 	if (!iter->buffer_iter)
4023 		goto release;
4024 
4025 	/*
4026 	 * We make a copy of the current tracer to avoid concurrent
4027 	 * changes on it while we are reading.
4028 	 */
4029 	mutex_lock(&trace_types_lock);
4030 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4031 	if (!iter->trace)
4032 		goto fail;
4033 
4034 	*iter->trace = *tr->current_trace;
4035 
4036 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4037 		goto fail;
4038 
4039 	iter->tr = tr;
4040 
4041 #ifdef CONFIG_TRACER_MAX_TRACE
4042 	/* Currently only the top directory has a snapshot */
4043 	if (tr->current_trace->print_max || snapshot)
4044 		iter->trace_buffer = &tr->max_buffer;
4045 	else
4046 #endif
4047 		iter->trace_buffer = &tr->trace_buffer;
4048 	iter->snapshot = snapshot;
4049 	iter->pos = -1;
4050 	iter->cpu_file = tracing_get_cpu(inode);
4051 	mutex_init(&iter->mutex);
4052 
4053 	/* Notify the tracer early; before we stop tracing. */
4054 	if (iter->trace && iter->trace->open)
4055 		iter->trace->open(iter);
4056 
4057 	/* Annotate start of buffers if we had overruns */
4058 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4059 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4060 
4061 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4062 	if (trace_clocks[tr->clock_id].in_ns)
4063 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4064 
4065 	/* stop the trace while dumping if we are not opening "snapshot" */
4066 	if (!iter->snapshot)
4067 		tracing_stop_tr(tr);
4068 
4069 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4070 		for_each_tracing_cpu(cpu) {
4071 			iter->buffer_iter[cpu] =
4072 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
4073 		}
4074 		ring_buffer_read_prepare_sync();
4075 		for_each_tracing_cpu(cpu) {
4076 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4077 			tracing_iter_reset(iter, cpu);
4078 		}
4079 	} else {
4080 		cpu = iter->cpu_file;
4081 		iter->buffer_iter[cpu] =
4082 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
4083 		ring_buffer_read_prepare_sync();
4084 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4085 		tracing_iter_reset(iter, cpu);
4086 	}
4087 
4088 	mutex_unlock(&trace_types_lock);
4089 
4090 	return iter;
4091 
4092  fail:
4093 	mutex_unlock(&trace_types_lock);
4094 	kfree(iter->trace);
4095 	kfree(iter->buffer_iter);
4096 release:
4097 	seq_release_private(inode, file);
4098 	return ERR_PTR(-ENOMEM);
4099 }
4100 
4101 int tracing_open_generic(struct inode *inode, struct file *filp)
4102 {
4103 	if (tracing_disabled)
4104 		return -ENODEV;
4105 
4106 	filp->private_data = inode->i_private;
4107 	return 0;
4108 }
4109 
4110 bool tracing_is_disabled(void)
4111 {
4112 	return (tracing_disabled) ? true : false;
4113 }
4114 
4115 /*
4116  * Open and update trace_array ref count.
4117  * Must have the current trace_array passed to it.
4118  */
4119 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4120 {
4121 	struct trace_array *tr = inode->i_private;
4122 
4123 	if (tracing_disabled)
4124 		return -ENODEV;
4125 
4126 	if (trace_array_get(tr) < 0)
4127 		return -ENODEV;
4128 
4129 	filp->private_data = inode->i_private;
4130 
4131 	return 0;
4132 }
4133 
4134 static int tracing_release(struct inode *inode, struct file *file)
4135 {
4136 	struct trace_array *tr = inode->i_private;
4137 	struct seq_file *m = file->private_data;
4138 	struct trace_iterator *iter;
4139 	int cpu;
4140 
4141 	if (!(file->f_mode & FMODE_READ)) {
4142 		trace_array_put(tr);
4143 		return 0;
4144 	}
4145 
4146 	/* Writes do not use seq_file */
4147 	iter = m->private;
4148 	mutex_lock(&trace_types_lock);
4149 
4150 	for_each_tracing_cpu(cpu) {
4151 		if (iter->buffer_iter[cpu])
4152 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4153 	}
4154 
4155 	if (iter->trace && iter->trace->close)
4156 		iter->trace->close(iter);
4157 
4158 	if (!iter->snapshot)
4159 		/* reenable tracing if it was previously enabled */
4160 		tracing_start_tr(tr);
4161 
4162 	__trace_array_put(tr);
4163 
4164 	mutex_unlock(&trace_types_lock);
4165 
4166 	mutex_destroy(&iter->mutex);
4167 	free_cpumask_var(iter->started);
4168 	kfree(iter->trace);
4169 	kfree(iter->buffer_iter);
4170 	seq_release_private(inode, file);
4171 
4172 	return 0;
4173 }
4174 
4175 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4176 {
4177 	struct trace_array *tr = inode->i_private;
4178 
4179 	trace_array_put(tr);
4180 	return 0;
4181 }
4182 
4183 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4184 {
4185 	struct trace_array *tr = inode->i_private;
4186 
4187 	trace_array_put(tr);
4188 
4189 	return single_release(inode, file);
4190 }
4191 
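/*
 * open() handler for the "trace" file.  A write open with O_TRUNC
 * clears the buffer (all CPUs, or only the CPU backing a per_cpu
 * file); a read open attaches a full iterator via __tracing_open().
 */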
4192 static int tracing_open(struct inode *inode, struct file *file)
4193 {
4194 	struct trace_array *tr = inode->i_private;
4195 	struct trace_iterator *iter;
4196 	int ret = 0;
4197 
4198 	if (trace_array_get(tr) < 0)
4199 		return -ENODEV;
4200 
4201 	/* If this file was open for write, then erase contents */
4202 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4203 		int cpu = tracing_get_cpu(inode);
4204 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4205 
4206 #ifdef CONFIG_TRACER_MAX_TRACE
4207 		if (tr->current_trace->print_max)
4208 			trace_buf = &tr->max_buffer;
4209 #endif
4210 
4211 		if (cpu == RING_BUFFER_ALL_CPUS)
4212 			tracing_reset_online_cpus(trace_buf);
4213 		else
4214 			tracing_reset(trace_buf, cpu);
4215 	}
4216 
4217 	if (file->f_mode & FMODE_READ) {
4218 		iter = __tracing_open(inode, file, false);
4219 		if (IS_ERR(iter))
4220 			ret = PTR_ERR(iter);
4221 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4222 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4223 	}
4224 
4225 	if (ret < 0)
4226 		trace_array_put(tr);
4227 
4228 	return ret;
4229 }
4230 
4231 /*
4232  * Some tracers are not suitable for instance buffers.
4233  * A tracer is always available for the global array (toplevel)
4234  * or if it explicitly states that it is.
4235  */
4236 static bool
4237 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4238 {
4239 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4240 }
4241 
4242 /* Find the next tracer that this trace array may use */
4243 static struct tracer *
4244 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4245 {
4246 	while (t && !trace_ok_for_array(t, tr))
4247 		t = t->next;
4248 
4249 	return t;
4250 }
4251 
4252 static void *
4253 t_next(struct seq_file *m, void *v, loff_t *pos)
4254 {
4255 	struct trace_array *tr = m->private;
4256 	struct tracer *t = v;
4257 
4258 	(*pos)++;
4259 
4260 	if (t)
4261 		t = get_tracer_for_array(tr, t->next);
4262 
4263 	return t;
4264 }
4265 
4266 static void *t_start(struct seq_file *m, loff_t *pos)
4267 {
4268 	struct trace_array *tr = m->private;
4269 	struct tracer *t;
4270 	loff_t l = 0;
4271 
4272 	mutex_lock(&trace_types_lock);
4273 
4274 	t = get_tracer_for_array(tr, trace_types);
4275 	for (; t && l < *pos; t = t_next(m, t, &l))
4276 		;
4277 
4278 	return t;
4279 }
4280 
4281 static void t_stop(struct seq_file *m, void *p)
4282 {
4283 	mutex_unlock(&trace_types_lock);
4284 }
4285 
4286 static int t_show(struct seq_file *m, void *v)
4287 {
4288 	struct tracer *t = v;
4289 
4290 	if (!t)
4291 		return 0;
4292 
4293 	seq_puts(m, t->name);
4294 	if (t->next)
4295 		seq_putc(m, ' ');
4296 	else
4297 		seq_putc(m, '\n');
4298 
4299 	return 0;
4300 }
4301 
4302 static const struct seq_operations show_traces_seq_ops = {
4303 	.start		= t_start,
4304 	.next		= t_next,
4305 	.stop		= t_stop,
4306 	.show		= t_show,
4307 };
4308 
4309 static int show_traces_open(struct inode *inode, struct file *file)
4310 {
4311 	struct trace_array *tr = inode->i_private;
4312 	struct seq_file *m;
4313 	int ret;
4314 
4315 	if (tracing_disabled)
4316 		return -ENODEV;
4317 
4318 	ret = seq_open(file, &show_traces_seq_ops);
4319 	if (ret)
4320 		return ret;
4321 
4322 	m = file->private_data;
4323 	m->private = tr;
4324 
4325 	return 0;
4326 }
4327 
4328 static ssize_t
4329 tracing_write_stub(struct file *filp, const char __user *ubuf,
4330 		   size_t count, loff_t *ppos)
4331 {
4332 	return count;
4333 }
4334 
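/*
 * Seeking is only meaningful when the file was opened for reading and
 * is therefore backed by a seq_file; writers simply get their file
 * position reset to zero.
 */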
4335 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4336 {
4337 	int ret;
4338 
4339 	if (file->f_mode & FMODE_READ)
4340 		ret = seq_lseek(file, offset, whence);
4341 	else
4342 		file->f_pos = ret = 0;
4343 
4344 	return ret;
4345 }
4346 
4347 static const struct file_operations tracing_fops = {
4348 	.open		= tracing_open,
4349 	.read		= seq_read,
4350 	.write		= tracing_write_stub,
4351 	.llseek		= tracing_lseek,
4352 	.release	= tracing_release,
4353 };
4354 
4355 static const struct file_operations show_traces_fops = {
4356 	.open		= show_traces_open,
4357 	.read		= seq_read,
4358 	.release	= seq_release,
4359 	.llseek		= seq_lseek,
4360 };
4361 
4362 static ssize_t
4363 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4364 		     size_t count, loff_t *ppos)
4365 {
4366 	struct trace_array *tr = file_inode(filp)->i_private;
4367 	char *mask_str;
4368 	int len;
4369 
4370 	len = snprintf(NULL, 0, "%*pb\n",
4371 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4372 	mask_str = kmalloc(len, GFP_KERNEL);
4373 	if (!mask_str)
4374 		return -ENOMEM;
4375 
4376 	len = snprintf(mask_str, len, "%*pb\n",
4377 		       cpumask_pr_args(tr->tracing_cpumask));
4378 	if (len >= count) {
4379 		count = -EINVAL;
4380 		goto out_err;
4381 	}
4382 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4383 
4384 out_err:
4385 	kfree(mask_str);
4386 
4387 	return count;
4388 }
4389 
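/*
 * Update the set of CPUs being traced.  For every CPU whose bit is
 * about to flip, adjust the per-CPU disabled counter and switch ring
 * buffer recording for that CPU on or off before committing the new
 * mask.
 */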
4390 static ssize_t
4391 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4392 		      size_t count, loff_t *ppos)
4393 {
4394 	struct trace_array *tr = file_inode(filp)->i_private;
4395 	cpumask_var_t tracing_cpumask_new;
4396 	int err, cpu;
4397 
4398 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4399 		return -ENOMEM;
4400 
4401 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4402 	if (err)
4403 		goto err_unlock;
4404 
4405 	local_irq_disable();
4406 	arch_spin_lock(&tr->max_lock);
4407 	for_each_tracing_cpu(cpu) {
4408 		/*
4409 		 * Increase/decrease the disabled counter if we are
4410 		 * about to flip a bit in the cpumask:
4411 		 */
4412 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4413 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4414 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4415 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4416 		}
4417 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4418 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4419 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4420 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4421 		}
4422 	}
4423 	arch_spin_unlock(&tr->max_lock);
4424 	local_irq_enable();
4425 
4426 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4427 	free_cpumask_var(tracing_cpumask_new);
4428 
4429 	return count;
4430 
4431 err_unlock:
4432 	free_cpumask_var(tracing_cpumask_new);
4433 
4434 	return err;
4435 }
4436 
4437 static const struct file_operations tracing_cpumask_fops = {
4438 	.open		= tracing_open_generic_tr,
4439 	.read		= tracing_cpumask_read,
4440 	.write		= tracing_cpumask_write,
4441 	.release	= tracing_release_generic_tr,
4442 	.llseek		= generic_file_llseek,
4443 };
4444 
4445 static int tracing_trace_options_show(struct seq_file *m, void *v)
4446 {
4447 	struct tracer_opt *trace_opts;
4448 	struct trace_array *tr = m->private;
4449 	u32 tracer_flags;
4450 	int i;
4451 
4452 	mutex_lock(&trace_types_lock);
4453 	tracer_flags = tr->current_trace->flags->val;
4454 	trace_opts = tr->current_trace->flags->opts;
4455 
4456 	for (i = 0; trace_options[i]; i++) {
4457 		if (tr->trace_flags & (1 << i))
4458 			seq_printf(m, "%s\n", trace_options[i]);
4459 		else
4460 			seq_printf(m, "no%s\n", trace_options[i]);
4461 	}
4462 
4463 	for (i = 0; trace_opts[i].name; i++) {
4464 		if (tracer_flags & trace_opts[i].bit)
4465 			seq_printf(m, "%s\n", trace_opts[i].name);
4466 		else
4467 			seq_printf(m, "no%s\n", trace_opts[i].name);
4468 	}
4469 	mutex_unlock(&trace_types_lock);
4470 
4471 	return 0;
4472 }
4473 
4474 static int __set_tracer_option(struct trace_array *tr,
4475 			       struct tracer_flags *tracer_flags,
4476 			       struct tracer_opt *opts, int neg)
4477 {
4478 	struct tracer *trace = tracer_flags->trace;
4479 	int ret;
4480 
4481 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4482 	if (ret)
4483 		return ret;
4484 
4485 	if (neg)
4486 		tracer_flags->val &= ~opts->bit;
4487 	else
4488 		tracer_flags->val |= opts->bit;
4489 	return 0;
4490 }
4491 
4492 /* Try to assign a tracer specific option */
4493 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4494 {
4495 	struct tracer *trace = tr->current_trace;
4496 	struct tracer_flags *tracer_flags = trace->flags;
4497 	struct tracer_opt *opts = NULL;
4498 	int i;
4499 
4500 	for (i = 0; tracer_flags->opts[i].name; i++) {
4501 		opts = &tracer_flags->opts[i];
4502 
4503 		if (strcmp(cmp, opts->name) == 0)
4504 			return __set_tracer_option(tr, trace->flags, opts, neg);
4505 	}
4506 
4507 	return -EINVAL;
4508 }
4509 
4510 /* Some tracers require overwrite to stay enabled */
4511 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4512 {
4513 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4514 		return -1;
4515 
4516 	return 0;
4517 }
4518 
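/*
 * Set or clear one TRACE_ITER_* flag on @tr and apply its side effects
 * (cmdline/tgid recording, fork following, ring buffer overwrite mode,
 * trace_printk).  Returns 0 on success, -EINVAL if the current tracer
 * rejects the change, or -ENOMEM if the tgid map cannot be allocated.
 */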
4519 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4520 {
4521 	/* do nothing if flag is already set */
4522 	if (!!(tr->trace_flags & mask) == !!enabled)
4523 		return 0;
4524 
4525 	/* Give the tracer a chance to approve the change */
4526 	if (tr->current_trace->flag_changed)
4527 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4528 			return -EINVAL;
4529 
4530 	if (enabled)
4531 		tr->trace_flags |= mask;
4532 	else
4533 		tr->trace_flags &= ~mask;
4534 
4535 	if (mask == TRACE_ITER_RECORD_CMD)
4536 		trace_event_enable_cmd_record(enabled);
4537 
4538 	if (mask == TRACE_ITER_RECORD_TGID) {
4539 		if (!tgid_map)
4540 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4541 					   sizeof(*tgid_map),
4542 					   GFP_KERNEL);
4543 		if (!tgid_map) {
4544 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4545 			return -ENOMEM;
4546 		}
4547 
4548 		trace_event_enable_tgid_record(enabled);
4549 	}
4550 
4551 	if (mask == TRACE_ITER_EVENT_FORK)
4552 		trace_event_follow_fork(tr, enabled);
4553 
4554 	if (mask == TRACE_ITER_FUNC_FORK)
4555 		ftrace_pid_follow_fork(tr, enabled);
4556 
4557 	if (mask == TRACE_ITER_OVERWRITE) {
4558 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4559 #ifdef CONFIG_TRACER_MAX_TRACE
4560 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4561 #endif
4562 	}
4563 
4564 	if (mask == TRACE_ITER_PRINTK) {
4565 		trace_printk_start_stop_comm(enabled);
4566 		trace_printk_control(enabled);
4567 	}
4568 
4569 	return 0;
4570 }
4571 
4572 static int trace_set_options(struct trace_array *tr, char *option)
4573 {
4574 	char *cmp;
4575 	int neg = 0;
4576 	int ret;
4577 	size_t orig_len = strlen(option);
4578 	int len;
4579 
4580 	cmp = strstrip(option);
4581 
4582 	len = str_has_prefix(cmp, "no");
4583 	if (len)
4584 		neg = 1;
4585 
4586 	cmp += len;
4587 
4588 	mutex_lock(&trace_types_lock);
4589 
4590 	ret = match_string(trace_options, -1, cmp);
4591 	/* If no option could be set, test the specific tracer options */
4592 	if (ret < 0)
4593 		ret = set_tracer_option(tr, cmp, neg);
4594 	else
4595 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4596 
4597 	mutex_unlock(&trace_types_lock);
4598 
4599 	/*
4600 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4601 	 * turn it back into a space.
4602 	 */
4603 	if (orig_len > strlen(option))
4604 		option[strlen(option)] = ' ';
4605 
4606 	return ret;
4607 }
4608 
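/*
 * Apply each option saved in the comma-separated
 * trace_boot_options_buf to the global trace array.  The commas are
 * put back afterwards so the buffer can be parsed again later.
 */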
4609 static void __init apply_trace_boot_options(void)
4610 {
4611 	char *buf = trace_boot_options_buf;
4612 	char *option;
4613 
4614 	while (true) {
4615 		option = strsep(&buf, ",");
4616 
4617 		if (!option)
4618 			break;
4619 
4620 		if (*option)
4621 			trace_set_options(&global_trace, option);
4622 
4623 		/* Put back the comma to allow this to be called again */
4624 		if (buf)
4625 			*(buf - 1) = ',';
4626 	}
4627 }
4628 
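/*
 * Writing an option name into the trace_options file enables it, and
 * writing the name prefixed with "no" disables it.  For example
 * (using the option name that corresponds to TRACE_ITER_OVERWRITE):
 *
 *   echo overwrite   > trace_options
 *   echo nooverwrite > trace_options
 *
 * Any option listed by reading trace_options can be set this way.
 */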
4629 static ssize_t
4630 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4631 			size_t cnt, loff_t *ppos)
4632 {
4633 	struct seq_file *m = filp->private_data;
4634 	struct trace_array *tr = m->private;
4635 	char buf[64];
4636 	int ret;
4637 
4638 	if (cnt >= sizeof(buf))
4639 		return -EINVAL;
4640 
4641 	if (copy_from_user(buf, ubuf, cnt))
4642 		return -EFAULT;
4643 
4644 	buf[cnt] = 0;
4645 
4646 	ret = trace_set_options(tr, buf);
4647 	if (ret < 0)
4648 		return ret;
4649 
4650 	*ppos += cnt;
4651 
4652 	return cnt;
4653 }
4654 
4655 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4656 {
4657 	struct trace_array *tr = inode->i_private;
4658 	int ret;
4659 
4660 	if (tracing_disabled)
4661 		return -ENODEV;
4662 
4663 	if (trace_array_get(tr) < 0)
4664 		return -ENODEV;
4665 
4666 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4667 	if (ret < 0)
4668 		trace_array_put(tr);
4669 
4670 	return ret;
4671 }
4672 
4673 static const struct file_operations tracing_iter_fops = {
4674 	.open		= tracing_trace_options_open,
4675 	.read		= seq_read,
4676 	.llseek		= seq_lseek,
4677 	.release	= tracing_single_release_tr,
4678 	.write		= tracing_trace_options_write,
4679 };
4680 
4681 static const char readme_msg[] =
4682 	"tracing mini-HOWTO:\n\n"
4683 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4684 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4685 	" Important files:\n"
4686 	"  trace\t\t\t- The static contents of the buffer\n"
4687 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4688 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4689 	"  current_tracer\t- function and latency tracers\n"
4690 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4691 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4692 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4693 	"  trace_clock\t\t- change the clock used to order events\n"
4694 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4695 	"      global:   Synced across CPUs but slows tracing down.\n"
4696 	"     counter:   Not a clock, but just an increment\n"
4697 	"      uptime:   Jiffy counter from time of boot\n"
4698 	"        perf:   Same clock that perf events use\n"
4699 #ifdef CONFIG_X86_64
4700 	"     x86-tsc:   TSC cycle counter\n"
4701 #endif
4702 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4703 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4704 	"    absolute:   Absolute (standalone) timestamp\n"
4705 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4706 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4707 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4708 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4709 	"\t\t\t  Remove sub-buffer with rmdir\n"
4710 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4711 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4712 	"\t\t\t  option name\n"
4713 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4714 #ifdef CONFIG_DYNAMIC_FTRACE
4715 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4716 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4717 	"\t\t\t  functions\n"
4718 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4719 	"\t     modules: Can select a group via module\n"
4720 	"\t      Format: :mod:<module-name>\n"
4721 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4722 	"\t    triggers: a command to perform when function is hit\n"
4723 	"\t      Format: <function>:<trigger>[:count]\n"
4724 	"\t     trigger: traceon, traceoff\n"
4725 	"\t\t      enable_event:<system>:<event>\n"
4726 	"\t\t      disable_event:<system>:<event>\n"
4727 #ifdef CONFIG_STACKTRACE
4728 	"\t\t      stacktrace\n"
4729 #endif
4730 #ifdef CONFIG_TRACER_SNAPSHOT
4731 	"\t\t      snapshot\n"
4732 #endif
4733 	"\t\t      dump\n"
4734 	"\t\t      cpudump\n"
4735 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4736 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4737 	"\t     The first one will disable tracing every time do_fault is hit\n"
4738 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4739 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4740 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4741 	"\t       the counter will not decrement. It only decrements when the\n"
4742 	"\t       trigger did work\n"
4743 	"\t     To remove trigger without count:\n"
4744 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4745 	"\t     To remove trigger with a count:\n"
4746 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4747 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4748 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4749 	"\t    modules: Can select a group via module command :mod:\n"
4750 	"\t    Does not accept triggers\n"
4751 #endif /* CONFIG_DYNAMIC_FTRACE */
4752 #ifdef CONFIG_FUNCTION_TRACER
4753 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4754 	"\t\t    (function)\n"
4755 #endif
4756 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4757 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4758 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4759 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4760 #endif
4761 #ifdef CONFIG_TRACER_SNAPSHOT
4762 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4763 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4764 	"\t\t\t  information\n"
4765 #endif
4766 #ifdef CONFIG_STACK_TRACER
4767 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4768 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4769 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4770 	"\t\t\t  new trace)\n"
4771 #ifdef CONFIG_DYNAMIC_FTRACE
4772 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4773 	"\t\t\t  traces\n"
4774 #endif
4775 #endif /* CONFIG_STACK_TRACER */
4776 #ifdef CONFIG_DYNAMIC_EVENTS
4777 	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4778 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4779 #endif
4780 #ifdef CONFIG_KPROBE_EVENTS
4781 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4782 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4783 #endif
4784 #ifdef CONFIG_UPROBE_EVENTS
4785 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4786 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4787 #endif
4788 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4789 	"\t  accepts: event-definitions (one definition per line)\n"
4790 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4791 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4792 #ifdef CONFIG_HIST_TRIGGERS
4793 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4794 #endif
4795 	"\t           -:[<group>/]<event>\n"
4796 #ifdef CONFIG_KPROBE_EVENTS
4797 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4798 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4799 #endif
4800 #ifdef CONFIG_UPROBE_EVENTS
4801 	"\t    place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4802 #endif
4803 	"\t     args: <name>=fetcharg[:type]\n"
4804 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4805 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4806 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4807 #else
4808 	"\t           $stack<index>, $stack, $retval, $comm\n"
4809 #endif
4810 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4811 	"\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4812 	"\t           <type>\\[<array-size>\\]\n"
4813 #ifdef CONFIG_HIST_TRIGGERS
4814 	"\t    field: <stype> <name>;\n"
4815 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4816 	"\t           [unsigned] char/int/long\n"
4817 #endif
4818 #endif
4819 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4820 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4821 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4822 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4823 	"\t\t\t  events\n"
4824 	"      filter\t\t- If set, only events passing filter are traced\n"
4825 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4826 	"\t\t\t  <event>:\n"
4827 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4828 	"      filter\t\t- If set, only events passing filter are traced\n"
4829 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4830 	"\t    Format: <trigger>[:count][if <filter>]\n"
4831 	"\t   trigger: traceon, traceoff\n"
4832 	"\t            enable_event:<system>:<event>\n"
4833 	"\t            disable_event:<system>:<event>\n"
4834 #ifdef CONFIG_HIST_TRIGGERS
4835 	"\t            enable_hist:<system>:<event>\n"
4836 	"\t            disable_hist:<system>:<event>\n"
4837 #endif
4838 #ifdef CONFIG_STACKTRACE
4839 	"\t\t    stacktrace\n"
4840 #endif
4841 #ifdef CONFIG_TRACER_SNAPSHOT
4842 	"\t\t    snapshot\n"
4843 #endif
4844 #ifdef CONFIG_HIST_TRIGGERS
4845 	"\t\t    hist (see below)\n"
4846 #endif
4847 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4848 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4849 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4850 	"\t                  events/block/block_unplug/trigger\n"
4851 	"\t   The first disables tracing every time block_unplug is hit.\n"
4852 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4853 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4854 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4855 	"\t   Like function triggers, the counter is only decremented if it\n"
4856 	"\t    enabled or disabled tracing.\n"
4857 	"\t   To remove a trigger without a count:\n"
4858 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4859 	"\t   To remove a trigger with a count:\n"
4860 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4861 	"\t   Filters can be ignored when removing a trigger.\n"
4862 #ifdef CONFIG_HIST_TRIGGERS
4863 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4864 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4865 	"\t            [:values=<field1[,field2,...]>]\n"
4866 	"\t            [:sort=<field1[,field2,...]>]\n"
4867 	"\t            [:size=#entries]\n"
4868 	"\t            [:pause][:continue][:clear]\n"
4869 	"\t            [:name=histname1]\n"
4870 	"\t            [:<handler>.<action>]\n"
4871 	"\t            [if <filter>]\n\n"
4872 	"\t    When a matching event is hit, an entry is added to a hash\n"
4873 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4874 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4875 	"\t    correspond to fields in the event's format description.  Keys\n"
4876 	"\t    can be any field, or the special string 'stacktrace'.\n"
4877 	"\t    Compound keys consisting of up to two fields can be specified\n"
4878 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4879 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4880 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4881 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4882 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4883 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4884 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4885 	"\t    its histogram data will be shared with other triggers of the\n"
4886 	"\t    same name, and trigger hits will update this common data.\n\n"
4887 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4888 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4889 	"\t    triggers attached to an event, there will be a table for each\n"
4890 	"\t    trigger in the output.  The table displayed for a named\n"
4891 	"\t    trigger will be the same as any other instance having the\n"
4892 	"\t    same name.  The default format used to display a given field\n"
4893 	"\t    can be modified by appending any of the following modifiers\n"
4894 	"\t    to the field name, as applicable:\n\n"
4895 	"\t            .hex        display a number as a hex value\n"
4896 	"\t            .sym        display an address as a symbol\n"
4897 	"\t            .sym-offset display an address as a symbol and offset\n"
4898 	"\t            .execname   display a common_pid as a program name\n"
4899 	"\t            .syscall    display a syscall id as a syscall name\n"
4900 	"\t            .log2       display log2 value rather than raw number\n"
4901 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4902 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4903 	"\t    trigger or to start a hist trigger but not log any events\n"
4904 	"\t    until told to do so.  'continue' can be used to start or\n"
4905 	"\t    restart a paused hist trigger.\n\n"
4906 	"\t    The 'clear' parameter will clear the contents of a running\n"
4907 	"\t    hist trigger and leave its current paused/active state\n"
4908 	"\t    unchanged.\n\n"
4909 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4910 	"\t    have one event conditionally start and stop another event's\n"
4911 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4912 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4913 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4914 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4915 	"\t    histogram entry is added or updated.  They take the form:\n\n"
4916 	"\t    The available handlers are:\n\n"
4917 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4918 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4919 	"\t        onchange(var)            - invoke action if var changes\n\n"
4920 	"\t    The available actions are:\n\n"
4921 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4922 	"\t        save(field,...)                      - save current event fields\n"
4923 #ifdef CONFIG_TRACER_SNAPSHOT
4924 	"\t        snapshot()                           - snapshot the trace buffer\n"
4925 #endif
4926 #endif
4927 ;
4928 
4929 static ssize_t
4930 tracing_readme_read(struct file *filp, char __user *ubuf,
4931 		       size_t cnt, loff_t *ppos)
4932 {
4933 	return simple_read_from_buffer(ubuf, cnt, ppos,
4934 					readme_msg, strlen(readme_msg));
4935 }
4936 
4937 static const struct file_operations tracing_readme_fops = {
4938 	.open		= tracing_open_generic,
4939 	.read		= tracing_readme_read,
4940 	.llseek		= generic_file_llseek,
4941 };
4942 
4943 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4944 {
4945 	int *ptr = v;
4946 
4947 	if (*pos || m->count)
4948 		ptr++;
4949 
4950 	(*pos)++;
4951 
4952 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4953 		if (trace_find_tgid(*ptr))
4954 			return ptr;
4955 	}
4956 
4957 	return NULL;
4958 }
4959 
4960 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4961 {
4962 	void *v;
4963 	loff_t l = 0;
4964 
4965 	if (!tgid_map)
4966 		return NULL;
4967 
4968 	v = &tgid_map[0];
4969 	while (l <= *pos) {
4970 		v = saved_tgids_next(m, v, &l);
4971 		if (!v)
4972 			return NULL;
4973 	}
4974 
4975 	return v;
4976 }
4977 
4978 static void saved_tgids_stop(struct seq_file *m, void *v)
4979 {
4980 }
4981 
4982 static int saved_tgids_show(struct seq_file *m, void *v)
4983 {
4984 	int pid = (int *)v - tgid_map;
4985 
4986 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4987 	return 0;
4988 }
4989 
4990 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4991 	.start		= saved_tgids_start,
4992 	.stop		= saved_tgids_stop,
4993 	.next		= saved_tgids_next,
4994 	.show		= saved_tgids_show,
4995 };
4996 
4997 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4998 {
4999 	if (tracing_disabled)
5000 		return -ENODEV;
5001 
5002 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5003 }
5004 
5005 
5006 static const struct file_operations tracing_saved_tgids_fops = {
5007 	.open		= tracing_saved_tgids_open,
5008 	.read		= seq_read,
5009 	.llseek		= seq_lseek,
5010 	.release	= seq_release,
5011 };
5012 
5013 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5014 {
5015 	unsigned int *ptr = v;
5016 
5017 	if (*pos || m->count)
5018 		ptr++;
5019 
5020 	(*pos)++;
5021 
5022 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5023 	     ptr++) {
5024 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5025 			continue;
5026 
5027 		return ptr;
5028 	}
5029 
5030 	return NULL;
5031 }
5032 
5033 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5034 {
5035 	void *v;
5036 	loff_t l = 0;
5037 
5038 	preempt_disable();
5039 	arch_spin_lock(&trace_cmdline_lock);
5040 
5041 	v = &savedcmd->map_cmdline_to_pid[0];
5042 	while (l <= *pos) {
5043 		v = saved_cmdlines_next(m, v, &l);
5044 		if (!v)
5045 			return NULL;
5046 	}
5047 
5048 	return v;
5049 }
5050 
5051 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5052 {
5053 	arch_spin_unlock(&trace_cmdline_lock);
5054 	preempt_enable();
5055 }
5056 
5057 static int saved_cmdlines_show(struct seq_file *m, void *v)
5058 {
5059 	char buf[TASK_COMM_LEN];
5060 	unsigned int *pid = v;
5061 
5062 	__trace_find_cmdline(*pid, buf);
5063 	seq_printf(m, "%d %s\n", *pid, buf);
5064 	return 0;
5065 }
5066 
5067 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5068 	.start		= saved_cmdlines_start,
5069 	.next		= saved_cmdlines_next,
5070 	.stop		= saved_cmdlines_stop,
5071 	.show		= saved_cmdlines_show,
5072 };
5073 
5074 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5075 {
5076 	if (tracing_disabled)
5077 		return -ENODEV;
5078 
5079 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5080 }
5081 
5082 static const struct file_operations tracing_saved_cmdlines_fops = {
5083 	.open		= tracing_saved_cmdlines_open,
5084 	.read		= seq_read,
5085 	.llseek		= seq_lseek,
5086 	.release	= seq_release,
5087 };
5088 
5089 static ssize_t
5090 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5091 				 size_t cnt, loff_t *ppos)
5092 {
5093 	char buf[64];
5094 	int r;
5095 
5096 	arch_spin_lock(&trace_cmdline_lock);
5097 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5098 	arch_spin_unlock(&trace_cmdline_lock);
5099 
5100 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5101 }
5102 
5103 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5104 {
5105 	kfree(s->saved_cmdlines);
5106 	kfree(s->map_cmdline_to_pid);
5107 	kfree(s);
5108 }
5109 
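/*
 * Replace the saved_cmdlines buffer with one sized for @val entries.
 * The new buffer is swapped in under trace_cmdline_lock and the old
 * one is freed afterwards.
 */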
5110 static int tracing_resize_saved_cmdlines(unsigned int val)
5111 {
5112 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5113 
5114 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5115 	if (!s)
5116 		return -ENOMEM;
5117 
5118 	if (allocate_cmdlines_buffer(val, s) < 0) {
5119 		kfree(s);
5120 		return -ENOMEM;
5121 	}
5122 
5123 	arch_spin_lock(&trace_cmdline_lock);
5124 	savedcmd_temp = savedcmd;
5125 	savedcmd = s;
5126 	arch_spin_unlock(&trace_cmdline_lock);
5127 	free_saved_cmdlines_buffer(savedcmd_temp);
5128 
5129 	return 0;
5130 }
5131 
5132 static ssize_t
5133 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5134 				  size_t cnt, loff_t *ppos)
5135 {
5136 	unsigned long val;
5137 	int ret;
5138 
5139 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5140 	if (ret)
5141 		return ret;
5142 
5143 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5144 	if (!val || val > PID_MAX_DEFAULT)
5145 		return -EINVAL;
5146 
5147 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5148 	if (ret < 0)
5149 		return ret;
5150 
5151 	*ppos += cnt;
5152 
5153 	return cnt;
5154 }
5155 
5156 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5157 	.open		= tracing_open_generic,
5158 	.read		= tracing_saved_cmdlines_size_read,
5159 	.write		= tracing_saved_cmdlines_size_write,
5160 };
5161 
5162 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5163 static union trace_eval_map_item *
5164 update_eval_map(union trace_eval_map_item *ptr)
5165 {
5166 	if (!ptr->map.eval_string) {
5167 		if (ptr->tail.next) {
5168 			ptr = ptr->tail.next;
5169 			/* Set ptr to the next real item (skip head) */
5170 			ptr++;
5171 		} else
5172 			return NULL;
5173 	}
5174 	return ptr;
5175 }
5176 
5177 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5178 {
5179 	union trace_eval_map_item *ptr = v;
5180 
5181 	/*
5182 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5183 	 * This really should never happen.
5184 	 */
5185 	ptr = update_eval_map(ptr);
5186 	if (WARN_ON_ONCE(!ptr))
5187 		return NULL;
5188 
5189 	ptr++;
5190 
5191 	(*pos)++;
5192 
5193 	ptr = update_eval_map(ptr);
5194 
5195 	return ptr;
5196 }
5197 
5198 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5199 {
5200 	union trace_eval_map_item *v;
5201 	loff_t l = 0;
5202 
5203 	mutex_lock(&trace_eval_mutex);
5204 
5205 	v = trace_eval_maps;
5206 	if (v)
5207 		v++;
5208 
5209 	while (v && l < *pos)
5210 		v = eval_map_next(m, v, &l);
5212 
5213 	return v;
5214 }
5215 
5216 static void eval_map_stop(struct seq_file *m, void *v)
5217 {
5218 	mutex_unlock(&trace_eval_mutex);
5219 }
5220 
5221 static int eval_map_show(struct seq_file *m, void *v)
5222 {
5223 	union trace_eval_map_item *ptr = v;
5224 
5225 	seq_printf(m, "%s %ld (%s)\n",
5226 		   ptr->map.eval_string, ptr->map.eval_value,
5227 		   ptr->map.system);
5228 
5229 	return 0;
5230 }
5231 
5232 static const struct seq_operations tracing_eval_map_seq_ops = {
5233 	.start		= eval_map_start,
5234 	.next		= eval_map_next,
5235 	.stop		= eval_map_stop,
5236 	.show		= eval_map_show,
5237 };
5238 
5239 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5240 {
5241 	if (tracing_disabled)
5242 		return -ENODEV;
5243 
5244 	return seq_open(filp, &tracing_eval_map_seq_ops);
5245 }
5246 
5247 static const struct file_operations tracing_eval_map_fops = {
5248 	.open		= tracing_eval_map_open,
5249 	.read		= seq_read,
5250 	.llseek		= seq_lseek,
5251 	.release	= seq_release,
5252 };
5253 
5254 static inline union trace_eval_map_item *
5255 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5256 {
5257 	/* Return tail of array given the head */
5258 	return ptr + ptr->head.length + 1;
5259 }
5260 
5261 static void
5262 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5263 			   int len)
5264 {
5265 	struct trace_eval_map **stop;
5266 	struct trace_eval_map **map;
5267 	union trace_eval_map_item *map_array;
5268 	union trace_eval_map_item *ptr;
5269 
5270 	stop = start + len;
5271 
5272 	/*
5273 	 * The trace_eval_maps contains the map plus a head and tail item,
5274 	 * where the head holds the module and length of array, and the
5275 	 * tail holds a pointer to the next list.
5276 	 */
5277 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5278 	if (!map_array) {
5279 		pr_warn("Unable to allocate trace eval mapping\n");
5280 		return;
5281 	}
5282 
5283 	mutex_lock(&trace_eval_mutex);
5284 
5285 	if (!trace_eval_maps)
5286 		trace_eval_maps = map_array;
5287 	else {
5288 		ptr = trace_eval_maps;
5289 		for (;;) {
5290 			ptr = trace_eval_jmp_to_tail(ptr);
5291 			if (!ptr->tail.next)
5292 				break;
5293 			ptr = ptr->tail.next;
5294 
5295 		}
5296 		ptr->tail.next = map_array;
5297 	}
5298 	map_array->head.mod = mod;
5299 	map_array->head.length = len;
5300 	map_array++;
5301 
5302 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5303 		map_array->map = **map;
5304 		map_array++;
5305 	}
5306 	memset(map_array, 0, sizeof(*map_array));
5307 
5308 	mutex_unlock(&trace_eval_mutex);
5309 }
5310 
5311 static void trace_create_eval_file(struct dentry *d_tracer)
5312 {
5313 	trace_create_file("eval_map", 0444, d_tracer,
5314 			  NULL, &tracing_eval_map_fops);
5315 }
5316 
5317 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5318 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5319 static inline void trace_insert_eval_map_file(struct module *mod,
5320 			      struct trace_eval_map **start, int len) { }
5321 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5322 
5323 static void trace_insert_eval_map(struct module *mod,
5324 				  struct trace_eval_map **start, int len)
5325 {
5326 	struct trace_eval_map **map;
5327 
5328 	if (len <= 0)
5329 		return;
5330 
5331 	map = start;
5332 
5333 	trace_event_eval_update(map, len);
5334 
5335 	trace_insert_eval_map_file(mod, start, len);
5336 }
5337 
5338 static ssize_t
5339 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5340 		       size_t cnt, loff_t *ppos)
5341 {
5342 	struct trace_array *tr = filp->private_data;
5343 	char buf[MAX_TRACER_SIZE+2];
5344 	int r;
5345 
5346 	mutex_lock(&trace_types_lock);
5347 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5348 	mutex_unlock(&trace_types_lock);
5349 
5350 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5351 }
5352 
5353 int tracer_init(struct tracer *t, struct trace_array *tr)
5354 {
5355 	tracing_reset_online_cpus(&tr->trace_buffer);
5356 	return t->init(tr);
5357 }
5358 
5359 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5360 {
5361 	int cpu;
5362 
5363 	for_each_tracing_cpu(cpu)
5364 		per_cpu_ptr(buf->data, cpu)->entries = val;
5365 }
5366 
5367 #ifdef CONFIG_TRACER_MAX_TRACE
5368 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5369 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5370 					struct trace_buffer *size_buf, int cpu_id)
5371 {
5372 	int cpu, ret = 0;
5373 
5374 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5375 		for_each_tracing_cpu(cpu) {
5376 			ret = ring_buffer_resize(trace_buf->buffer,
5377 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5378 			if (ret < 0)
5379 				break;
5380 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5381 				per_cpu_ptr(size_buf->data, cpu)->entries;
5382 		}
5383 	} else {
5384 		ret = ring_buffer_resize(trace_buf->buffer,
5385 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5386 		if (ret == 0)
5387 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5388 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5389 	}
5390 
5391 	return ret;
5392 }
5393 #endif /* CONFIG_TRACER_MAX_TRACE */
5394 
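/*
 * Resize the trace buffer of @tr for @cpu (or for every CPU when @cpu
 * is RING_BUFFER_ALL_CPUS).  For the global array, when the current
 * tracer uses the max buffer, that buffer is resized as well; if that
 * resize fails, the main buffer is put back to its recorded size so
 * the two buffers never end up with different sizes.
 */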
5395 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5396 					unsigned long size, int cpu)
5397 {
5398 	int ret;
5399 
5400 	/*
5401 	 * If the kernel or the user changes the size of the ring buffer,
5402 	 * we use the size that was given, and we can forget about
5403 	 * expanding it later.
5404 	 */
5405 	ring_buffer_expanded = true;
5406 
5407 	/* May be called before buffers are initialized */
5408 	if (!tr->trace_buffer.buffer)
5409 		return 0;
5410 
5411 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5412 	if (ret < 0)
5413 		return ret;
5414 
5415 #ifdef CONFIG_TRACER_MAX_TRACE
5416 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5417 	    !tr->current_trace->use_max_tr)
5418 		goto out;
5419 
5420 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5421 	if (ret < 0) {
5422 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5423 						     &tr->trace_buffer, cpu);
5424 		if (r < 0) {
5425 			/*
5426 			 * AARGH! We are left with different
5427 			 * size max buffer!!!!
5428 			 * The max buffer is our "snapshot" buffer.
5429 			 * When a tracer needs a snapshot (one of the
5430 			 * latency tracers), it swaps the max buffer
5431 			 * with the saved snapshot. We succeeded in
5432 			 * updating the size of the main buffer, but failed to
5433 			 * update the size of the max buffer. But when we tried
5434 			 * to reset the main buffer to the original size, we
5435 			 * failed there too. This is very unlikely to
5436 			 * happen, but if it does, warn and kill all
5437 			 * tracing.
5438 			 */
5439 			WARN_ON(1);
5440 			tracing_disabled = 1;
5441 		}
5442 		return ret;
5443 	}
5444 
5445 	if (cpu == RING_BUFFER_ALL_CPUS)
5446 		set_buffer_entries(&tr->max_buffer, size);
5447 	else
5448 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5449 
5450  out:
5451 #endif /* CONFIG_TRACER_MAX_TRACE */
5452 
5453 	if (cpu == RING_BUFFER_ALL_CPUS)
5454 		set_buffer_entries(&tr->trace_buffer, size);
5455 	else
5456 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5457 
5458 	return ret;
5459 }
5460 
5461 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5462 					  unsigned long size, int cpu_id)
5463 {
5464 	int ret = size;
5465 
5466 	mutex_lock(&trace_types_lock);
5467 
5468 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5469 		/* make sure this cpu is enabled in the mask */
5470 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5471 			ret = -EINVAL;
5472 			goto out;
5473 		}
5474 	}
5475 
5476 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5477 	if (ret < 0)
5478 		ret = -ENOMEM;
5479 
5480 out:
5481 	mutex_unlock(&trace_types_lock);
5482 
5483 	return ret;
5484 }
5485 
5486 
5487 /**
5488  * tracing_update_buffers - used by tracing facility to expand ring buffers
5489  *
5490  * To save memory when tracing is never used on a system that has it
5491  * configured in, the ring buffers are set to a minimum size. Once a
5492  * user starts to use the tracing facility, they need to grow to
5493  * their default size.
5494  *
5495  * This function is to be called when a tracer is about to be used.
5496  */
5497 int tracing_update_buffers(void)
5498 {
5499 	int ret = 0;
5500 
5501 	mutex_lock(&trace_types_lock);
5502 	if (!ring_buffer_expanded)
5503 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5504 						RING_BUFFER_ALL_CPUS);
5505 	mutex_unlock(&trace_types_lock);
5506 
5507 	return ret;
5508 }
5509 
5510 struct trace_option_dentry;
5511 
5512 static void
5513 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5514 
5515 /*
5516  * Used to clear out the tracer before deletion of an instance.
5517  * Must have trace_types_lock held.
5518  */
5519 static void tracing_set_nop(struct trace_array *tr)
5520 {
5521 	if (tr->current_trace == &nop_trace)
5522 		return;
5523 
5524 	tr->current_trace->enabled--;
5525 
5526 	if (tr->current_trace->reset)
5527 		tr->current_trace->reset(tr);
5528 
5529 	tr->current_trace = &nop_trace;
5530 }
5531 
5532 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5533 {
5534 	/* Only enable if the directory has been created already. */
5535 	if (!tr->dir)
5536 		return;
5537 
5538 	create_trace_option_files(tr, t);
5539 }
5540 
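/*
 * Switch @tr to the tracer named @buf: make sure the ring buffer has
 * been expanded, tear down the old tracer, free or allocate the
 * snapshot buffer as the new tracer requires, then initialize and
 * enable it.  Fails with -EBUSY while trace_pipe readers hold a
 * reference or a conditional snapshot is in use.
 */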
5541 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5542 {
5543 	struct tracer *t;
5544 #ifdef CONFIG_TRACER_MAX_TRACE
5545 	bool had_max_tr;
5546 #endif
5547 	int ret = 0;
5548 
5549 	mutex_lock(&trace_types_lock);
5550 
5551 	if (!ring_buffer_expanded) {
5552 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5553 						RING_BUFFER_ALL_CPUS);
5554 		if (ret < 0)
5555 			goto out;
5556 		ret = 0;
5557 	}
5558 
5559 	for (t = trace_types; t; t = t->next) {
5560 		if (strcmp(t->name, buf) == 0)
5561 			break;
5562 	}
5563 	if (!t) {
5564 		ret = -EINVAL;
5565 		goto out;
5566 	}
5567 	if (t == tr->current_trace)
5568 		goto out;
5569 
5570 #ifdef CONFIG_TRACER_SNAPSHOT
5571 	if (t->use_max_tr) {
5572 		arch_spin_lock(&tr->max_lock);
5573 		if (tr->cond_snapshot)
5574 			ret = -EBUSY;
5575 		arch_spin_unlock(&tr->max_lock);
5576 		if (ret)
5577 			goto out;
5578 	}
5579 #endif
5580 	/* Some tracers won't work on kernel command line */
5581 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5582 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5583 			t->name);
5584 		goto out;
5585 	}
5586 
5587 	/* Some tracers are only allowed for the top level buffer */
5588 	if (!trace_ok_for_array(t, tr)) {
5589 		ret = -EINVAL;
5590 		goto out;
5591 	}
5592 
5593 	/* If trace pipe files are being read, we can't change the tracer */
5594 	if (tr->current_trace->ref) {
5595 		ret = -EBUSY;
5596 		goto out;
5597 	}
5598 
5599 	trace_branch_disable();
5600 
5601 	tr->current_trace->enabled--;
5602 
5603 	if (tr->current_trace->reset)
5604 		tr->current_trace->reset(tr);
5605 
5606 	/* Current trace needs to be nop_trace before synchronize_rcu */
5607 	tr->current_trace = &nop_trace;
5608 
5609 #ifdef CONFIG_TRACER_MAX_TRACE
5610 	had_max_tr = tr->allocated_snapshot;
5611 
5612 	if (had_max_tr && !t->use_max_tr) {
5613 		/*
5614 		 * We need to make sure that update_max_tr() sees that
5615 		 * current_trace changed to nop_trace to keep it from
5616 		 * swapping the buffers after we resize it.
5617 		 * update_max_tr() is called with interrupts disabled,
5618 		 * so a synchronize_rcu() is sufficient.
5619 		 */
5620 		synchronize_rcu();
5621 		free_snapshot(tr);
5622 	}
5623 #endif
5624 
5625 #ifdef CONFIG_TRACER_MAX_TRACE
5626 	if (t->use_max_tr && !had_max_tr) {
5627 		ret = tracing_alloc_snapshot_instance(tr);
5628 		if (ret < 0)
5629 			goto out;
5630 	}
5631 #endif
5632 
5633 	if (t->init) {
5634 		ret = tracer_init(t, tr);
5635 		if (ret)
5636 			goto out;
5637 	}
5638 
5639 	tr->current_trace = t;
5640 	tr->current_trace->enabled++;
5641 	trace_branch_enable(tr);
5642  out:
5643 	mutex_unlock(&trace_types_lock);
5644 
5645 	return ret;
5646 }
5647 
5648 static ssize_t
5649 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5650 			size_t cnt, loff_t *ppos)
5651 {
5652 	struct trace_array *tr = filp->private_data;
5653 	char buf[MAX_TRACER_SIZE+1];
5654 	int i;
5655 	size_t ret;
5656 	int err;
5657 
5658 	ret = cnt;
5659 
5660 	if (cnt > MAX_TRACER_SIZE)
5661 		cnt = MAX_TRACER_SIZE;
5662 
5663 	if (copy_from_user(buf, ubuf, cnt))
5664 		return -EFAULT;
5665 
5666 	buf[cnt] = 0;
5667 
5668 	/* strip trailing whitespace. */
5669 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5670 		buf[i] = 0;
5671 
5672 	err = tracing_set_tracer(tr, buf);
5673 	if (err)
5674 		return err;
5675 
5676 	*ppos += ret;
5677 
5678 	return ret;
5679 }
5680 
5681 static ssize_t
5682 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5683 		   size_t cnt, loff_t *ppos)
5684 {
5685 	char buf[64];
5686 	int r;
5687 
5688 	r = snprintf(buf, sizeof(buf), "%ld\n",
5689 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5690 	if (r > sizeof(buf))
5691 		r = sizeof(buf);
5692 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5693 }
5694 
5695 static ssize_t
5696 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5697 		    size_t cnt, loff_t *ppos)
5698 {
5699 	unsigned long val;
5700 	int ret;
5701 
5702 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5703 	if (ret)
5704 		return ret;
5705 
5706 	*ptr = val * 1000;
5707 
5708 	return cnt;
5709 }
5710 
5711 static ssize_t
5712 tracing_thresh_read(struct file *filp, char __user *ubuf,
5713 		    size_t cnt, loff_t *ppos)
5714 {
5715 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5716 }
5717 
5718 static ssize_t
5719 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5720 		     size_t cnt, loff_t *ppos)
5721 {
5722 	struct trace_array *tr = filp->private_data;
5723 	int ret;
5724 
5725 	mutex_lock(&trace_types_lock);
5726 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5727 	if (ret < 0)
5728 		goto out;
5729 
5730 	if (tr->current_trace->update_thresh) {
5731 		ret = tr->current_trace->update_thresh(tr);
5732 		if (ret < 0)
5733 			goto out;
5734 	}
5735 
5736 	ret = cnt;
5737 out:
5738 	mutex_unlock(&trace_types_lock);
5739 
5740 	return ret;
5741 }
5742 
5743 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5744 
5745 static ssize_t
5746 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5747 		     size_t cnt, loff_t *ppos)
5748 {
5749 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5750 }
5751 
5752 static ssize_t
5753 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5754 		      size_t cnt, loff_t *ppos)
5755 {
5756 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5757 }
5758 
5759 #endif
5760 
5761 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5762 {
5763 	struct trace_array *tr = inode->i_private;
5764 	struct trace_iterator *iter;
5765 	int ret = 0;
5766 
5767 	if (tracing_disabled)
5768 		return -ENODEV;
5769 
5770 	if (trace_array_get(tr) < 0)
5771 		return -ENODEV;
5772 
5773 	mutex_lock(&trace_types_lock);
5774 
5775 	/* create a buffer to store the information to pass to userspace */
5776 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5777 	if (!iter) {
5778 		ret = -ENOMEM;
5779 		__trace_array_put(tr);
5780 		goto out;
5781 	}
5782 
5783 	trace_seq_init(&iter->seq);
5784 	iter->trace = tr->current_trace;
5785 
5786 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5787 		ret = -ENOMEM;
5788 		goto fail;
5789 	}
5790 
5791 	/* trace pipe does not show start of buffer */
5792 	cpumask_setall(iter->started);
5793 
5794 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5795 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5796 
5797 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5798 	if (trace_clocks[tr->clock_id].in_ns)
5799 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5800 
5801 	iter->tr = tr;
5802 	iter->trace_buffer = &tr->trace_buffer;
5803 	iter->cpu_file = tracing_get_cpu(inode);
5804 	mutex_init(&iter->mutex);
5805 	filp->private_data = iter;
5806 
5807 	if (iter->trace->pipe_open)
5808 		iter->trace->pipe_open(iter);
5809 
5810 	nonseekable_open(inode, filp);
5811 
5812 	tr->current_trace->ref++;
5813 out:
5814 	mutex_unlock(&trace_types_lock);
5815 	return ret;
5816 
5817 fail:
5818 	kfree(iter);
5819 	__trace_array_put(tr);
5820 	mutex_unlock(&trace_types_lock);
5821 	return ret;
5822 }
5823 
5824 static int tracing_release_pipe(struct inode *inode, struct file *file)
5825 {
5826 	struct trace_iterator *iter = file->private_data;
5827 	struct trace_array *tr = inode->i_private;
5828 
5829 	mutex_lock(&trace_types_lock);
5830 
5831 	tr->current_trace->ref--;
5832 
5833 	if (iter->trace->pipe_close)
5834 		iter->trace->pipe_close(iter);
5835 
5836 	mutex_unlock(&trace_types_lock);
5837 
5838 	free_cpumask_var(iter->started);
5839 	mutex_destroy(&iter->mutex);
5840 	kfree(iter);
5841 
5842 	trace_array_put(tr);
5843 
5844 	return 0;
5845 }
5846 
5847 static __poll_t
5848 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5849 {
5850 	struct trace_array *tr = iter->tr;
5851 
5852 	/* Iterators are static; they should be filled or empty */
5853 	if (trace_buffer_iter(iter, iter->cpu_file))
5854 		return EPOLLIN | EPOLLRDNORM;
5855 
5856 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5857 		/*
5858 		 * Always select as readable when in blocking mode
5859 		 */
5860 		return EPOLLIN | EPOLLRDNORM;
5861 	else
5862 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5863 					     filp, poll_table);
5864 }
5865 
5866 static __poll_t
5867 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5868 {
5869 	struct trace_iterator *iter = filp->private_data;
5870 
5871 	return trace_poll(iter, filp, poll_table);
5872 }
5873 
5874 /* Must be called with iter->mutex held. */
5875 static int tracing_wait_pipe(struct file *filp)
5876 {
5877 	struct trace_iterator *iter = filp->private_data;
5878 	int ret;
5879 
5880 	while (trace_empty(iter)) {
5881 
5882 		if ((filp->f_flags & O_NONBLOCK)) {
5883 			return -EAGAIN;
5884 		}
5885 
5886 		/*
5887 		 * We stop blocking only when tracing has been disabled and we
5888 		 * have already read something. If tracing is disabled but we
5889 		 * have never read anything, we keep blocking. This allows a
5890 		 * user to cat this file and then enable tracing. But after we
5891 		 * have read something, we give an EOF when tracing is again disabled.
5892 		 *
5893 		 * iter->pos will be 0 if we haven't read anything.
5894 		 */
5895 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5896 			break;
5897 
5898 		mutex_unlock(&iter->mutex);
5899 
5900 		ret = wait_on_pipe(iter, 0);
5901 
5902 		mutex_lock(&iter->mutex);
5903 
5904 		if (ret)
5905 			return ret;
5906 	}
5907 
5908 	return 1;
5909 }
5910 
5911 /*
5912  * Consumer reader.
5913  */
5914 static ssize_t
5915 tracing_read_pipe(struct file *filp, char __user *ubuf,
5916 		  size_t cnt, loff_t *ppos)
5917 {
5918 	struct trace_iterator *iter = filp->private_data;
5919 	ssize_t sret;
5920 
5921 	/*
5922 	 * Avoid more than one consumer on a single file descriptor.
5923 	 * This is just a matter of trace coherency; the ring buffer itself
5924 	 * is protected.
5925 	 */
5926 	mutex_lock(&iter->mutex);
5927 
5928 	/* return any leftover data */
5929 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5930 	if (sret != -EBUSY)
5931 		goto out;
5932 
5933 	trace_seq_init(&iter->seq);
5934 
5935 	if (iter->trace->read) {
5936 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5937 		if (sret)
5938 			goto out;
5939 	}
5940 
5941 waitagain:
5942 	sret = tracing_wait_pipe(filp);
5943 	if (sret <= 0)
5944 		goto out;
5945 
5946 	/* stop when tracing is finished */
5947 	if (trace_empty(iter)) {
5948 		sret = 0;
5949 		goto out;
5950 	}
5951 
5952 	if (cnt >= PAGE_SIZE)
5953 		cnt = PAGE_SIZE - 1;
5954 
5955 	/* reset all but tr, trace, and overruns */
5956 	memset(&iter->seq, 0,
5957 	       sizeof(struct trace_iterator) -
5958 	       offsetof(struct trace_iterator, seq));
5959 	cpumask_clear(iter->started);
5960 	iter->pos = -1;
5961 
5962 	trace_event_read_lock();
5963 	trace_access_lock(iter->cpu_file);
5964 	while (trace_find_next_entry_inc(iter) != NULL) {
5965 		enum print_line_t ret;
5966 		int save_len = iter->seq.seq.len;
5967 
5968 		ret = print_trace_line(iter);
5969 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5970 			/* don't print partial lines */
5971 			iter->seq.seq.len = save_len;
5972 			break;
5973 		}
5974 		if (ret != TRACE_TYPE_NO_CONSUME)
5975 			trace_consume(iter);
5976 
5977 		if (trace_seq_used(&iter->seq) >= cnt)
5978 			break;
5979 
5980 		/*
5981 		 * Setting the full flag means we reached the trace_seq buffer
5982 		 * size and we should have left via the partial output condition
5983 		 * above. One of the trace_seq_* functions is being used improperly.
5984 		 */
5985 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5986 			  iter->ent->type);
5987 	}
5988 	trace_access_unlock(iter->cpu_file);
5989 	trace_event_read_unlock();
5990 
5991 	/* Now copy what we have to the user */
5992 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5993 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5994 		trace_seq_init(&iter->seq);
5995 
5996 	/*
5997 	 * If there was nothing to send to user, in spite of consuming trace
5998 	 * entries, go back to wait for more entries.
5999 	 */
6000 	if (sret == -EBUSY)
6001 		goto waitagain;
6002 
6003 out:
6004 	mutex_unlock(&iter->mutex);
6005 
6006 	return sret;
6007 }
6008 
6009 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6010 				     unsigned int idx)
6011 {
6012 	__free_page(spd->pages[idx]);
6013 }
6014 
6015 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6016 	.can_merge		= 0,
6017 	.confirm		= generic_pipe_buf_confirm,
6018 	.release		= generic_pipe_buf_release,
6019 	.steal			= generic_pipe_buf_steal,
6020 	.get			= generic_pipe_buf_get,
6021 };
6022 
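/*
 * Format trace entries into iter->seq until either the page-sized seq
 * buffer fills up or at most @rem more bytes may be produced. Returns
 * the number of bytes still allowed for the remaining pipe pages.
 */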
6023 static size_t
6024 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6025 {
6026 	size_t count;
6027 	int save_len;
6028 	int ret;
6029 
6030 	/* Seq buffer is page-sized, exactly what we need. */
6031 	for (;;) {
6032 		save_len = iter->seq.seq.len;
6033 		ret = print_trace_line(iter);
6034 
6035 		if (trace_seq_has_overflowed(&iter->seq)) {
6036 			iter->seq.seq.len = save_len;
6037 			break;
6038 		}
6039 
6040 		/*
6041 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6042 		 * should only be returned if the iter->seq overflowed, which
6043 		 * is caught above. But check it anyway to be safe.
6044 		 */
6045 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6046 			iter->seq.seq.len = save_len;
6047 			break;
6048 		}
6049 
6050 		count = trace_seq_used(&iter->seq) - save_len;
6051 		if (rem < count) {
6052 			rem = 0;
6053 			iter->seq.seq.len = save_len;
6054 			break;
6055 		}
6056 
6057 		if (ret != TRACE_TYPE_NO_CONSUME)
6058 			trace_consume(iter);
6059 		rem -= count;
6060 		if (!trace_find_next_entry_inc(iter))	{
6061 			rem = 0;
6062 			iter->ent = NULL;
6063 			break;
6064 		}
6065 	}
6066 
6067 	return rem;
6068 }
6069 
6070 static ssize_t tracing_splice_read_pipe(struct file *filp,
6071 					loff_t *ppos,
6072 					struct pipe_inode_info *pipe,
6073 					size_t len,
6074 					unsigned int flags)
6075 {
6076 	struct page *pages_def[PIPE_DEF_BUFFERS];
6077 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6078 	struct trace_iterator *iter = filp->private_data;
6079 	struct splice_pipe_desc spd = {
6080 		.pages		= pages_def,
6081 		.partial	= partial_def,
6082 		.nr_pages	= 0, /* This gets updated below. */
6083 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6084 		.ops		= &tracing_pipe_buf_ops,
6085 		.spd_release	= tracing_spd_release_pipe,
6086 	};
6087 	ssize_t ret;
6088 	size_t rem;
6089 	unsigned int i;
6090 
6091 	if (splice_grow_spd(pipe, &spd))
6092 		return -ENOMEM;
6093 
6094 	mutex_lock(&iter->mutex);
6095 
6096 	if (iter->trace->splice_read) {
6097 		ret = iter->trace->splice_read(iter, filp,
6098 					       ppos, pipe, len, flags);
6099 		if (ret)
6100 			goto out_err;
6101 	}
6102 
6103 	ret = tracing_wait_pipe(filp);
6104 	if (ret <= 0)
6105 		goto out_err;
6106 
6107 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6108 		ret = -EFAULT;
6109 		goto out_err;
6110 	}
6111 
6112 	trace_event_read_lock();
6113 	trace_access_lock(iter->cpu_file);
6114 
6115 	/* Fill as many pages as possible. */
6116 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6117 		spd.pages[i] = alloc_page(GFP_KERNEL);
6118 		if (!spd.pages[i])
6119 			break;
6120 
6121 		rem = tracing_fill_pipe_page(rem, iter);
6122 
6123 		/* Copy the data into the page, so we can start over. */
6124 		ret = trace_seq_to_buffer(&iter->seq,
6125 					  page_address(spd.pages[i]),
6126 					  trace_seq_used(&iter->seq));
6127 		if (ret < 0) {
6128 			__free_page(spd.pages[i]);
6129 			break;
6130 		}
6131 		spd.partial[i].offset = 0;
6132 		spd.partial[i].len = trace_seq_used(&iter->seq);
6133 
6134 		trace_seq_init(&iter->seq);
6135 	}
6136 
6137 	trace_access_unlock(iter->cpu_file);
6138 	trace_event_read_unlock();
6139 	mutex_unlock(&iter->mutex);
6140 
6141 	spd.nr_pages = i;
6142 
6143 	if (i)
6144 		ret = splice_to_pipe(pipe, &spd);
6145 	else
6146 		ret = 0;
6147 out:
6148 	splice_shrink_spd(&spd);
6149 	return ret;
6150 
6151 out_err:
6152 	mutex_unlock(&iter->mutex);
6153 	goto out;
6154 }
6155 
6156 static ssize_t
6157 tracing_entries_read(struct file *filp, char __user *ubuf,
6158 		     size_t cnt, loff_t *ppos)
6159 {
6160 	struct inode *inode = file_inode(filp);
6161 	struct trace_array *tr = inode->i_private;
6162 	int cpu = tracing_get_cpu(inode);
6163 	char buf[64];
6164 	int r = 0;
6165 	ssize_t ret;
6166 
6167 	mutex_lock(&trace_types_lock);
6168 
6169 	if (cpu == RING_BUFFER_ALL_CPUS) {
6170 		int cpu, buf_size_same;
6171 		unsigned long size;
6172 
6173 		size = 0;
6174 		buf_size_same = 1;
6175 		/* check if all per-cpu buffer sizes are the same */
6176 		for_each_tracing_cpu(cpu) {
6177 			/* fill in the size from the first enabled cpu */
6178 			if (size == 0)
6179 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6180 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6181 				buf_size_same = 0;
6182 				break;
6183 			}
6184 		}
6185 
6186 		if (buf_size_same) {
6187 			if (!ring_buffer_expanded)
6188 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6189 					    size >> 10,
6190 					    trace_buf_size >> 10);
6191 			else
6192 				r = sprintf(buf, "%lu\n", size >> 10);
6193 		} else
6194 			r = sprintf(buf, "X\n");
6195 	} else
6196 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6197 
6198 	mutex_unlock(&trace_types_lock);
6199 
6200 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6201 	return ret;
6202 }
6203 
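/*
 * Writes to buffer_size_kb (either the top-level file or a per_cpu one)
 * resize the ring buffer, e.g.
 *
 *     echo 2048 > /sys/kernel/tracing/buffer_size_kb
 *
 * The value is interpreted as KiB per CPU.
 */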
6204 static ssize_t
6205 tracing_entries_write(struct file *filp, const char __user *ubuf,
6206 		      size_t cnt, loff_t *ppos)
6207 {
6208 	struct inode *inode = file_inode(filp);
6209 	struct trace_array *tr = inode->i_private;
6210 	unsigned long val;
6211 	int ret;
6212 
6213 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6214 	if (ret)
6215 		return ret;
6216 
6217 	/* must have at least 1 entry */
6218 	if (!val)
6219 		return -EINVAL;
6220 
6221 	/* value is in KB */
6222 	val <<= 10;
6223 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6224 	if (ret < 0)
6225 		return ret;
6226 
6227 	*ppos += cnt;
6228 
6229 	return cnt;
6230 }
6231 
6232 static ssize_t
6233 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6234 				size_t cnt, loff_t *ppos)
6235 {
6236 	struct trace_array *tr = filp->private_data;
6237 	char buf[64];
6238 	int r, cpu;
6239 	unsigned long size = 0, expanded_size = 0;
6240 
6241 	mutex_lock(&trace_types_lock);
6242 	for_each_tracing_cpu(cpu) {
6243 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6244 		if (!ring_buffer_expanded)
6245 			expanded_size += trace_buf_size >> 10;
6246 	}
6247 	if (ring_buffer_expanded)
6248 		r = sprintf(buf, "%lu\n", size);
6249 	else
6250 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6251 	mutex_unlock(&trace_types_lock);
6252 
6253 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6254 }
6255 
6256 static ssize_t
6257 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6258 			  size_t cnt, loff_t *ppos)
6259 {
6260 	/*
6261 	 * There is no need to read what the user has written; this function
6262 	 * exists only so that an "echo" to this file does not return an error.
6263 	 */
6264 
6265 	*ppos += cnt;
6266 
6267 	return cnt;
6268 }
6269 
6270 static int
6271 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6272 {
6273 	struct trace_array *tr = inode->i_private;
6274 
6275 	/* disable tracing? */
6276 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6277 		tracer_tracing_off(tr);
6278 	/* resize the ring buffer to 0 */
6279 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6280 
6281 	trace_array_put(tr);
6282 
6283 	return 0;
6284 }
6285 
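/*
 * Writes to the trace_marker file end up here, e.g.
 *
 *     echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * The string is recorded as a TRACE_PRINT event in the ring buffer of
 * the instance that owns the file, and any triggers attached to the
 * trace_marker event are run against it.
 */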
6286 static ssize_t
6287 tracing_mark_write(struct file *filp, const char __user *ubuf,
6288 					size_t cnt, loff_t *fpos)
6289 {
6290 	struct trace_array *tr = filp->private_data;
6291 	struct ring_buffer_event *event;
6292 	enum event_trigger_type tt = ETT_NONE;
6293 	struct ring_buffer *buffer;
6294 	struct print_entry *entry;
6295 	unsigned long irq_flags;
6296 	const char faulted[] = "<faulted>";
6297 	ssize_t written;
6298 	int size;
6299 	int len;
6300 
6301 /* Used in tracing_mark_raw_write() as well */
6302 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6303 
6304 	if (tracing_disabled)
6305 		return -EINVAL;
6306 
6307 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6308 		return -EINVAL;
6309 
6310 	if (cnt > TRACE_BUF_SIZE)
6311 		cnt = TRACE_BUF_SIZE;
6312 
6313 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6314 
6315 	local_save_flags(irq_flags);
6316 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6317 
6318 	/* If cnt is smaller than "<faulted>", make sure there is still room for it */
6319 	if (cnt < FAULTED_SIZE)
6320 		size += FAULTED_SIZE - cnt;
6321 
6322 	buffer = tr->trace_buffer.buffer;
6323 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6324 					    irq_flags, preempt_count());
6325 	if (unlikely(!event))
6326 		/* Ring buffer disabled, return as if not open for write */
6327 		return -EBADF;
6328 
6329 	entry = ring_buffer_event_data(event);
6330 	entry->ip = _THIS_IP_;
6331 
6332 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6333 	if (len) {
6334 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6335 		cnt = FAULTED_SIZE;
6336 		written = -EFAULT;
6337 	} else
6338 		written = cnt;
6339 	len = cnt;
6340 
6341 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6342 		/* do not add \n before testing triggers, but add \0 */
6343 		entry->buf[cnt] = '\0';
6344 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6345 	}
6346 
6347 	if (entry->buf[cnt - 1] != '\n') {
6348 		entry->buf[cnt] = '\n';
6349 		entry->buf[cnt + 1] = '\0';
6350 	} else
6351 		entry->buf[cnt] = '\0';
6352 
6353 	__buffer_unlock_commit(buffer, event);
6354 
6355 	if (tt)
6356 		event_triggers_post_call(tr->trace_marker_file, tt);
6357 
6358 	if (written > 0)
6359 		*fpos += written;
6360 
6361 	return written;
6362 }
6363 
6364 /* Limit it for now to 3K (including tag) */
6365 #define RAW_DATA_MAX_SIZE (1024*3)
6366 
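/*
 * Writes to the trace_marker_raw file end up here. The payload must
 * begin with a tag id (unsigned int) followed by arbitrary binary data,
 * which is stored as a TRACE_RAW_DATA event. Conceptually:
 *
 *     struct { unsigned int id; char data[]; } rec;
 *     write(fd, &rec, sizeof(rec.id) + data_len);
 */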
6367 static ssize_t
6368 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6369 					size_t cnt, loff_t *fpos)
6370 {
6371 	struct trace_array *tr = filp->private_data;
6372 	struct ring_buffer_event *event;
6373 	struct ring_buffer *buffer;
6374 	struct raw_data_entry *entry;
6375 	const char faulted[] = "<faulted>";
6376 	unsigned long irq_flags;
6377 	ssize_t written;
6378 	int size;
6379 	int len;
6380 
6381 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6382 
6383 	if (tracing_disabled)
6384 		return -EINVAL;
6385 
6386 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6387 		return -EINVAL;
6388 
6389 	/* The marker must at least have a tag id */
6390 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6391 		return -EINVAL;
6392 
6393 	if (cnt > TRACE_BUF_SIZE)
6394 		cnt = TRACE_BUF_SIZE;
6395 
6396 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6397 
6398 	local_save_flags(irq_flags);
6399 	size = sizeof(*entry) + cnt;
6400 	if (cnt < FAULT_SIZE_ID)
6401 		size += FAULT_SIZE_ID - cnt;
6402 
6403 	buffer = tr->trace_buffer.buffer;
6404 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6405 					    irq_flags, preempt_count());
6406 	if (!event)
6407 		/* Ring buffer disabled, return as if not open for write */
6408 		return -EBADF;
6409 
6410 	entry = ring_buffer_event_data(event);
6411 
6412 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6413 	if (len) {
6414 		entry->id = -1;
6415 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6416 		written = -EFAULT;
6417 	} else
6418 		written = cnt;
6419 
6420 	__buffer_unlock_commit(buffer, event);
6421 
6422 	if (written > 0)
6423 		*fpos += written;
6424 
6425 	return written;
6426 }
6427 
6428 static int tracing_clock_show(struct seq_file *m, void *v)
6429 {
6430 	struct trace_array *tr = m->private;
6431 	int i;
6432 
6433 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6434 		seq_printf(m,
6435 			"%s%s%s%s", i ? " " : "",
6436 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6437 			i == tr->clock_id ? "]" : "");
6438 	seq_putc(m, '\n');
6439 
6440 	return 0;
6441 }
6442 
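/*
 * Select the trace clock named by @clockstr for @tr, as done by e.g.
 *
 *     echo global > /sys/kernel/tracing/trace_clock
 *
 * Both the main buffer and (if present) the max buffer are reset,
 * since timestamps taken with different clocks are not comparable.
 */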
6443 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6444 {
6445 	int i;
6446 
6447 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6448 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6449 			break;
6450 	}
6451 	if (i == ARRAY_SIZE(trace_clocks))
6452 		return -EINVAL;
6453 
6454 	mutex_lock(&trace_types_lock);
6455 
6456 	tr->clock_id = i;
6457 
6458 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6459 
6460 	/*
6461 	 * The new clock may not be consistent with the previous clock.
6462 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6463 	 */
6464 	tracing_reset_online_cpus(&tr->trace_buffer);
6465 
6466 #ifdef CONFIG_TRACER_MAX_TRACE
6467 	if (tr->max_buffer.buffer)
6468 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6469 	tracing_reset_online_cpus(&tr->max_buffer);
6470 #endif
6471 
6472 	mutex_unlock(&trace_types_lock);
6473 
6474 	return 0;
6475 }
6476 
6477 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6478 				   size_t cnt, loff_t *fpos)
6479 {
6480 	struct seq_file *m = filp->private_data;
6481 	struct trace_array *tr = m->private;
6482 	char buf[64];
6483 	const char *clockstr;
6484 	int ret;
6485 
6486 	if (cnt >= sizeof(buf))
6487 		return -EINVAL;
6488 
6489 	if (copy_from_user(buf, ubuf, cnt))
6490 		return -EFAULT;
6491 
6492 	buf[cnt] = 0;
6493 
6494 	clockstr = strstrip(buf);
6495 
6496 	ret = tracing_set_clock(tr, clockstr);
6497 	if (ret)
6498 		return ret;
6499 
6500 	*fpos += cnt;
6501 
6502 	return cnt;
6503 }
6504 
6505 static int tracing_clock_open(struct inode *inode, struct file *file)
6506 {
6507 	struct trace_array *tr = inode->i_private;
6508 	int ret;
6509 
6510 	if (tracing_disabled)
6511 		return -ENODEV;
6512 
6513 	if (trace_array_get(tr))
6514 		return -ENODEV;
6515 
6516 	ret = single_open(file, tracing_clock_show, inode->i_private);
6517 	if (ret < 0)
6518 		trace_array_put(tr);
6519 
6520 	return ret;
6521 }
6522 
6523 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6524 {
6525 	struct trace_array *tr = m->private;
6526 
6527 	mutex_lock(&trace_types_lock);
6528 
6529 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6530 		seq_puts(m, "delta [absolute]\n");
6531 	else
6532 		seq_puts(m, "[delta] absolute\n");
6533 
6534 	mutex_unlock(&trace_types_lock);
6535 
6536 	return 0;
6537 }
6538 
6539 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6540 {
6541 	struct trace_array *tr = inode->i_private;
6542 	int ret;
6543 
6544 	if (tracing_disabled)
6545 		return -ENODEV;
6546 
6547 	if (trace_array_get(tr))
6548 		return -ENODEV;
6549 
6550 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6551 	if (ret < 0)
6552 		trace_array_put(tr);
6553 
6554 	return ret;
6555 }
6556 
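/*
 * Enable or disable absolute timestamps in the ring buffers of @tr.
 * A reference count is kept so that several users (such as hist
 * triggers) can request absolute timestamps independently.
 */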
6557 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6558 {
6559 	int ret = 0;
6560 
6561 	mutex_lock(&trace_types_lock);
6562 
6563 	if (abs && tr->time_stamp_abs_ref++)
6564 		goto out;
6565 
6566 	if (!abs) {
6567 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6568 			ret = -EINVAL;
6569 			goto out;
6570 		}
6571 
6572 		if (--tr->time_stamp_abs_ref)
6573 			goto out;
6574 	}
6575 
6576 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6577 
6578 #ifdef CONFIG_TRACER_MAX_TRACE
6579 	if (tr->max_buffer.buffer)
6580 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6581 #endif
6582  out:
6583 	mutex_unlock(&trace_types_lock);
6584 
6585 	return ret;
6586 }
6587 
6588 struct ftrace_buffer_info {
6589 	struct trace_iterator	iter;
6590 	void			*spare;
6591 	unsigned int		spare_cpu;
6592 	unsigned int		read;
6593 };
6594 
6595 #ifdef CONFIG_TRACER_SNAPSHOT
6596 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6597 {
6598 	struct trace_array *tr = inode->i_private;
6599 	struct trace_iterator *iter;
6600 	struct seq_file *m;
6601 	int ret = 0;
6602 
6603 	if (trace_array_get(tr) < 0)
6604 		return -ENODEV;
6605 
6606 	if (file->f_mode & FMODE_READ) {
6607 		iter = __tracing_open(inode, file, true);
6608 		if (IS_ERR(iter))
6609 			ret = PTR_ERR(iter);
6610 	} else {
6611 		/* Writes still need the seq_file to hold the private data */
6612 		ret = -ENOMEM;
6613 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6614 		if (!m)
6615 			goto out;
6616 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6617 		if (!iter) {
6618 			kfree(m);
6619 			goto out;
6620 		}
6621 		ret = 0;
6622 
6623 		iter->tr = tr;
6624 		iter->trace_buffer = &tr->max_buffer;
6625 		iter->cpu_file = tracing_get_cpu(inode);
6626 		m->private = iter;
6627 		file->private_data = m;
6628 	}
6629 out:
6630 	if (ret < 0)
6631 		trace_array_put(tr);
6632 
6633 	return ret;
6634 }
6635 
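/*
 * Writes to the "snapshot" file:
 *
 *     echo 0 > snapshot  - free the snapshot buffer (all-CPUs file only)
 *     echo 1 > snapshot  - allocate the buffer if needed and take a snapshot
 *     echo 2 > snapshot  - any other value just clears the snapshot contents
 */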
6636 static ssize_t
6637 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6638 		       loff_t *ppos)
6639 {
6640 	struct seq_file *m = filp->private_data;
6641 	struct trace_iterator *iter = m->private;
6642 	struct trace_array *tr = iter->tr;
6643 	unsigned long val;
6644 	int ret;
6645 
6646 	ret = tracing_update_buffers();
6647 	if (ret < 0)
6648 		return ret;
6649 
6650 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6651 	if (ret)
6652 		return ret;
6653 
6654 	mutex_lock(&trace_types_lock);
6655 
6656 	if (tr->current_trace->use_max_tr) {
6657 		ret = -EBUSY;
6658 		goto out;
6659 	}
6660 
6661 	arch_spin_lock(&tr->max_lock);
6662 	if (tr->cond_snapshot)
6663 		ret = -EBUSY;
6664 	arch_spin_unlock(&tr->max_lock);
6665 	if (ret)
6666 		goto out;
6667 
6668 	switch (val) {
6669 	case 0:
6670 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6671 			ret = -EINVAL;
6672 			break;
6673 		}
6674 		if (tr->allocated_snapshot)
6675 			free_snapshot(tr);
6676 		break;
6677 	case 1:
6678 /* Only allow per-cpu swap if the ring buffer supports it */
6679 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6680 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6681 			ret = -EINVAL;
6682 			break;
6683 		}
6684 #endif
6685 		if (!tr->allocated_snapshot) {
6686 			ret = tracing_alloc_snapshot_instance(tr);
6687 			if (ret < 0)
6688 				break;
6689 		}
6690 		local_irq_disable();
6691 		/* Now, we're going to swap */
6692 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6693 			update_max_tr(tr, current, smp_processor_id(), NULL);
6694 		else
6695 			update_max_tr_single(tr, current, iter->cpu_file);
6696 		local_irq_enable();
6697 		break;
6698 	default:
6699 		if (tr->allocated_snapshot) {
6700 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6701 				tracing_reset_online_cpus(&tr->max_buffer);
6702 			else
6703 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6704 		}
6705 		break;
6706 	}
6707 
6708 	if (ret >= 0) {
6709 		*ppos += cnt;
6710 		ret = cnt;
6711 	}
6712 out:
6713 	mutex_unlock(&trace_types_lock);
6714 	return ret;
6715 }
6716 
6717 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6718 {
6719 	struct seq_file *m = file->private_data;
6720 	int ret;
6721 
6722 	ret = tracing_release(inode, file);
6723 
6724 	if (file->f_mode & FMODE_READ)
6725 		return ret;
6726 
6727 	/* If write only, the seq_file is just a stub */
6728 	if (m)
6729 		kfree(m->private);
6730 	kfree(m);
6731 
6732 	return 0;
6733 }
6734 
6735 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6736 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6737 				    size_t count, loff_t *ppos);
6738 static int tracing_buffers_release(struct inode *inode, struct file *file);
6739 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6740 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6741 
6742 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6743 {
6744 	struct ftrace_buffer_info *info;
6745 	int ret;
6746 
6747 	ret = tracing_buffers_open(inode, filp);
6748 	if (ret < 0)
6749 		return ret;
6750 
6751 	info = filp->private_data;
6752 
6753 	if (info->iter.trace->use_max_tr) {
6754 		tracing_buffers_release(inode, filp);
6755 		return -EBUSY;
6756 	}
6757 
6758 	info->iter.snapshot = true;
6759 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6760 
6761 	return ret;
6762 }
6763 
6764 #endif /* CONFIG_TRACER_SNAPSHOT */
6765 
6766 
6767 static const struct file_operations tracing_thresh_fops = {
6768 	.open		= tracing_open_generic,
6769 	.read		= tracing_thresh_read,
6770 	.write		= tracing_thresh_write,
6771 	.llseek		= generic_file_llseek,
6772 };
6773 
6774 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6775 static const struct file_operations tracing_max_lat_fops = {
6776 	.open		= tracing_open_generic,
6777 	.read		= tracing_max_lat_read,
6778 	.write		= tracing_max_lat_write,
6779 	.llseek		= generic_file_llseek,
6780 };
6781 #endif
6782 
6783 static const struct file_operations set_tracer_fops = {
6784 	.open		= tracing_open_generic,
6785 	.read		= tracing_set_trace_read,
6786 	.write		= tracing_set_trace_write,
6787 	.llseek		= generic_file_llseek,
6788 };
6789 
6790 static const struct file_operations tracing_pipe_fops = {
6791 	.open		= tracing_open_pipe,
6792 	.poll		= tracing_poll_pipe,
6793 	.read		= tracing_read_pipe,
6794 	.splice_read	= tracing_splice_read_pipe,
6795 	.release	= tracing_release_pipe,
6796 	.llseek		= no_llseek,
6797 };
6798 
6799 static const struct file_operations tracing_entries_fops = {
6800 	.open		= tracing_open_generic_tr,
6801 	.read		= tracing_entries_read,
6802 	.write		= tracing_entries_write,
6803 	.llseek		= generic_file_llseek,
6804 	.release	= tracing_release_generic_tr,
6805 };
6806 
6807 static const struct file_operations tracing_total_entries_fops = {
6808 	.open		= tracing_open_generic_tr,
6809 	.read		= tracing_total_entries_read,
6810 	.llseek		= generic_file_llseek,
6811 	.release	= tracing_release_generic_tr,
6812 };
6813 
6814 static const struct file_operations tracing_free_buffer_fops = {
6815 	.open		= tracing_open_generic_tr,
6816 	.write		= tracing_free_buffer_write,
6817 	.release	= tracing_free_buffer_release,
6818 };
6819 
6820 static const struct file_operations tracing_mark_fops = {
6821 	.open		= tracing_open_generic_tr,
6822 	.write		= tracing_mark_write,
6823 	.llseek		= generic_file_llseek,
6824 	.release	= tracing_release_generic_tr,
6825 };
6826 
6827 static const struct file_operations tracing_mark_raw_fops = {
6828 	.open		= tracing_open_generic_tr,
6829 	.write		= tracing_mark_raw_write,
6830 	.llseek		= generic_file_llseek,
6831 	.release	= tracing_release_generic_tr,
6832 };
6833 
6834 static const struct file_operations trace_clock_fops = {
6835 	.open		= tracing_clock_open,
6836 	.read		= seq_read,
6837 	.llseek		= seq_lseek,
6838 	.release	= tracing_single_release_tr,
6839 	.write		= tracing_clock_write,
6840 };
6841 
6842 static const struct file_operations trace_time_stamp_mode_fops = {
6843 	.open		= tracing_time_stamp_mode_open,
6844 	.read		= seq_read,
6845 	.llseek		= seq_lseek,
6846 	.release	= tracing_single_release_tr,
6847 };
6848 
6849 #ifdef CONFIG_TRACER_SNAPSHOT
6850 static const struct file_operations snapshot_fops = {
6851 	.open		= tracing_snapshot_open,
6852 	.read		= seq_read,
6853 	.write		= tracing_snapshot_write,
6854 	.llseek		= tracing_lseek,
6855 	.release	= tracing_snapshot_release,
6856 };
6857 
6858 static const struct file_operations snapshot_raw_fops = {
6859 	.open		= snapshot_raw_open,
6860 	.read		= tracing_buffers_read,
6861 	.release	= tracing_buffers_release,
6862 	.splice_read	= tracing_buffers_splice_read,
6863 	.llseek		= no_llseek,
6864 };
6865 
6866 #endif /* CONFIG_TRACER_SNAPSHOT */
6867 
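/*
 * open() handler for the per-cpu trace_pipe_raw files (also reused by
 * snapshot_raw_open() above when snapshots are configured in).
 */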
6868 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6869 {
6870 	struct trace_array *tr = inode->i_private;
6871 	struct ftrace_buffer_info *info;
6872 	int ret;
6873 
6874 	if (tracing_disabled)
6875 		return -ENODEV;
6876 
6877 	if (trace_array_get(tr) < 0)
6878 		return -ENODEV;
6879 
6880 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6881 	if (!info) {
6882 		trace_array_put(tr);
6883 		return -ENOMEM;
6884 	}
6885 
6886 	mutex_lock(&trace_types_lock);
6887 
6888 	info->iter.tr		= tr;
6889 	info->iter.cpu_file	= tracing_get_cpu(inode);
6890 	info->iter.trace	= tr->current_trace;
6891 	info->iter.trace_buffer = &tr->trace_buffer;
6892 	info->spare		= NULL;
6893 	/* Force reading ring buffer for first read */
6894 	info->read		= (unsigned int)-1;
6895 
6896 	filp->private_data = info;
6897 
6898 	tr->current_trace->ref++;
6899 
6900 	mutex_unlock(&trace_types_lock);
6901 
6902 	ret = nonseekable_open(inode, filp);
6903 	if (ret < 0)
6904 		trace_array_put(tr);
6905 
6906 	return ret;
6907 }
6908 
6909 static __poll_t
6910 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6911 {
6912 	struct ftrace_buffer_info *info = filp->private_data;
6913 	struct trace_iterator *iter = &info->iter;
6914 
6915 	return trace_poll(iter, filp, poll_table);
6916 }
6917 
6918 static ssize_t
6919 tracing_buffers_read(struct file *filp, char __user *ubuf,
6920 		     size_t count, loff_t *ppos)
6921 {
6922 	struct ftrace_buffer_info *info = filp->private_data;
6923 	struct trace_iterator *iter = &info->iter;
6924 	ssize_t ret = 0;
6925 	ssize_t size;
6926 
6927 	if (!count)
6928 		return 0;
6929 
6930 #ifdef CONFIG_TRACER_MAX_TRACE
6931 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6932 		return -EBUSY;
6933 #endif
6934 
6935 	if (!info->spare) {
6936 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6937 							  iter->cpu_file);
6938 		if (IS_ERR(info->spare)) {
6939 			ret = PTR_ERR(info->spare);
6940 			info->spare = NULL;
6941 		} else {
6942 			info->spare_cpu = iter->cpu_file;
6943 		}
6944 	}
6945 	if (!info->spare)
6946 		return ret;
6947 
6948 	/* Do we have previous read data to read? */
6949 	if (info->read < PAGE_SIZE)
6950 		goto read;
6951 
6952  again:
6953 	trace_access_lock(iter->cpu_file);
6954 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6955 				    &info->spare,
6956 				    count,
6957 				    iter->cpu_file, 0);
6958 	trace_access_unlock(iter->cpu_file);
6959 
6960 	if (ret < 0) {
6961 		if (trace_empty(iter)) {
6962 			if ((filp->f_flags & O_NONBLOCK))
6963 				return -EAGAIN;
6964 
6965 			ret = wait_on_pipe(iter, 0);
6966 			if (ret)
6967 				return ret;
6968 
6969 			goto again;
6970 		}
6971 		return 0;
6972 	}
6973 
6974 	info->read = 0;
6975  read:
6976 	size = PAGE_SIZE - info->read;
6977 	if (size > count)
6978 		size = count;
6979 
6980 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6981 	if (ret == size)
6982 		return -EFAULT;
6983 
6984 	size -= ret;
6985 
6986 	*ppos += size;
6987 	info->read += size;
6988 
6989 	return size;
6990 }
6991 
6992 static int tracing_buffers_release(struct inode *inode, struct file *file)
6993 {
6994 	struct ftrace_buffer_info *info = file->private_data;
6995 	struct trace_iterator *iter = &info->iter;
6996 
6997 	mutex_lock(&trace_types_lock);
6998 
6999 	iter->tr->current_trace->ref--;
7000 
7001 	__trace_array_put(iter->tr);
7002 
7003 	if (info->spare)
7004 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7005 					   info->spare_cpu, info->spare);
7006 	kfree(info);
7007 
7008 	mutex_unlock(&trace_types_lock);
7009 
7010 	return 0;
7011 }
7012 
7013 struct buffer_ref {
7014 	struct ring_buffer	*buffer;
7015 	void			*page;
7016 	int			cpu;
7017 	int			ref;
7018 };
7019 
7020 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7021 				    struct pipe_buffer *buf)
7022 {
7023 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7024 
7025 	if (--ref->ref)
7026 		return;
7027 
7028 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7029 	kfree(ref);
7030 	buf->private = 0;
7031 }
7032 
7033 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7034 				struct pipe_buffer *buf)
7035 {
7036 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7037 
7038 	ref->ref++;
7039 }
7040 
7041 /* Pipe buffer operations for ring buffer pages handed to a pipe. */
7042 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7043 	.can_merge		= 0,
7044 	.confirm		= generic_pipe_buf_confirm,
7045 	.release		= buffer_pipe_buf_release,
7046 	.steal			= generic_pipe_buf_steal,
7047 	.get			= buffer_pipe_buf_get,
7048 };
7049 
7050 /*
7051  * Callback from splice_to_pipe(): release any pages left at the end
7052  * of the spd in case we errored out while filling the pipe.
7053  */
7054 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7055 {
7056 	struct buffer_ref *ref =
7057 		(struct buffer_ref *)spd->partial[i].private;
7058 
7059 	if (--ref->ref)
7060 		return;
7061 
7062 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7063 	kfree(ref);
7064 	spd->partial[i].private = 0;
7065 }
7066 
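/*
 * splice_read() for trace_pipe_raw: hand complete ring buffer pages to
 * the pipe without copying them. Each page is wrapped in a buffer_ref
 * so it can be returned to the ring buffer once the pipe consumer is
 * done with it (see buffer_pipe_buf_release() above).
 */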
7067 static ssize_t
7068 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7069 			    struct pipe_inode_info *pipe, size_t len,
7070 			    unsigned int flags)
7071 {
7072 	struct ftrace_buffer_info *info = file->private_data;
7073 	struct trace_iterator *iter = &info->iter;
7074 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7075 	struct page *pages_def[PIPE_DEF_BUFFERS];
7076 	struct splice_pipe_desc spd = {
7077 		.pages		= pages_def,
7078 		.partial	= partial_def,
7079 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7080 		.ops		= &buffer_pipe_buf_ops,
7081 		.spd_release	= buffer_spd_release,
7082 	};
7083 	struct buffer_ref *ref;
7084 	int entries, i;
7085 	ssize_t ret = 0;
7086 
7087 #ifdef CONFIG_TRACER_MAX_TRACE
7088 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7089 		return -EBUSY;
7090 #endif
7091 
7092 	if (*ppos & (PAGE_SIZE - 1))
7093 		return -EINVAL;
7094 
7095 	if (len & (PAGE_SIZE - 1)) {
7096 		if (len < PAGE_SIZE)
7097 			return -EINVAL;
7098 		len &= PAGE_MASK;
7099 	}
7100 
7101 	if (splice_grow_spd(pipe, &spd))
7102 		return -ENOMEM;
7103 
7104  again:
7105 	trace_access_lock(iter->cpu_file);
7106 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7107 
7108 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7109 		struct page *page;
7110 		int r;
7111 
7112 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7113 		if (!ref) {
7114 			ret = -ENOMEM;
7115 			break;
7116 		}
7117 
7118 		ref->ref = 1;
7119 		ref->buffer = iter->trace_buffer->buffer;
7120 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7121 		if (IS_ERR(ref->page)) {
7122 			ret = PTR_ERR(ref->page);
7123 			ref->page = NULL;
7124 			kfree(ref);
7125 			break;
7126 		}
7127 		ref->cpu = iter->cpu_file;
7128 
7129 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7130 					  len, iter->cpu_file, 1);
7131 		if (r < 0) {
7132 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7133 						   ref->page);
7134 			kfree(ref);
7135 			break;
7136 		}
7137 
7138 		page = virt_to_page(ref->page);
7139 
7140 		spd.pages[i] = page;
7141 		spd.partial[i].len = PAGE_SIZE;
7142 		spd.partial[i].offset = 0;
7143 		spd.partial[i].private = (unsigned long)ref;
7144 		spd.nr_pages++;
7145 		*ppos += PAGE_SIZE;
7146 
7147 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7148 	}
7149 
7150 	trace_access_unlock(iter->cpu_file);
7151 	spd.nr_pages = i;
7152 
7153 	/* did we read anything? */
7154 	if (!spd.nr_pages) {
7155 		if (ret)
7156 			goto out;
7157 
7158 		ret = -EAGAIN;
7159 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7160 			goto out;
7161 
7162 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7163 		if (ret)
7164 			goto out;
7165 
7166 		goto again;
7167 	}
7168 
7169 	ret = splice_to_pipe(pipe, &spd);
7170 out:
7171 	splice_shrink_spd(&spd);
7172 
7173 	return ret;
7174 }
7175 
7176 static const struct file_operations tracing_buffers_fops = {
7177 	.open		= tracing_buffers_open,
7178 	.read		= tracing_buffers_read,
7179 	.poll		= tracing_buffers_poll,
7180 	.release	= tracing_buffers_release,
7181 	.splice_read	= tracing_buffers_splice_read,
7182 	.llseek		= no_llseek,
7183 };
7184 
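/*
 * Reads of per_cpu/cpuN/stats dump the per-CPU ring buffer statistics:
 * entry, overrun and byte counts plus the oldest and current timestamps.
 */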
7185 static ssize_t
7186 tracing_stats_read(struct file *filp, char __user *ubuf,
7187 		   size_t count, loff_t *ppos)
7188 {
7189 	struct inode *inode = file_inode(filp);
7190 	struct trace_array *tr = inode->i_private;
7191 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7192 	int cpu = tracing_get_cpu(inode);
7193 	struct trace_seq *s;
7194 	unsigned long cnt;
7195 	unsigned long long t;
7196 	unsigned long usec_rem;
7197 
7198 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7199 	if (!s)
7200 		return -ENOMEM;
7201 
7202 	trace_seq_init(s);
7203 
7204 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7205 	trace_seq_printf(s, "entries: %ld\n", cnt);
7206 
7207 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7208 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7209 
7210 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7211 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7212 
7213 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7214 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7215 
7216 	if (trace_clocks[tr->clock_id].in_ns) {
7217 		/* local or global for trace_clock */
7218 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7219 		usec_rem = do_div(t, USEC_PER_SEC);
7220 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7221 								t, usec_rem);
7222 
7223 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7224 		usec_rem = do_div(t, USEC_PER_SEC);
7225 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7226 	} else {
7227 		/* counter or tsc mode for trace_clock */
7228 		trace_seq_printf(s, "oldest event ts: %llu\n",
7229 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7230 
7231 		trace_seq_printf(s, "now ts: %llu\n",
7232 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7233 	}
7234 
7235 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7236 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7237 
7238 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7239 	trace_seq_printf(s, "read events: %ld\n", cnt);
7240 
7241 	count = simple_read_from_buffer(ubuf, count, ppos,
7242 					s->buffer, trace_seq_used(s));
7243 
7244 	kfree(s);
7245 
7246 	return count;
7247 }
7248 
7249 static const struct file_operations tracing_stats_fops = {
7250 	.open		= tracing_open_generic_tr,
7251 	.read		= tracing_stats_read,
7252 	.llseek		= generic_file_llseek,
7253 	.release	= tracing_release_generic_tr,
7254 };
7255 
7256 #ifdef CONFIG_DYNAMIC_FTRACE
7257 
7258 static ssize_t
7259 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7260 		  size_t cnt, loff_t *ppos)
7261 {
7262 	unsigned long *p = filp->private_data;
7263 	char buf[64]; /* Not too big for a shallow stack */
7264 	int r;
7265 
7266 	r = scnprintf(buf, 63, "%ld", *p);
7267 	buf[r++] = '\n';
7268 
7269 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7270 }
7271 
7272 static const struct file_operations tracing_dyn_info_fops = {
7273 	.open		= tracing_open_generic,
7274 	.read		= tracing_read_dyn_info,
7275 	.llseek		= generic_file_llseek,
7276 };
7277 #endif /* CONFIG_DYNAMIC_FTRACE */
7278 
7279 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7280 static void
7281 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7282 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7283 		void *data)
7284 {
7285 	tracing_snapshot_instance(tr);
7286 }
7287 
7288 static void
7289 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7290 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7291 		      void *data)
7292 {
7293 	struct ftrace_func_mapper *mapper = data;
7294 	long *count = NULL;
7295 
7296 	if (mapper)
7297 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7298 
7299 	if (count) {
7300 
7301 		if (*count <= 0)
7302 			return;
7303 
7304 		(*count)--;
7305 	}
7306 
7307 	tracing_snapshot_instance(tr);
7308 }
7309 
7310 static int
7311 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7312 		      struct ftrace_probe_ops *ops, void *data)
7313 {
7314 	struct ftrace_func_mapper *mapper = data;
7315 	long *count = NULL;
7316 
7317 	seq_printf(m, "%ps:", (void *)ip);
7318 
7319 	seq_puts(m, "snapshot");
7320 
7321 	if (mapper)
7322 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7323 
7324 	if (count)
7325 		seq_printf(m, ":count=%ld\n", *count);
7326 	else
7327 		seq_puts(m, ":unlimited\n");
7328 
7329 	return 0;
7330 }
7331 
7332 static int
7333 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7334 		     unsigned long ip, void *init_data, void **data)
7335 {
7336 	struct ftrace_func_mapper *mapper = *data;
7337 
7338 	if (!mapper) {
7339 		mapper = allocate_ftrace_func_mapper();
7340 		if (!mapper)
7341 			return -ENOMEM;
7342 		*data = mapper;
7343 	}
7344 
7345 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7346 }
7347 
7348 static void
7349 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7350 		     unsigned long ip, void *data)
7351 {
7352 	struct ftrace_func_mapper *mapper = data;
7353 
7354 	if (!ip) {
7355 		if (!mapper)
7356 			return;
7357 		free_ftrace_func_mapper(mapper, NULL);
7358 		return;
7359 	}
7360 
7361 	ftrace_func_mapper_remove_ip(mapper, ip);
7362 }
7363 
7364 static struct ftrace_probe_ops snapshot_probe_ops = {
7365 	.func			= ftrace_snapshot,
7366 	.print			= ftrace_snapshot_print,
7367 };
7368 
7369 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7370 	.func			= ftrace_count_snapshot,
7371 	.print			= ftrace_snapshot_print,
7372 	.init			= ftrace_snapshot_init,
7373 	.free			= ftrace_snapshot_free,
7374 };
7375 
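/*
 * Handler for the "snapshot" function command in set_ftrace_filter, e.g.
 *
 *     echo 'do_sys_open:snapshot'   > set_ftrace_filter
 *     echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *
 * The optional ":count" limits how many snapshots the probe will take,
 * and a leading '!' unregisters an existing probe.
 */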
7376 static int
7377 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7378 			       char *glob, char *cmd, char *param, int enable)
7379 {
7380 	struct ftrace_probe_ops *ops;
7381 	void *count = (void *)-1;
7382 	char *number;
7383 	int ret;
7384 
7385 	if (!tr)
7386 		return -ENODEV;
7387 
7388 	/* hash funcs only work with set_ftrace_filter */
7389 	if (!enable)
7390 		return -EINVAL;
7391 
7392 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7393 
7394 	if (glob[0] == '!')
7395 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7396 
7397 	if (!param)
7398 		goto out_reg;
7399 
7400 	number = strsep(&param, ":");
7401 
7402 	if (!strlen(number))
7403 		goto out_reg;
7404 
7405 	/*
7406 	 * We use the callback data field (which is a pointer)
7407 	 * as our counter.
7408 	 */
7409 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7410 	if (ret)
7411 		return ret;
7412 
7413  out_reg:
7414 	ret = tracing_alloc_snapshot_instance(tr);
7415 	if (ret < 0)
7416 		goto out;
7417 
7418 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7419 
7420  out:
7421 	return ret < 0 ? ret : 0;
7422 }
7423 
7424 static struct ftrace_func_command ftrace_snapshot_cmd = {
7425 	.name			= "snapshot",
7426 	.func			= ftrace_trace_snapshot_callback,
7427 };
7428 
7429 static __init int register_snapshot_cmd(void)
7430 {
7431 	return register_ftrace_command(&ftrace_snapshot_cmd);
7432 }
7433 #else
7434 static inline __init int register_snapshot_cmd(void) { return 0; }
7435 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7436 
7437 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7438 {
7439 	if (WARN_ON(!tr->dir))
7440 		return ERR_PTR(-ENODEV);
7441 
7442 	/* Top directory uses NULL as the parent */
7443 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7444 		return NULL;
7445 
7446 	/* All sub buffers have a descriptor */
7447 	return tr->dir;
7448 }
7449 
7450 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7451 {
7452 	struct dentry *d_tracer;
7453 
7454 	if (tr->percpu_dir)
7455 		return tr->percpu_dir;
7456 
7457 	d_tracer = tracing_get_dentry(tr);
7458 	if (IS_ERR(d_tracer))
7459 		return NULL;
7460 
7461 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7462 
7463 	WARN_ONCE(!tr->percpu_dir,
7464 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7465 
7466 	return tr->percpu_dir;
7467 }
7468 
7469 static struct dentry *
7470 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7471 		      void *data, long cpu, const struct file_operations *fops)
7472 {
7473 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7474 
7475 	if (ret) /* See tracing_get_cpu() */
7476 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7477 	return ret;
7478 }
7479 
7480 static void
7481 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7482 {
7483 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7484 	struct dentry *d_cpu;
7485 	char cpu_dir[30]; /* 30 characters should be more than enough */
7486 
7487 	if (!d_percpu)
7488 		return;
7489 
7490 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7491 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7492 	if (!d_cpu) {
7493 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7494 		return;
7495 	}
7496 
7497 	/* per cpu trace_pipe */
7498 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7499 				tr, cpu, &tracing_pipe_fops);
7500 
7501 	/* per cpu trace */
7502 	trace_create_cpu_file("trace", 0644, d_cpu,
7503 				tr, cpu, &tracing_fops);
7504 
7505 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7506 				tr, cpu, &tracing_buffers_fops);
7507 
7508 	trace_create_cpu_file("stats", 0444, d_cpu,
7509 				tr, cpu, &tracing_stats_fops);
7510 
7511 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7512 				tr, cpu, &tracing_entries_fops);
7513 
7514 #ifdef CONFIG_TRACER_SNAPSHOT
7515 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7516 				tr, cpu, &snapshot_fops);
7517 
7518 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7519 				tr, cpu, &snapshot_raw_fops);
7520 #endif
7521 }
7522 
7523 #ifdef CONFIG_FTRACE_SELFTEST
7524 /* Let selftest have access to static functions in this file */
7525 #include "trace_selftest.c"
7526 #endif
7527 
7528 static ssize_t
7529 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7530 			loff_t *ppos)
7531 {
7532 	struct trace_option_dentry *topt = filp->private_data;
7533 	char *buf;
7534 
7535 	if (topt->flags->val & topt->opt->bit)
7536 		buf = "1\n";
7537 	else
7538 		buf = "0\n";
7539 
7540 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7541 }
7542 
7543 static ssize_t
7544 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7545 			 loff_t *ppos)
7546 {
7547 	struct trace_option_dentry *topt = filp->private_data;
7548 	unsigned long val;
7549 	int ret;
7550 
7551 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7552 	if (ret)
7553 		return ret;
7554 
7555 	if (val != 0 && val != 1)
7556 		return -EINVAL;
7557 
7558 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7559 		mutex_lock(&trace_types_lock);
7560 		ret = __set_tracer_option(topt->tr, topt->flags,
7561 					  topt->opt, !val);
7562 		mutex_unlock(&trace_types_lock);
7563 		if (ret)
7564 			return ret;
7565 	}
7566 
7567 	*ppos += cnt;
7568 
7569 	return cnt;
7570 }
7571 
7572 
7573 static const struct file_operations trace_options_fops = {
7574 	.open = tracing_open_generic,
7575 	.read = trace_options_read,
7576 	.write = trace_options_write,
7577 	.llseek	= generic_file_llseek,
7578 };
7579 
7580 /*
7581  * In order to pass in both the trace_array descriptor as well as the index
7582  * to the flag that the trace option file represents, the trace_array
7583  * has a character array of trace_flags_index[], which holds the index
7584  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7585  * The address of this character array is passed to the flag option file
7586  * read/write callbacks.
7587  *
7588  * In order to extract both the index and the trace_array descriptor,
7589  * get_tr_index() uses the following algorithm.
7590  *
7591  *   idx = *ptr;
7592  *
7593  * Since the pointer points into the index array and index[i] == i,
7594  * dereferencing it yields the index of the flag.
7595  *
7596  * Then, to get the trace_array descriptor, subtracting that index
7597  * from the pointer gets us to the start of the index array itself.
7598  *
7599  *   ptr - idx == &index[0]
7600  *
7601  * Then a simple container_of() from that pointer gets us to the
7602  * trace_array descriptor.
7603  */
7604 static void get_tr_index(void *data, struct trace_array **ptr,
7605 			 unsigned int *pindex)
7606 {
7607 	*pindex = *(unsigned char *)data;
7608 
7609 	*ptr = container_of(data - *pindex, struct trace_array,
7610 			    trace_flags_index);
7611 }
7612 
7613 static ssize_t
7614 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7615 			loff_t *ppos)
7616 {
7617 	void *tr_index = filp->private_data;
7618 	struct trace_array *tr;
7619 	unsigned int index;
7620 	char *buf;
7621 
7622 	get_tr_index(tr_index, &tr, &index);
7623 
7624 	if (tr->trace_flags & (1 << index))
7625 		buf = "1\n";
7626 	else
7627 		buf = "0\n";
7628 
7629 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7630 }
7631 
7632 static ssize_t
7633 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7634 			 loff_t *ppos)
7635 {
7636 	void *tr_index = filp->private_data;
7637 	struct trace_array *tr;
7638 	unsigned int index;
7639 	unsigned long val;
7640 	int ret;
7641 
7642 	get_tr_index(tr_index, &tr, &index);
7643 
7644 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7645 	if (ret)
7646 		return ret;
7647 
7648 	if (val != 0 && val != 1)
7649 		return -EINVAL;
7650 
7651 	mutex_lock(&trace_types_lock);
7652 	ret = set_tracer_flag(tr, 1 << index, val);
7653 	mutex_unlock(&trace_types_lock);
7654 
7655 	if (ret < 0)
7656 		return ret;
7657 
7658 	*ppos += cnt;
7659 
7660 	return cnt;
7661 }
7662 
7663 static const struct file_operations trace_options_core_fops = {
7664 	.open = tracing_open_generic,
7665 	.read = trace_options_core_read,
7666 	.write = trace_options_core_write,
7667 	.llseek = generic_file_llseek,
7668 };
7669 
7670 struct dentry *trace_create_file(const char *name,
7671 				 umode_t mode,
7672 				 struct dentry *parent,
7673 				 void *data,
7674 				 const struct file_operations *fops)
7675 {
7676 	struct dentry *ret;
7677 
7678 	ret = tracefs_create_file(name, mode, parent, data, fops);
7679 	if (!ret)
7680 		pr_warn("Could not create tracefs '%s' entry\n", name);
7681 
7682 	return ret;
7683 }
7684 
7685 
7686 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7687 {
7688 	struct dentry *d_tracer;
7689 
7690 	if (tr->options)
7691 		return tr->options;
7692 
7693 	d_tracer = tracing_get_dentry(tr);
7694 	if (IS_ERR(d_tracer))
7695 		return NULL;
7696 
7697 	tr->options = tracefs_create_dir("options", d_tracer);
7698 	if (!tr->options) {
7699 		pr_warn("Could not create tracefs directory 'options'\n");
7700 		return NULL;
7701 	}
7702 
7703 	return tr->options;
7704 }
7705 
7706 static void
7707 create_trace_option_file(struct trace_array *tr,
7708 			 struct trace_option_dentry *topt,
7709 			 struct tracer_flags *flags,
7710 			 struct tracer_opt *opt)
7711 {
7712 	struct dentry *t_options;
7713 
7714 	t_options = trace_options_init_dentry(tr);
7715 	if (!t_options)
7716 		return;
7717 
7718 	topt->flags = flags;
7719 	topt->opt = opt;
7720 	topt->tr = tr;
7721 
7722 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7723 				    &trace_options_fops);
7724 
7725 }
7726 
7727 static void
7728 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7729 {
7730 	struct trace_option_dentry *topts;
7731 	struct trace_options *tr_topts;
7732 	struct tracer_flags *flags;
7733 	struct tracer_opt *opts;
7734 	int cnt;
7735 	int i;
7736 
7737 	if (!tracer)
7738 		return;
7739 
7740 	flags = tracer->flags;
7741 
7742 	if (!flags || !flags->opts)
7743 		return;
7744 
7745 	/*
7746 	 * If this is an instance, only create flags for tracers
7747 	 * the instance may have.
7748 	 */
7749 	if (!trace_ok_for_array(tracer, tr))
7750 		return;
7751 
7752 	for (i = 0; i < tr->nr_topts; i++) {
7753 		/* Make sure there are no duplicate flags. */
7754 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7755 			return;
7756 	}
7757 
7758 	opts = flags->opts;
7759 
7760 	for (cnt = 0; opts[cnt].name; cnt++)
7761 		;
7762 
7763 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7764 	if (!topts)
7765 		return;
7766 
7767 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7768 			    GFP_KERNEL);
7769 	if (!tr_topts) {
7770 		kfree(topts);
7771 		return;
7772 	}
7773 
7774 	tr->topts = tr_topts;
7775 	tr->topts[tr->nr_topts].tracer = tracer;
7776 	tr->topts[tr->nr_topts].topts = topts;
7777 	tr->nr_topts++;
7778 
7779 	for (cnt = 0; opts[cnt].name; cnt++) {
7780 		create_trace_option_file(tr, &topts[cnt], flags,
7781 					 &opts[cnt]);
7782 		WARN_ONCE(topts[cnt].entry == NULL,
7783 			  "Failed to create trace option: %s",
7784 			  opts[cnt].name);
7785 	}
7786 }
7787 
7788 static struct dentry *
7789 create_trace_option_core_file(struct trace_array *tr,
7790 			      const char *option, long index)
7791 {
7792 	struct dentry *t_options;
7793 
7794 	t_options = trace_options_init_dentry(tr);
7795 	if (!t_options)
7796 		return NULL;
7797 
7798 	return trace_create_file(option, 0644, t_options,
7799 				 (void *)&tr->trace_flags_index[index],
7800 				 &trace_options_core_fops);
7801 }
7802 
7803 static void create_trace_options_dir(struct trace_array *tr)
7804 {
7805 	struct dentry *t_options;
7806 	bool top_level = tr == &global_trace;
7807 	int i;
7808 
7809 	t_options = trace_options_init_dentry(tr);
7810 	if (!t_options)
7811 		return;
7812 
7813 	for (i = 0; trace_options[i]; i++) {
7814 		if (top_level ||
7815 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7816 			create_trace_option_core_file(tr, trace_options[i], i);
7817 	}
7818 }
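
/*
 * Editorial note (usage sketch, not kernel code): each file created above
 * appears under the trace array's "options" directory and accepts "0" or
 * "1", as enforced by trace_options_core_write(). Assuming the default
 * tracefs mount point:
 *
 *	echo 1 > /sys/kernel/tracing/options/sym-offset
 */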
7819 
7820 static ssize_t
7821 rb_simple_read(struct file *filp, char __user *ubuf,
7822 	       size_t cnt, loff_t *ppos)
7823 {
7824 	struct trace_array *tr = filp->private_data;
7825 	char buf[64];
7826 	int r;
7827 
7828 	r = tracer_tracing_is_on(tr);
7829 	r = sprintf(buf, "%d\n", r);
7830 
7831 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7832 }
7833 
7834 static ssize_t
7835 rb_simple_write(struct file *filp, const char __user *ubuf,
7836 		size_t cnt, loff_t *ppos)
7837 {
7838 	struct trace_array *tr = filp->private_data;
7839 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7840 	unsigned long val;
7841 	int ret;
7842 
7843 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7844 	if (ret)
7845 		return ret;
7846 
7847 	if (buffer) {
7848 		mutex_lock(&trace_types_lock);
7849 		if (!!val == tracer_tracing_is_on(tr)) {
7850 			val = 0; /* do nothing */
7851 		} else if (val) {
7852 			tracer_tracing_on(tr);
7853 			if (tr->current_trace->start)
7854 				tr->current_trace->start(tr);
7855 		} else {
7856 			tracer_tracing_off(tr);
7857 			if (tr->current_trace->stop)
7858 				tr->current_trace->stop(tr);
7859 		}
7860 		mutex_unlock(&trace_types_lock);
7861 	}
7862 
7863 	(*ppos)++;
7864 
7865 	return cnt;
7866 }
7867 
7868 static const struct file_operations rb_simple_fops = {
7869 	.open		= tracing_open_generic_tr,
7870 	.read		= rb_simple_read,
7871 	.write		= rb_simple_write,
7872 	.release	= tracing_release_generic_tr,
7873 	.llseek		= default_llseek,
7874 };
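
/*
 * Editorial illustration (hypothetical userspace sketch, not kernel code):
 * rb_simple_fops backs the "tracing_on" file created in
 * init_tracer_tracefs() below; writing "0" or "1" toggles recording as
 * implemented above. Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "0", 1);	// pause the ring buffer
 *		// ... do work that should not be traced over ...
 *		write(fd, "1", 1);	// resume
 *		close(fd);
 *	}
 */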
7875 
7876 static ssize_t
7877 buffer_percent_read(struct file *filp, char __user *ubuf,
7878 		    size_t cnt, loff_t *ppos)
7879 {
7880 	struct trace_array *tr = filp->private_data;
7881 	char buf[64];
7882 	int r;
7883 
7884 	r = tr->buffer_percent;
7885 	r = sprintf(buf, "%d\n", r);
7886 
7887 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7888 }
7889 
7890 static ssize_t
7891 buffer_percent_write(struct file *filp, const char __user *ubuf,
7892 		     size_t cnt, loff_t *ppos)
7893 {
7894 	struct trace_array *tr = filp->private_data;
7895 	unsigned long val;
7896 	int ret;
7897 
7898 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7899 	if (ret)
7900 		return ret;
7901 
7902 	if (val > 100)
7903 		return -EINVAL;
7904 
7905 	if (!val)
7906 		val = 1;
7907 
7908 	tr->buffer_percent = val;
7909 
7910 	(*ppos)++;
7911 
7912 	return cnt;
7913 }
7914 
7915 static const struct file_operations buffer_percent_fops = {
7916 	.open		= tracing_open_generic_tr,
7917 	.read		= buffer_percent_read,
7918 	.write		= buffer_percent_write,
7919 	.release	= tracing_release_generic_tr,
7920 	.llseek		= default_llseek,
7921 };
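
/*
 * Editorial note (usage sketch, not kernel code): "buffer_percent" is the
 * watermark of how full the ring buffer must be before waiting readers are
 * woken; the write handler above clamps 0 up to 1 and rejects values above
 * 100, e.g.:
 *
 *	echo 50 > /sys/kernel/tracing/buffer_percent
 */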
7922 
7923 struct dentry *trace_instance_dir;
7924 
7925 static void
7926 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7927 
7928 static int
7929 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7930 {
7931 	enum ring_buffer_flags rb_flags;
7932 
7933 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7934 
7935 	buf->tr = tr;
7936 
7937 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7938 	if (!buf->buffer)
7939 		return -ENOMEM;
7940 
7941 	buf->data = alloc_percpu(struct trace_array_cpu);
7942 	if (!buf->data) {
7943 		ring_buffer_free(buf->buffer);
7944 		buf->buffer = NULL;
7945 		return -ENOMEM;
7946 	}
7947 
7948 	/* Allocate the first page for all buffers */
7949 	set_buffer_entries(&tr->trace_buffer,
7950 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7951 
7952 	return 0;
7953 }
7954 
7955 static int allocate_trace_buffers(struct trace_array *tr, int size)
7956 {
7957 	int ret;
7958 
7959 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7960 	if (ret)
7961 		return ret;
7962 
7963 #ifdef CONFIG_TRACER_MAX_TRACE
7964 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7965 				    allocate_snapshot ? size : 1);
7966 	if (WARN_ON(ret)) {
7967 		ring_buffer_free(tr->trace_buffer.buffer);
7968 		tr->trace_buffer.buffer = NULL;
7969 		free_percpu(tr->trace_buffer.data);
7970 		tr->trace_buffer.data = NULL;
7971 		return -ENOMEM;
7972 	}
7973 	tr->allocated_snapshot = allocate_snapshot;
7974 
7975 	/*
7976 	 * Only the top level trace array gets its snapshot allocated
7977 	 * from the kernel command line.
7978 	 */
7979 	allocate_snapshot = false;
7980 #endif
7981 	return 0;
7982 }
7983 
7984 static void free_trace_buffer(struct trace_buffer *buf)
7985 {
7986 	if (buf->buffer) {
7987 		ring_buffer_free(buf->buffer);
7988 		buf->buffer = NULL;
7989 		free_percpu(buf->data);
7990 		buf->data = NULL;
7991 	}
7992 }
7993 
7994 static void free_trace_buffers(struct trace_array *tr)
7995 {
7996 	if (!tr)
7997 		return;
7998 
7999 	free_trace_buffer(&tr->trace_buffer);
8000 
8001 #ifdef CONFIG_TRACER_MAX_TRACE
8002 	free_trace_buffer(&tr->max_buffer);
8003 #endif
8004 }
8005 
8006 static void init_trace_flags_index(struct trace_array *tr)
8007 {
8008 	int i;
8009 
8010 	/* Used by the trace options files */
8011 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8012 		tr->trace_flags_index[i] = i;
8013 }
8014 
8015 static void __update_tracer_options(struct trace_array *tr)
8016 {
8017 	struct tracer *t;
8018 
8019 	for (t = trace_types; t; t = t->next)
8020 		add_tracer_options(tr, t);
8021 }
8022 
8023 static void update_tracer_options(struct trace_array *tr)
8024 {
8025 	mutex_lock(&trace_types_lock);
8026 	__update_tracer_options(tr);
8027 	mutex_unlock(&trace_types_lock);
8028 }
8029 
8030 static int instance_mkdir(const char *name)
8031 {
8032 	struct trace_array *tr;
8033 	int ret;
8034 
8035 	mutex_lock(&event_mutex);
8036 	mutex_lock(&trace_types_lock);
8037 
8038 	ret = -EEXIST;
8039 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8040 		if (tr->name && strcmp(tr->name, name) == 0)
8041 			goto out_unlock;
8042 	}
8043 
8044 	ret = -ENOMEM;
8045 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8046 	if (!tr)
8047 		goto out_unlock;
8048 
8049 	tr->name = kstrdup(name, GFP_KERNEL);
8050 	if (!tr->name)
8051 		goto out_free_tr;
8052 
8053 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8054 		goto out_free_tr;
8055 
8056 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8057 
8058 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8059 
8060 	raw_spin_lock_init(&tr->start_lock);
8061 
8062 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8063 
8064 	tr->current_trace = &nop_trace;
8065 
8066 	INIT_LIST_HEAD(&tr->systems);
8067 	INIT_LIST_HEAD(&tr->events);
8068 	INIT_LIST_HEAD(&tr->hist_vars);
8069 
8070 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8071 		goto out_free_tr;
8072 
8073 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8074 	if (!tr->dir)
8075 		goto out_free_tr;
8076 
8077 	ret = event_trace_add_tracer(tr->dir, tr);
8078 	if (ret) {
8079 		tracefs_remove_recursive(tr->dir);
8080 		goto out_free_tr;
8081 	}
8082 
8083 	ftrace_init_trace_array(tr);
8084 
8085 	init_tracer_tracefs(tr, tr->dir);
8086 	init_trace_flags_index(tr);
8087 	__update_tracer_options(tr);
8088 
8089 	list_add(&tr->list, &ftrace_trace_arrays);
8090 
8091 	mutex_unlock(&trace_types_lock);
8092 	mutex_unlock(&event_mutex);
8093 
8094 	return 0;
8095 
8096  out_free_tr:
8097 	free_trace_buffers(tr);
8098 	free_cpumask_var(tr->tracing_cpumask);
8099 	kfree(tr->name);
8100 	kfree(tr);
8101 
8102  out_unlock:
8103 	mutex_unlock(&trace_types_lock);
8104 	mutex_unlock(&event_mutex);
8105 
8106 	return ret;
8107 
8108 }
8109 
8110 static int instance_rmdir(const char *name)
8111 {
8112 	struct trace_array *tr;
8113 	int found = 0;
8114 	int ret;
8115 	int i;
8116 
8117 	mutex_lock(&event_mutex);
8118 	mutex_lock(&trace_types_lock);
8119 
8120 	ret = -ENODEV;
8121 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8122 		if (tr->name && strcmp(tr->name, name) == 0) {
8123 			found = 1;
8124 			break;
8125 		}
8126 	}
8127 	if (!found)
8128 		goto out_unlock;
8129 
8130 	ret = -EBUSY;
8131 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8132 		goto out_unlock;
8133 
8134 	list_del(&tr->list);
8135 
8136 	/* Disable all the flags that were enabled coming in */
8137 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8138 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8139 			set_tracer_flag(tr, 1 << i, 0);
8140 	}
8141 
8142 	tracing_set_nop(tr);
8143 	clear_ftrace_function_probes(tr);
8144 	event_trace_del_tracer(tr);
8145 	ftrace_clear_pids(tr);
8146 	ftrace_destroy_function_files(tr);
8147 	tracefs_remove_recursive(tr->dir);
8148 	free_trace_buffers(tr);
8149 
8150 	for (i = 0; i < tr->nr_topts; i++) {
8151 		kfree(tr->topts[i].topts);
8152 	}
8153 	kfree(tr->topts);
8154 
8155 	free_cpumask_var(tr->tracing_cpumask);
8156 	kfree(tr->name);
8157 	kfree(tr);
8158 
8159 	ret = 0;
8160 
8161  out_unlock:
8162 	mutex_unlock(&trace_types_lock);
8163 	mutex_unlock(&event_mutex);
8164 
8165 	return ret;
8166 }
8167 
8168 static __init void create_trace_instances(struct dentry *d_tracer)
8169 {
8170 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8171 							 instance_mkdir,
8172 							 instance_rmdir);
8173 	if (WARN_ON(!trace_instance_dir))
8174 		return;
8175 }
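
/*
 * Editorial note (usage sketch, not kernel code): instance_mkdir() and
 * instance_rmdir() back the "instances" directory registered above, so a
 * new trace array is created and destroyed from userspace with plain
 * mkdir/rmdir, e.g. (assuming the default tracefs mount point):
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 */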
8176 
8177 static void
8178 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8179 {
8180 	struct trace_event_file *file;
8181 	int cpu;
8182 
8183 	trace_create_file("available_tracers", 0444, d_tracer,
8184 			tr, &show_traces_fops);
8185 
8186 	trace_create_file("current_tracer", 0644, d_tracer,
8187 			tr, &set_tracer_fops);
8188 
8189 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8190 			  tr, &tracing_cpumask_fops);
8191 
8192 	trace_create_file("trace_options", 0644, d_tracer,
8193 			  tr, &tracing_iter_fops);
8194 
8195 	trace_create_file("trace", 0644, d_tracer,
8196 			  tr, &tracing_fops);
8197 
8198 	trace_create_file("trace_pipe", 0444, d_tracer,
8199 			  tr, &tracing_pipe_fops);
8200 
8201 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8202 			  tr, &tracing_entries_fops);
8203 
8204 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8205 			  tr, &tracing_total_entries_fops);
8206 
8207 	trace_create_file("free_buffer", 0200, d_tracer,
8208 			  tr, &tracing_free_buffer_fops);
8209 
8210 	trace_create_file("trace_marker", 0220, d_tracer,
8211 			  tr, &tracing_mark_fops);
8212 
8213 	file = __find_event_file(tr, "ftrace", "print");
8214 	if (file && file->dir)
8215 		trace_create_file("trigger", 0644, file->dir, file,
8216 				  &event_trigger_fops);
8217 	tr->trace_marker_file = file;
8218 
8219 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8220 			  tr, &tracing_mark_raw_fops);
8221 
8222 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8223 			  &trace_clock_fops);
8224 
8225 	trace_create_file("tracing_on", 0644, d_tracer,
8226 			  tr, &rb_simple_fops);
8227 
8228 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8229 			  &trace_time_stamp_mode_fops);
8230 
8231 	tr->buffer_percent = 50;
8232 
8233 	trace_create_file("buffer_percent", 0444, d_tracer,
8234 			tr, &buffer_percent_fops);
8235 
8236 	create_trace_options_dir(tr);
8237 
8238 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8239 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8240 			&tr->max_latency, &tracing_max_lat_fops);
8241 #endif
8242 
8243 	if (ftrace_create_function_files(tr, d_tracer))
8244 		WARN(1, "Could not allocate function filter files");
8245 
8246 #ifdef CONFIG_TRACER_SNAPSHOT
8247 	trace_create_file("snapshot", 0644, d_tracer,
8248 			  tr, &snapshot_fops);
8249 #endif
8250 
8251 	for_each_tracing_cpu(cpu)
8252 		tracing_init_tracefs_percpu(tr, cpu);
8253 
8254 	ftrace_init_tracefs(tr, d_tracer);
8255 }
8256 
8257 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8258 {
8259 	struct vfsmount *mnt;
8260 	struct file_system_type *type;
8261 
8262 	/*
8263 	 * To maintain backward compatibility for tools that mount
8264 	 * debugfs to get to the tracing facility, tracefs is automatically
8265 	 * mounted to the debugfs/tracing directory.
8266 	 */
8267 	type = get_fs_type("tracefs");
8268 	if (!type)
8269 		return NULL;
8270 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8271 	put_filesystem(type);
8272 	if (IS_ERR(mnt))
8273 		return NULL;
8274 	mntget(mnt);
8275 
8276 	return mnt;
8277 }
8278 
8279 /**
8280  * tracing_init_dentry - initialize top level trace array
8281  *
8282  * This is called when creating files or directories in the tracing
8283  * directory. It is called via fs_initcall() by any of the boot up code
8284  * and expects to return the dentry of the top level tracing directory.
8285  */
8286 struct dentry *tracing_init_dentry(void)
8287 {
8288 	struct trace_array *tr = &global_trace;
8289 
8290 	/* The top level trace array uses  NULL as parent */
8291 	/* The top level trace array uses NULL as parent */
8292 		return NULL;
8293 
8294 	if (WARN_ON(!tracefs_initialized()) ||
8295 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8296 		 WARN_ON(!debugfs_initialized())))
8297 		return ERR_PTR(-ENODEV);
8298 
8299 	/*
8300 	 * As there may still be users that expect the tracing
8301 	 * files to exist in debugfs/tracing, we must automount
8302 	 * the tracefs file system there, so older tools still
8303 	 * work with the newer kernel.
8304 	 */
8305 	tr->dir = debugfs_create_automount("tracing", NULL,
8306 					   trace_automount, NULL);
8307 	if (!tr->dir) {
8308 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8309 		return ERR_PTR(-ENOMEM);
8310 	}
8311 
8312 	return NULL;
8313 }
8314 
8315 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8316 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8317 
8318 static void __init trace_eval_init(void)
8319 {
8320 	int len;
8321 
8322 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8323 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8324 }
8325 
8326 #ifdef CONFIG_MODULES
8327 static void trace_module_add_evals(struct module *mod)
8328 {
8329 	if (!mod->num_trace_evals)
8330 		return;
8331 
8332 	/*
8333 	 * Modules with bad taint do not have events created; do
8334 	 * not bother with enums either.
8335 	 */
8336 	if (trace_module_has_bad_taint(mod))
8337 		return;
8338 
8339 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8340 }
8341 
8342 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8343 static void trace_module_remove_evals(struct module *mod)
8344 {
8345 	union trace_eval_map_item *map;
8346 	union trace_eval_map_item **last = &trace_eval_maps;
8347 
8348 	if (!mod->num_trace_evals)
8349 		return;
8350 
8351 	mutex_lock(&trace_eval_mutex);
8352 
8353 	map = trace_eval_maps;
8354 
8355 	while (map) {
8356 		if (map->head.mod == mod)
8357 			break;
8358 		map = trace_eval_jmp_to_tail(map);
8359 		last = &map->tail.next;
8360 		map = map->tail.next;
8361 	}
8362 	if (!map)
8363 		goto out;
8364 
8365 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8366 	kfree(map);
8367  out:
8368 	mutex_unlock(&trace_eval_mutex);
8369 }
8370 #else
8371 static inline void trace_module_remove_evals(struct module *mod) { }
8372 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8373 
8374 static int trace_module_notify(struct notifier_block *self,
8375 			       unsigned long val, void *data)
8376 {
8377 	struct module *mod = data;
8378 
8379 	switch (val) {
8380 	case MODULE_STATE_COMING:
8381 		trace_module_add_evals(mod);
8382 		break;
8383 	case MODULE_STATE_GOING:
8384 		trace_module_remove_evals(mod);
8385 		break;
8386 	}
8387 
8388 	return 0;
8389 }
8390 
8391 static struct notifier_block trace_module_nb = {
8392 	.notifier_call = trace_module_notify,
8393 	.priority = 0,
8394 };
8395 #endif /* CONFIG_MODULES */
8396 
8397 static __init int tracer_init_tracefs(void)
8398 {
8399 	struct dentry *d_tracer;
8400 
8401 	trace_access_lock_init();
8402 
8403 	d_tracer = tracing_init_dentry();
8404 	if (IS_ERR(d_tracer))
8405 		return 0;
8406 
8407 	event_trace_init();
8408 
8409 	init_tracer_tracefs(&global_trace, d_tracer);
8410 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8411 
8412 	trace_create_file("tracing_thresh", 0644, d_tracer,
8413 			&global_trace, &tracing_thresh_fops);
8414 
8415 	trace_create_file("README", 0444, d_tracer,
8416 			NULL, &tracing_readme_fops);
8417 
8418 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8419 			NULL, &tracing_saved_cmdlines_fops);
8420 
8421 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8422 			  NULL, &tracing_saved_cmdlines_size_fops);
8423 
8424 	trace_create_file("saved_tgids", 0444, d_tracer,
8425 			NULL, &tracing_saved_tgids_fops);
8426 
8427 	trace_eval_init();
8428 
8429 	trace_create_eval_file(d_tracer);
8430 
8431 #ifdef CONFIG_MODULES
8432 	register_module_notifier(&trace_module_nb);
8433 #endif
8434 
8435 #ifdef CONFIG_DYNAMIC_FTRACE
8436 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8437 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8438 #endif
8439 
8440 	create_trace_instances(d_tracer);
8441 
8442 	update_tracer_options(&global_trace);
8443 
8444 	return 0;
8445 }
8446 
8447 static int trace_panic_handler(struct notifier_block *this,
8448 			       unsigned long event, void *unused)
8449 {
8450 	if (ftrace_dump_on_oops)
8451 		ftrace_dump(ftrace_dump_on_oops);
8452 	return NOTIFY_OK;
8453 }
8454 
8455 static struct notifier_block trace_panic_notifier = {
8456 	.notifier_call  = trace_panic_handler,
8457 	.next           = NULL,
8458 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8459 };
8460 
8461 static int trace_die_handler(struct notifier_block *self,
8462 			     unsigned long val,
8463 			     void *data)
8464 {
8465 	switch (val) {
8466 	case DIE_OOPS:
8467 		if (ftrace_dump_on_oops)
8468 			ftrace_dump(ftrace_dump_on_oops);
8469 		break;
8470 	default:
8471 		break;
8472 	}
8473 	return NOTIFY_OK;
8474 }
8475 
8476 static struct notifier_block trace_die_notifier = {
8477 	.notifier_call = trace_die_handler,
8478 	.priority = 200
8479 };
8480 
8481 /*
8482  * printk is set to a max of 1024; we really don't need it that big.
8483  * Nothing should be printing 1000 characters anyway.
8484  */
8485 #define TRACE_MAX_PRINT		1000
8486 
8487 /*
8488  * Define here KERN_TRACE so that we have one place to modify
8489  * it if we decide to change what log level the ftrace dump
8490  * should be at.
8491  */
8492 #define KERN_TRACE		KERN_EMERG
8493 
8494 void
8495 trace_printk_seq(struct trace_seq *s)
8496 {
8497 	/* Probably should print a warning here. */
8498 	if (s->seq.len >= TRACE_MAX_PRINT)
8499 		s->seq.len = TRACE_MAX_PRINT;
8500 
8501 	/*
8502 	 * More paranoid code. Although the buffer size is set to
8503 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8504 	 * an extra layer of protection.
8505 	 */
8506 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8507 		s->seq.len = s->seq.size - 1;
8508 
8509 	/* Should be zero terminated, but we are paranoid. */
8510 	s->buffer[s->seq.len] = 0;
8511 
8512 	printk(KERN_TRACE "%s", s->buffer);
8513 
8514 	trace_seq_init(s);
8515 }
8516 
8517 void trace_init_global_iter(struct trace_iterator *iter)
8518 {
8519 	iter->tr = &global_trace;
8520 	iter->trace = iter->tr->current_trace;
8521 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8522 	iter->trace_buffer = &global_trace.trace_buffer;
8523 
8524 	if (iter->trace && iter->trace->open)
8525 		iter->trace->open(iter);
8526 
8527 	/* Annotate start of buffers if we had overruns */
8528 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8529 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8530 
8531 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8532 	if (trace_clocks[iter->tr->clock_id].in_ns)
8533 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8534 }
8535 
8536 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8537 {
8538 	/* use static because iter can be a bit big for the stack */
8539 	static struct trace_iterator iter;
8540 	static atomic_t dump_running;
8541 	struct trace_array *tr = &global_trace;
8542 	unsigned int old_userobj;
8543 	unsigned long flags;
8544 	int cnt = 0, cpu;
8545 
8546 	/* Only allow one dump user at a time. */
8547 	if (atomic_inc_return(&dump_running) != 1) {
8548 		atomic_dec(&dump_running);
8549 		return;
8550 	}
8551 
8552 	/*
8553 	 * Always turn off tracing when we dump.
8554 	 * We don't need to show trace output of what happens
8555 	 * between multiple crashes.
8556 	 *
8557 	 * If the user does a sysrq-z, then they can re-enable
8558 	 * tracing with echo 1 > tracing_on.
8559 	 */
8560 	tracing_off();
8561 
8562 	local_irq_save(flags);
8563 	printk_nmi_direct_enter();
8564 
8565 	/* Simulate the iterator */
8566 	trace_init_global_iter(&iter);
8567 
8568 	for_each_tracing_cpu(cpu) {
8569 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8570 	}
8571 
8572 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8573 
8574 	/* don't look at user memory in panic mode */
8575 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8576 
8577 	switch (oops_dump_mode) {
8578 	case DUMP_ALL:
8579 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8580 		break;
8581 	case DUMP_ORIG:
8582 		iter.cpu_file = raw_smp_processor_id();
8583 		break;
8584 	case DUMP_NONE:
8585 		goto out_enable;
8586 	default:
8587 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8588 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8589 	}
8590 
8591 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8592 
8593 	/* Did function tracer already get disabled? */
8594 	if (ftrace_is_dead()) {
8595 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8596 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8597 	}
8598 
8599 	/*
8600 	 * We need to stop all tracing on all CPUs to read
8601 	 * the next buffer. This is a bit expensive, but is
8602 	 * not done often. We fill everything we can read,
8603 	 * and then release the locks again.
8604 	 */
8605 
8606 	while (!trace_empty(&iter)) {
8607 
8608 		if (!cnt)
8609 			printk(KERN_TRACE "---------------------------------\n");
8610 
8611 		cnt++;
8612 
8613 		/* reset all but tr, trace, and overruns */
8614 		memset(&iter.seq, 0,
8615 		       sizeof(struct trace_iterator) -
8616 		       offsetof(struct trace_iterator, seq));
8617 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8618 		iter.pos = -1;
8619 
8620 		if (trace_find_next_entry_inc(&iter) != NULL) {
8621 			int ret;
8622 
8623 			ret = print_trace_line(&iter);
8624 			if (ret != TRACE_TYPE_NO_CONSUME)
8625 				trace_consume(&iter);
8626 		}
8627 		touch_nmi_watchdog();
8628 
8629 		trace_printk_seq(&iter.seq);
8630 	}
8631 
8632 	if (!cnt)
8633 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8634 	else
8635 		printk(KERN_TRACE "---------------------------------\n");
8636 
8637  out_enable:
8638 	tr->trace_flags |= old_userobj;
8639 
8640 	for_each_tracing_cpu(cpu) {
8641 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8642 	}
8643 	atomic_dec(&dump_running);
8644 	printk_nmi_direct_exit();
8645 	local_irq_restore(flags);
8646 }
8647 EXPORT_SYMBOL_GPL(ftrace_dump);
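
/*
 * Editorial illustration (hypothetical, not part of the original file):
 * ftrace_dump() is exported so modules can spill the trace buffers to the
 * console from an error path, e.g.:
 *
 *	if (WARN_ON(unexpected_state))
 *		ftrace_dump(DUMP_ALL);
 *
 * DUMP_ORIG would dump only the CPU that hit the error path.
 */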
8648 
8649 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8650 {
8651 	char **argv;
8652 	int argc, ret;
8653 
8654 	argc = 0;
8655 	ret = 0;
8656 	argv = argv_split(GFP_KERNEL, buf, &argc);
8657 	if (!argv)
8658 		return -ENOMEM;
8659 
8660 	if (argc)
8661 		ret = createfn(argc, argv);
8662 
8663 	argv_free(argv);
8664 
8665 	return ret;
8666 }
8667 
8668 #define WRITE_BUFSIZE  4096
8669 
8670 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8671 				size_t count, loff_t *ppos,
8672 				int (*createfn)(int, char **))
8673 {
8674 	char *kbuf, *buf, *tmp;
8675 	int ret = 0;
8676 	size_t done = 0;
8677 	size_t size;
8678 
8679 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8680 	if (!kbuf)
8681 		return -ENOMEM;
8682 
8683 	while (done < count) {
8684 		size = count - done;
8685 
8686 		if (size >= WRITE_BUFSIZE)
8687 			size = WRITE_BUFSIZE - 1;
8688 
8689 		if (copy_from_user(kbuf, buffer + done, size)) {
8690 			ret = -EFAULT;
8691 			goto out;
8692 		}
8693 		kbuf[size] = '\0';
8694 		buf = kbuf;
8695 		do {
8696 			tmp = strchr(buf, '\n');
8697 			if (tmp) {
8698 				*tmp = '\0';
8699 				size = tmp - buf + 1;
8700 			} else {
8701 				size = strlen(buf);
8702 				if (done + size < count) {
8703 					if (buf != kbuf)
8704 						break;
8705 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8706 					pr_warn("Line length is too long: Should be less than %d\n",
8707 						WRITE_BUFSIZE - 2);
8708 					ret = -EINVAL;
8709 					goto out;
8710 				}
8711 			}
8712 			done += size;
8713 
8714 			/* Remove comments */
8715 			tmp = strchr(buf, '#');
8716 
8717 			if (tmp)
8718 				*tmp = '\0';
8719 
8720 			ret = trace_run_command(buf, createfn);
8721 			if (ret)
8722 				goto out;
8723 			buf += size;
8724 
8725 		} while (done < count);
8726 	}
8727 	ret = done;
8728 
8729 out:
8730 	kfree(kbuf);
8731 
8732 	return ret;
8733 }
8734 
8735 __init static int tracer_alloc_buffers(void)
8736 {
8737 	int ring_buf_size;
8738 	int ret = -ENOMEM;
8739 
8740 	/*
8741 	 * Make sure we don't accidentally add more trace options
8742 	 * than we have bits for.
8743 	 */
8744 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8745 
8746 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8747 		goto out;
8748 
8749 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8750 		goto out_free_buffer_mask;
8751 
8752 	/* Only allocate trace_printk buffers if a trace_printk exists */
8753 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8754 		/* Must be called before global_trace.buffer is allocated */
8755 		trace_printk_init_buffers();
8756 
8757 	/* To save memory, keep the ring buffer size to its minimum */
8758 	if (ring_buffer_expanded)
8759 		ring_buf_size = trace_buf_size;
8760 	else
8761 		ring_buf_size = 1;
8762 
8763 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8764 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8765 
8766 	raw_spin_lock_init(&global_trace.start_lock);
8767 
8768 	/*
8769 	 * The prepare callback allocates some memory for the ring buffer. We
8770 	 * don't free the buffer if the CPU goes down. If we were to free
8771 	 * the buffer, then the user would lose any trace that was in the
8772 	 * buffer. The memory will be removed once the "instance" is removed.
8773 	 */
8774 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8775 				      "trace/RB:preapre", trace_rb_cpu_prepare,
8776 				      NULL);
8777 	if (ret < 0)
8778 		goto out_free_cpumask;
8779 	/* Used for event triggers */
8780 	ret = -ENOMEM;
8781 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8782 	if (!temp_buffer)
8783 		goto out_rm_hp_state;
8784 
8785 	if (trace_create_savedcmd() < 0)
8786 		goto out_free_temp_buffer;
8787 
8788 	/* TODO: make the number of buffers hot pluggable with CPUs */
8789 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8790 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8791 		WARN_ON(1);
8792 		goto out_free_savedcmd;
8793 	}
8794 
8795 	if (global_trace.buffer_disabled)
8796 		tracing_off();
8797 
8798 	if (trace_boot_clock) {
8799 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8800 		if (ret < 0)
8801 			pr_warn("Trace clock %s not defined, going back to default\n",
8802 				trace_boot_clock);
8803 	}
8804 
8805 	/*
8806 	 * register_tracer() might reference current_trace, so it
8807 	 * needs to be set before we register anything. This is
8808 	 * just a bootstrap of current_trace anyway.
8809 	 */
8810 	global_trace.current_trace = &nop_trace;
8811 
8812 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8813 
8814 	ftrace_init_global_array_ops(&global_trace);
8815 
8816 	init_trace_flags_index(&global_trace);
8817 
8818 	register_tracer(&nop_trace);
8819 
8820 	/* Function tracing may start here (via kernel command line) */
8821 	init_function_trace();
8822 
8823 	/* All seems OK, enable tracing */
8824 	tracing_disabled = 0;
8825 
8826 	atomic_notifier_chain_register(&panic_notifier_list,
8827 				       &trace_panic_notifier);
8828 
8829 	register_die_notifier(&trace_die_notifier);
8830 
8831 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8832 
8833 	INIT_LIST_HEAD(&global_trace.systems);
8834 	INIT_LIST_HEAD(&global_trace.events);
8835 	INIT_LIST_HEAD(&global_trace.hist_vars);
8836 	list_add(&global_trace.list, &ftrace_trace_arrays);
8837 
8838 	apply_trace_boot_options();
8839 
8840 	register_snapshot_cmd();
8841 
8842 	return 0;
8843 
8844 out_free_savedcmd:
8845 	free_saved_cmdlines_buffer(savedcmd);
8846 out_free_temp_buffer:
8847 	ring_buffer_free(temp_buffer);
8848 out_rm_hp_state:
8849 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8850 out_free_cpumask:
8851 	free_cpumask_var(global_trace.tracing_cpumask);
8852 out_free_buffer_mask:
8853 	free_cpumask_var(tracing_buffer_mask);
8854 out:
8855 	return ret;
8856 }
8857 
8858 void __init early_trace_init(void)
8859 {
8860 	if (tracepoint_printk) {
8861 		tracepoint_print_iter =
8862 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8863 		if (WARN_ON(!tracepoint_print_iter))
8864 			tracepoint_printk = 0;
8865 		else
8866 			static_key_enable(&tracepoint_printk_key.key);
8867 	}
8868 	tracer_alloc_buffers();
8869 }
8870 
8871 void __init trace_init(void)
8872 {
8873 	trace_event_init();
8874 }
8875 
8876 __init static int clear_boot_tracer(void)
8877 {
8878 	/*
8879 	 * The default bootup tracer buffer is in an init section.
8880 	 * This function is called at late_initcall time. If we did not
8881 	 * find the boot tracer by then, clear it out, to prevent
8882 	 * later registration from accessing the buffer that is
8883 	 * about to be freed.
8884 	 */
8885 	if (!default_bootup_tracer)
8886 		return 0;
8887 
8888 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8889 	       default_bootup_tracer);
8890 	default_bootup_tracer = NULL;
8891 
8892 	return 0;
8893 }
8894 
8895 fs_initcall(tracer_init_tracefs);
8896 late_initcall_sync(clear_boot_tracer);
8897 
8898 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8899 __init static int tracing_set_default_clock(void)
8900 {
8901 	/* sched_clock_stable() is determined in late_initcall */
8902 	if (!trace_boot_clock && !sched_clock_stable()) {
8903 		printk(KERN_WARNING
8904 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8905 		       "If you want to keep using the local clock, then add:\n"
8906 		       "  \"trace_clock=local\"\n"
8907 		       "on the kernel command line\n");
8908 		tracing_set_clock(&global_trace, "global");
8909 	}
8910 
8911 	return 0;
8912 }
8913 late_initcall_sync(tracing_set_default_clock);
8914 #endif
8915