xref: /linux-6.15/kernel/trace/trace.c (revision 293d5b43)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
5  * Copyright (C) 2008 Ingo Molnar <[email protected]>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <[email protected]>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44 
45 #include "trace.h"
46 #include "trace_output.h"
47 
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53 
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring buffer, such as trace_printk(), could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62 
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67 
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71 
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 	{ }
75 };
76 
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 	return 0;
81 }
82 
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89 
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97 
98 cpumask_var_t __read_mostly	tracing_buffer_mask;
99 
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115 
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117 
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120 
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124 	struct module			*mod;
125 	unsigned long			length;
126 };
127 
128 union trace_enum_map_item;
129 
130 struct trace_enum_map_tail {
131 	/*
132 	 * "end" is first and points to NULL as it must be different
133 	 * than "mod" or "enum_string"
134 	 */
135 	union trace_enum_map_item	*next;
136 	const char			*end;	/* points to NULL */
137 };
138 
139 static DEFINE_MUTEX(trace_enum_mutex);
140 
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149 	struct trace_enum_map		map;
150 	struct trace_enum_map_head	head;
151 	struct trace_enum_map_tail	tail;
152 };
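/*
 * For example, an array with N saved maps is laid out as:
 *
 *   trace_enum_maps -> [ head | map 0 | map 1 | ... | map N-1 | tail ]
 *
 * where head.length == N, head.mod identifies the owning module (for
 * maps that are not built in), and tail.next points to the next saved
 * array, if any.
 */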
153 
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156 
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158 
159 #define MAX_TRACER_SIZE		100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162 
163 static bool allocate_snapshot;
164 
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 	default_bootup_tracer = bootup_tracer_buf;
169 	/* We are using ftrace early, expand it */
170 	ring_buffer_expanded = true;
171 	return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174 
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 	if (*str++ != '=' || !*str) {
178 		ftrace_dump_on_oops = DUMP_ALL;
179 		return 1;
180 	}
181 
182 	if (!strcmp("orig_cpu", str)) {
183 		ftrace_dump_on_oops = DUMP_ORIG;
184 		return 1;
185 	}
186 
187 	return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190 
191 static int __init stop_trace_on_warning(char *str)
192 {
193 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 		__disable_trace_on_warning = 1;
195 	return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198 
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 	allocate_snapshot = true;
202 	/* We also need the main ring buffer expanded */
203 	ring_buffer_expanded = true;
204 	return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207 
208 
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210 
211 static int __init set_trace_boot_options(char *str)
212 {
213 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 	return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217 
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220 
221 static int __init set_trace_boot_clock(char *str)
222 {
223 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 	trace_boot_clock = trace_boot_clock_buf;
225 	return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228 
229 static int __init set_tracepoint_printk(char *str)
230 {
231 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 		tracepoint_printk = 1;
233 	return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236 
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239 	nsec += 500;
240 	do_div(nsec, 1000);
241 	return nsec;
242 }
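/*
 * E.g. ns2usecs(1499) == 1 and ns2usecs(1500) == 2: the +500 rounds to
 * the nearest microsecond before the divide by 1000.
 */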
243 
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS						\
246 	(FUNCTION_DEFAULT_FLAGS |					\
247 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
248 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
249 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
250 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251 
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
254 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255 
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 	TRACE_ITER_EVENT_FORK
259 
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a link list of pages that will store trace entries. The
264  * page descriptor of the pages in the memory is used to hold
265  * the link list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273 	.trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275 
276 LIST_HEAD(ftrace_trace_arrays);
277 
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 	struct trace_array *tr;
281 	int ret = -ENODEV;
282 
283 	mutex_lock(&trace_types_lock);
284 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 		if (tr == this_tr) {
286 			tr->ref++;
287 			ret = 0;
288 			break;
289 		}
290 	}
291 	mutex_unlock(&trace_types_lock);
292 
293 	return ret;
294 }
295 
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 	WARN_ON(!this_tr->ref);
299 	this_tr->ref--;
300 }
301 
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 	mutex_lock(&trace_types_lock);
305 	__trace_array_put(this_tr);
306 	mutex_unlock(&trace_types_lock);
307 }
308 
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 			      struct ring_buffer *buffer,
311 			      struct ring_buffer_event *event)
312 {
313 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 	    !filter_match_preds(call->filter, rec)) {
315 		__trace_event_discard_commit(buffer, event);
316 		return 1;
317 	}
318 
319 	return 0;
320 }
321 
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 	vfree(pid_list->pids);
325 	kfree(pid_list);
326 }
327 
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 	/*
339 	 * If pid_max changed after filtered_pids was created, we
340 	 * by default ignore all pids greater than the previous pid_max.
341 	 */
342 	if (search_pid >= filtered_pids->pid_max)
343 		return false;
344 
345 	return test_bit(search_pid, filtered_pids->pids);
346 }
347 
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 	/*
361 	 * Return false, because if filtered_pids does not exist,
362 	 * all pids are good to trace.
363 	 */
364 	if (!filtered_pids)
365 		return false;
366 
367 	return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369 
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 				  struct task_struct *self,
384 				  struct task_struct *task)
385 {
386 	if (!pid_list)
387 		return;
388 
389 	/* For forks, we only add if the forking task is listed */
390 	if (self) {
391 		if (!trace_find_filtered_pid(pid_list, self->pid))
392 			return;
393 	}
394 
395 	/* Sorry, but we don't support pid_max changing after setting */
396 	if (task->pid >= pid_list->pid_max)
397 		return;
398 
399 	/* "self" is set for forks, and NULL for exits */
400 	if (self)
401 		set_bit(task->pid, pid_list->pids);
402 	else
403 		clear_bit(task->pid, pid_list->pids);
404 }
405 
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 	unsigned long pid = (unsigned long)v;
421 
422 	(*pos)++;
423 
424 	/* pid is already +1 of the actual previous bit */
425 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426 
427 	/* Return pid + 1 to allow zero to be represented */
428 	if (pid < pid_list->pid_max)
429 		return (void *)(pid + 1);
430 
431 	return NULL;
432 }
433 
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 	unsigned long pid;
448 	loff_t l = 0;
449 
450 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 	if (pid >= pid_list->pid_max)
452 		return NULL;
453 
454 	/* Return pid + 1 so that zero can be the exit value */
455 	for (pid++; pid && l < *pos;
456 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 		;
458 	return (void *)pid;
459 }
460 
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471 	unsigned long pid = (unsigned long)v - 1;
472 
473 	seq_printf(m, "%lu\n", pid);
474 	return 0;
475 }
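/*
 * For example, if only pids 0 and 100 are set in the list, the
 * iteration above yields (void *)1 and (void *)101, and trace_pid_show()
 * subtracts one again so that "0" and "100" are printed.
 */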
476 
477 /* the parser buffer is PID_BUF_SIZE + 1 = 128 bytes, much more than enough */
478 #define PID_BUF_SIZE		127
479 
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 		    struct trace_pid_list **new_pid_list,
482 		    const char __user *ubuf, size_t cnt)
483 {
484 	struct trace_pid_list *pid_list;
485 	struct trace_parser parser;
486 	unsigned long val;
487 	int nr_pids = 0;
488 	ssize_t read = 0;
489 	ssize_t ret = 0;
490 	loff_t pos;
491 	pid_t pid;
492 
493 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 		return -ENOMEM;
495 
496 	/*
497 	 * Always recreate a new array. The write is an all-or-nothing
498 	 * operation: a new array is always created when the user adds
499 	 * pids. If the operation fails, then the current list is
500 	 * not modified.
501 	 */
502 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 	if (!pid_list)
504 		return -ENOMEM;
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		kfree(pid_list);
515 		return -ENOMEM;
516 	}
517 
518 	if (filtered_pids) {
519 		/* copy the current bits to the new max */
520 		for_each_set_bit(pid, filtered_pids->pids,
521 				 filtered_pids->pid_max) {
522 			set_bit(pid, pid_list->pids);
523 			nr_pids++;
524 		}
525 	}
526 
527 	while (cnt > 0) {
528 
529 		pos = 0;
530 
531 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
532 		if (ret < 0 || !trace_parser_loaded(&parser))
533 			break;
534 
535 		read += ret;
536 		ubuf += ret;
537 		cnt -= ret;
538 
539 		parser.buffer[parser.idx] = 0;
540 
541 		ret = -EINVAL;
542 		if (kstrtoul(parser.buffer, 0, &val))
543 			break;
544 		if (val >= pid_list->pid_max)
545 			break;
546 
547 		pid = (pid_t)val;
548 
549 		set_bit(pid, pid_list->pids);
550 		nr_pids++;
551 
552 		trace_parser_clear(&parser);
553 		ret = 0;
554 	}
555 	trace_parser_put(&parser);
556 
557 	if (ret < 0) {
558 		trace_free_pid_list(pid_list);
559 		return ret;
560 	}
561 
562 	if (!nr_pids) {
563 		/* Cleared the list of pids */
564 		trace_free_pid_list(pid_list);
565 		read = ret;
566 		pid_list = NULL;
567 	}
568 
569 	*new_pid_list = pid_list;
570 
571 	return read;
572 }
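/*
 * For example, writing "10 20 30" produces a fresh bitmap with pids 10,
 * 20 and 30 set (in addition to any pids copied over from @filtered_pids);
 * the previous list itself is never modified in place.
 */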
573 
574 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
575 {
576 	u64 ts;
577 
578 	/* Early boot up does not have a buffer yet */
579 	if (!buf->buffer)
580 		return trace_clock_local();
581 
582 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
583 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
584 
585 	return ts;
586 }
587 
588 cycle_t ftrace_now(int cpu)
589 {
590 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
591 }
592 
593 /**
594  * tracing_is_enabled - Show if global_trace has been disabled
595  *
596  * Shows if the global trace has been enabled or not. It uses the
597  * mirror flag "buffer_disabled" to be used in fast paths such as for
598  * the irqsoff tracer. But it may be inaccurate due to races. If you
599  * need to know the accurate state, use tracing_is_on() which is a little
600  * slower, but accurate.
601  */
602 int tracing_is_enabled(void)
603 {
604 	/*
605 	 * For quick access (irqsoff uses this in fast path), just
606 	 * return the mirror variable of the state of the ring buffer.
607 	 * It's a little racy, but we don't really care.
608 	 */
609 	smp_rmb();
610 	return !global_trace.buffer_disabled;
611 }
612 
613 /*
614  * trace_buf_size is the size in bytes that is allocated
615  * for a buffer. Note, the number of bytes is always rounded
616  * to page size.
617  *
618  * This number is purposely set to a low number of 16384.
619  * If a dump on oops happens, it is much appreciated not to have
620  * to wait for all that output. In any case, this is configurable
621  * at both boot time and run time.
622  */
623 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
624 
625 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
626 
627 /* trace_types holds a link list of available tracers. */
628 static struct tracer		*trace_types __read_mostly;
629 
630 /*
631  * trace_types_lock is used to protect the trace_types list.
632  */
633 DEFINE_MUTEX(trace_types_lock);
634 
635 /*
636  * Serialize access to the ring buffer.
637  *
638  * The ring buffer serializes readers, but that is only low-level protection.
639  * The validity of the events (returned by ring_buffer_peek() etc.)
640  * is not protected by the ring buffer.
641  *
642  * The content of events may become garbage if we allow another process to
643  * consume these events concurrently:
644  *   A) the page of the consumed events may become a normal page
645  *      (not a reader page) in the ring buffer, and this page will be
646  *      rewritten by the event producer.
647  *   B) the page of the consumed events may become a page for splice_read,
648  *      and this page will be returned to the system.
649  *
650  * These primitives allow multiple processes to access different CPU ring
651  * buffers concurrently.
652  *
653  * These primitives don't distinguish read-only from read-consume access.
654  * Multiple read-only accesses are also serialized.
655  */
656 
657 #ifdef CONFIG_SMP
658 static DECLARE_RWSEM(all_cpu_access_lock);
659 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
660 
661 static inline void trace_access_lock(int cpu)
662 {
663 	if (cpu == RING_BUFFER_ALL_CPUS) {
664 		/* gain it for accessing the whole ring buffer. */
665 		down_write(&all_cpu_access_lock);
666 	} else {
667 		/* gain it for accessing a cpu ring buffer. */
668 
669 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
670 		down_read(&all_cpu_access_lock);
671 
672 		/* Secondly block other access to this @cpu ring buffer. */
673 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
674 	}
675 }
676 
677 static inline void trace_access_unlock(int cpu)
678 {
679 	if (cpu == RING_BUFFER_ALL_CPUS) {
680 		up_write(&all_cpu_access_lock);
681 	} else {
682 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
683 		up_read(&all_cpu_access_lock);
684 	}
685 }
686 
687 static inline void trace_access_lock_init(void)
688 {
689 	int cpu;
690 
691 	for_each_possible_cpu(cpu)
692 		mutex_init(&per_cpu(cpu_access_lock, cpu));
693 }
694 
695 #else
696 
697 static DEFINE_MUTEX(access_lock);
698 
699 static inline void trace_access_lock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_lock(&access_lock);
703 }
704 
705 static inline void trace_access_unlock(int cpu)
706 {
707 	(void)cpu;
708 	mutex_unlock(&access_lock);
709 }
710 
711 static inline void trace_access_lock_init(void)
712 {
713 }
714 
715 #endif
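/*
 * A reader of a single cpu buffer would therefore bracket its accesses
 * roughly like (sketch only):
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events of that cpu buffer ...
 *	trace_access_unlock(cpu);
 *
 * while passing RING_BUFFER_ALL_CPUS takes exclusive access to all of them.
 */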
716 
717 #ifdef CONFIG_STACKTRACE
718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
719 				 unsigned long flags,
720 				 int skip, int pc, struct pt_regs *regs);
721 static inline void ftrace_trace_stack(struct trace_array *tr,
722 				      struct ring_buffer *buffer,
723 				      unsigned long flags,
724 				      int skip, int pc, struct pt_regs *regs);
725 
726 #else
727 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
728 					unsigned long flags,
729 					int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 static inline void ftrace_trace_stack(struct trace_array *tr,
733 				      struct ring_buffer *buffer,
734 				      unsigned long flags,
735 				      int skip, int pc, struct pt_regs *regs)
736 {
737 }
738 
739 #endif
740 
741 static void tracer_tracing_on(struct trace_array *tr)
742 {
743 	if (tr->trace_buffer.buffer)
744 		ring_buffer_record_on(tr->trace_buffer.buffer);
745 	/*
746 	 * This flag is looked at when buffers haven't been allocated
747 	 * yet, or by some tracers (like irqsoff), that just want to
748 	 * know if the ring buffer has been disabled, but it can handle
749 	 * races where it gets disabled but we still do a record.
750 	 * As the check is in the fast path of the tracers, it is more
751 	 * important to be fast than accurate.
752 	 */
753 	tr->buffer_disabled = 0;
754 	/* Make the flag seen by readers */
755 	smp_wmb();
756 }
757 
758 /**
759  * tracing_on - enable tracing buffers
760  *
761  * This function enables tracing buffers that may have been
762  * disabled with tracing_off.
763  */
764 void tracing_on(void)
765 {
766 	tracer_tracing_on(&global_trace);
767 }
768 EXPORT_SYMBOL_GPL(tracing_on);
769 
770 /**
771  * __trace_puts - write a constant string into the trace buffer.
772  * @ip:	   The address of the caller
773  * @str:   The constant string to write
774  * @size:  The size of the string.
775  */
776 int __trace_puts(unsigned long ip, const char *str, int size)
777 {
778 	struct ring_buffer_event *event;
779 	struct ring_buffer *buffer;
780 	struct print_entry *entry;
781 	unsigned long irq_flags;
782 	int alloc;
783 	int pc;
784 
785 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
786 		return 0;
787 
788 	pc = preempt_count();
789 
790 	if (unlikely(tracing_selftest_running || tracing_disabled))
791 		return 0;
792 
793 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
794 
795 	local_save_flags(irq_flags);
796 	buffer = global_trace.trace_buffer.buffer;
797 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
798 					  irq_flags, pc);
799 	if (!event)
800 		return 0;
801 
802 	entry = ring_buffer_event_data(event);
803 	entry->ip = ip;
804 
805 	memcpy(&entry->buf, str, size);
806 
807 	/* Add a newline if necessary */
808 	if (entry->buf[size - 1] != '\n') {
809 		entry->buf[size] = '\n';
810 		entry->buf[size + 1] = '\0';
811 	} else
812 		entry->buf[size] = '\0';
813 
814 	__buffer_unlock_commit(buffer, event);
815 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
816 
817 	return size;
818 }
819 EXPORT_SYMBOL_GPL(__trace_puts);
820 
821 /**
822  * __trace_bputs - write the pointer to a constant string into trace buffer
823  * @ip:	   The address of the caller
824  * @str:   The constant string to write to the buffer to
825  */
826 int __trace_bputs(unsigned long ip, const char *str)
827 {
828 	struct ring_buffer_event *event;
829 	struct ring_buffer *buffer;
830 	struct bputs_entry *entry;
831 	unsigned long irq_flags;
832 	int size = sizeof(struct bputs_entry);
833 	int pc;
834 
835 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
836 		return 0;
837 
838 	pc = preempt_count();
839 
840 	if (unlikely(tracing_selftest_running || tracing_disabled))
841 		return 0;
842 
843 	local_save_flags(irq_flags);
844 	buffer = global_trace.trace_buffer.buffer;
845 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
846 					  irq_flags, pc);
847 	if (!event)
848 		return 0;
849 
850 	entry = ring_buffer_event_data(event);
851 	entry->ip			= ip;
852 	entry->str			= str;
853 
854 	__buffer_unlock_commit(buffer, event);
855 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856 
857 	return 1;
858 }
859 EXPORT_SYMBOL_GPL(__trace_bputs);
860 
861 #ifdef CONFIG_TRACER_SNAPSHOT
862 /**
863  * tracing_snapshot - take a snapshot of the current buffer.
864  *
865  * This causes a swap between the snapshot buffer and the current live
866  * tracing buffer. You can use this to take snapshots of the live
867  * trace when some condition is triggered, but continue to trace.
868  *
869  * Note, make sure to allocate the snapshot with either
870  * a tracing_snapshot_alloc(), or by doing it manually
871  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
872  *
873  * If the snapshot buffer is not allocated, this will stop tracing,
874  * basically making a permanent snapshot.
875  */
876 void tracing_snapshot(void)
877 {
878 	struct trace_array *tr = &global_trace;
879 	struct tracer *tracer = tr->current_trace;
880 	unsigned long flags;
881 
882 	if (in_nmi()) {
883 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
884 		internal_trace_puts("*** snapshot is being ignored        ***\n");
885 		return;
886 	}
887 
888 	if (!tr->allocated_snapshot) {
889 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
890 		internal_trace_puts("*** stopping trace here!   ***\n");
891 		tracing_off();
892 		return;
893 	}
894 
895 	/* Note, snapshot can not be used when the tracer uses it */
896 	if (tracer->use_max_tr) {
897 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
898 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
899 		return;
900 	}
901 
902 	local_irq_save(flags);
903 	update_max_tr(tr, current, smp_processor_id());
904 	local_irq_restore(flags);
905 }
906 EXPORT_SYMBOL_GPL(tracing_snapshot);
907 
908 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
909 					struct trace_buffer *size_buf, int cpu_id);
910 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
911 
912 static int alloc_snapshot(struct trace_array *tr)
913 {
914 	int ret;
915 
916 	if (!tr->allocated_snapshot) {
917 
918 		/* allocate spare buffer */
919 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
920 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
921 		if (ret < 0)
922 			return ret;
923 
924 		tr->allocated_snapshot = true;
925 	}
926 
927 	return 0;
928 }
929 
930 static void free_snapshot(struct trace_array *tr)
931 {
932 	/*
933 	 * We don't free the ring buffer; instead, we resize it because
934 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
935 	 * we want to preserve it.
936 	 */
937 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
938 	set_buffer_entries(&tr->max_buffer, 1);
939 	tracing_reset_online_cpus(&tr->max_buffer);
940 	tr->allocated_snapshot = false;
941 }
942 
943 /**
944  * tracing_alloc_snapshot - allocate snapshot buffer.
945  *
946  * This only allocates the snapshot buffer if it isn't already
947  * allocated - it doesn't also take a snapshot.
948  *
949  * This is meant to be used in cases where the snapshot buffer needs
950  * to be set up for events that can't sleep but need to be able to
951  * trigger a snapshot.
952  */
953 int tracing_alloc_snapshot(void)
954 {
955 	struct trace_array *tr = &global_trace;
956 	int ret;
957 
958 	ret = alloc_snapshot(tr);
959 	WARN_ON(ret < 0);
960 
961 	return ret;
962 }
963 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
964 
965 /**
966  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
967  *
968  * This is similar to tracing_snapshot(), but it will allocate the
969  * snapshot buffer if it isn't already allocated. Use this only
970  * where it is safe to sleep, as the allocation may sleep.
971  *
972  * This causes a swap between the snapshot buffer and the current live
973  * tracing buffer. You can use this to take snapshots of the live
974  * trace when some condition is triggered, but continue to trace.
975  */
976 void tracing_snapshot_alloc(void)
977 {
978 	int ret;
979 
980 	ret = tracing_alloc_snapshot();
981 	if (ret < 0)
982 		return;
983 
984 	tracing_snapshot();
985 }
986 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
987 #else
988 void tracing_snapshot(void)
989 {
990 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
991 }
992 EXPORT_SYMBOL_GPL(tracing_snapshot);
993 int tracing_alloc_snapshot(void)
994 {
995 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
996 	return -ENODEV;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999 void tracing_snapshot_alloc(void)
1000 {
1001 	/* Give warning */
1002 	tracing_snapshot();
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005 #endif /* CONFIG_TRACER_SNAPSHOT */
1006 
1007 static void tracer_tracing_off(struct trace_array *tr)
1008 {
1009 	if (tr->trace_buffer.buffer)
1010 		ring_buffer_record_off(tr->trace_buffer.buffer);
1011 	/*
1012 	 * This flag is looked at when buffers haven't been allocated
1013 	 * yet, or by some tracers (like irqsoff), that just want to
1014 	 * know if the ring buffer has been disabled, but it can handle
1015 	 * races where it gets disabled but we still do a record.
1016 	 * As the check is in the fast path of the tracers, it is more
1017 	 * important to be fast than accurate.
1018 	 */
1019 	tr->buffer_disabled = 1;
1020 	/* Make the flag seen by readers */
1021 	smp_wmb();
1022 }
1023 
1024 /**
1025  * tracing_off - turn off tracing buffers
1026  *
1027  * This function stops the tracing buffers from recording data.
1028  * It does not disable any overhead the tracers themselves may
1029  * be causing. This function simply causes all recording to
1030  * the ring buffers to fail.
1031  */
1032 void tracing_off(void)
1033 {
1034 	tracer_tracing_off(&global_trace);
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_off);
1037 
1038 void disable_trace_on_warning(void)
1039 {
1040 	if (__disable_trace_on_warning)
1041 		tracing_off();
1042 }
1043 
1044 /**
1045  * tracer_tracing_is_on - show real state of ring buffer enabled
1046  * @tr : the trace array to know if ring buffer is enabled
1047  *
1048  * Shows real state of the ring buffer if it is enabled or not.
1049  */
1050 static int tracer_tracing_is_on(struct trace_array *tr)
1051 {
1052 	if (tr->trace_buffer.buffer)
1053 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054 	return !tr->buffer_disabled;
1055 }
1056 
1057 /**
1058  * tracing_is_on - show state of ring buffers enabled
1059  */
1060 int tracing_is_on(void)
1061 {
1062 	return tracer_tracing_is_on(&global_trace);
1063 }
1064 EXPORT_SYMBOL_GPL(tracing_is_on);
1065 
1066 static int __init set_buf_size(char *str)
1067 {
1068 	unsigned long buf_size;
1069 
1070 	if (!str)
1071 		return 0;
1072 	buf_size = memparse(str, &str);
1073 	/* nr_entries can not be zero */
1074 	if (buf_size == 0)
1075 		return 0;
1076 	trace_buf_size = buf_size;
1077 	return 1;
1078 }
1079 __setup("trace_buf_size=", set_buf_size);
1080 
1081 static int __init set_tracing_thresh(char *str)
1082 {
1083 	unsigned long threshold;
1084 	int ret;
1085 
1086 	if (!str)
1087 		return 0;
1088 	ret = kstrtoul(str, 0, &threshold);
1089 	if (ret < 0)
1090 		return 0;
1091 	tracing_thresh = threshold * 1000;
1092 	return 1;
1093 }
1094 __setup("tracing_thresh=", set_tracing_thresh);
1095 
1096 unsigned long nsecs_to_usecs(unsigned long nsecs)
1097 {
1098 	return nsecs / 1000;
1099 }
1100 
1101 /*
1102  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1103  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1104  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1105  * of strings in the order that the enums were defined.
1106  */
1107 #undef C
1108 #define C(a, b) b
1109 
1110 /* These must match the bit positions in trace_iterator_flags */
1111 static const char *trace_options[] = {
1112 	TRACE_FLAGS
1113 	NULL
1114 };
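/*
 * For illustration only (the real entries live in TRACE_FLAGS): if the
 * list contained
 *
 *	C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"),
 *
 * the definition above would expand to
 *
 *	static const char *trace_options[] = { "print-parent", "sym-offset", NULL };
 *
 * and redefining C(a, b) to use 'a' builds the matching enum elsewhere.
 */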
1115 
1116 static struct {
1117 	u64 (*func)(void);
1118 	const char *name;
1119 	int in_ns;		/* is this clock in nanoseconds? */
1120 } trace_clocks[] = {
1121 	{ trace_clock_local,		"local",	1 },
1122 	{ trace_clock_global,		"global",	1 },
1123 	{ trace_clock_counter,		"counter",	0 },
1124 	{ trace_clock_jiffies,		"uptime",	0 },
1125 	{ trace_clock,			"perf",		1 },
1126 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1127 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1128 	ARCH_TRACE_CLOCKS
1129 };
1130 
1131 /*
1132  * trace_parser_get_init - gets the buffer for trace parser
1133  */
1134 int trace_parser_get_init(struct trace_parser *parser, int size)
1135 {
1136 	memset(parser, 0, sizeof(*parser));
1137 
1138 	parser->buffer = kmalloc(size, GFP_KERNEL);
1139 	if (!parser->buffer)
1140 		return 1;
1141 
1142 	parser->size = size;
1143 	return 0;
1144 }
1145 
1146 /*
1147  * trace_parser_put - frees the buffer for trace parser
1148  */
1149 void trace_parser_put(struct trace_parser *parser)
1150 {
1151 	kfree(parser->buffer);
1152 }
1153 
1154 /*
1155  * trace_get_user - reads the user input string separated by space
1156  * (matched by isspace(ch))
1157  *
1158  * For each string found the 'struct trace_parser' is updated,
1159  * and the function returns.
1160  *
1161  * Returns number of bytes read.
1162  *
1163  * See kernel/trace/trace.h for 'struct trace_parser' details.
1164  */
1165 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1166 	size_t cnt, loff_t *ppos)
1167 {
1168 	char ch;
1169 	size_t read = 0;
1170 	ssize_t ret;
1171 
1172 	if (!*ppos)
1173 		trace_parser_clear(parser);
1174 
1175 	ret = get_user(ch, ubuf++);
1176 	if (ret)
1177 		goto out;
1178 
1179 	read++;
1180 	cnt--;
1181 
1182 	/*
1183 	 * If the last write did not finish its word (parser->cont is set),
1184 	 * continue reading the user input without skipping spaces.
1185 	 */
1186 	if (!parser->cont) {
1187 		/* skip white space */
1188 		while (cnt && isspace(ch)) {
1189 			ret = get_user(ch, ubuf++);
1190 			if (ret)
1191 				goto out;
1192 			read++;
1193 			cnt--;
1194 		}
1195 
1196 		/* only spaces were written */
1197 		if (isspace(ch)) {
1198 			*ppos += read;
1199 			ret = read;
1200 			goto out;
1201 		}
1202 
1203 		parser->idx = 0;
1204 	}
1205 
1206 	/* read the non-space input */
1207 	while (cnt && !isspace(ch)) {
1208 		if (parser->idx < parser->size - 1)
1209 			parser->buffer[parser->idx++] = ch;
1210 		else {
1211 			ret = -EINVAL;
1212 			goto out;
1213 		}
1214 		ret = get_user(ch, ubuf++);
1215 		if (ret)
1216 			goto out;
1217 		read++;
1218 		cnt--;
1219 	}
1220 
1221 	/* We either got finished input or we have to wait for another call. */
1222 	if (isspace(ch)) {
1223 		parser->buffer[parser->idx] = 0;
1224 		parser->cont = false;
1225 	} else if (parser->idx < parser->size - 1) {
1226 		parser->cont = true;
1227 		parser->buffer[parser->idx++] = ch;
1228 	} else {
1229 		ret = -EINVAL;
1230 		goto out;
1231 	}
1232 
1233 	*ppos += read;
1234 	ret = read;
1235 
1236 out:
1237 	return ret;
1238 }
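/*
 * For example, a write of "foo bar\n" is handed back one token per call:
 * the first call fills parser->buffer with "foo" and returns 4 (the bytes
 * consumed, including the separating space), the next call yields "bar".
 * A word cut short by the end of the write sets parser->cont so the
 * following call appends to the same buffer instead of starting over.
 */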
1239 
1240 /* TODO add a seq_buf_to_buffer() */
1241 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1242 {
1243 	int len;
1244 
1245 	if (trace_seq_used(s) <= s->seq.readpos)
1246 		return -EBUSY;
1247 
1248 	len = trace_seq_used(s) - s->seq.readpos;
1249 	if (cnt > len)
1250 		cnt = len;
1251 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1252 
1253 	s->seq.readpos += cnt;
1254 	return cnt;
1255 }
1256 
1257 unsigned long __read_mostly	tracing_thresh;
1258 
1259 #ifdef CONFIG_TRACER_MAX_TRACE
1260 /*
1261  * Copy the new maximum trace into the separate maximum-trace
1262  * structure. (this way the maximum trace is permanently saved,
1263  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1264  */
1265 static void
1266 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1267 {
1268 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1269 	struct trace_buffer *max_buf = &tr->max_buffer;
1270 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1271 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1272 
1273 	max_buf->cpu = cpu;
1274 	max_buf->time_start = data->preempt_timestamp;
1275 
1276 	max_data->saved_latency = tr->max_latency;
1277 	max_data->critical_start = data->critical_start;
1278 	max_data->critical_end = data->critical_end;
1279 
1280 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1281 	max_data->pid = tsk->pid;
1282 	/*
1283 	 * If tsk == current, then use current_uid(), as that does not use
1284 	 * RCU. The irq tracer can be called out of RCU scope.
1285 	 */
1286 	if (tsk == current)
1287 		max_data->uid = current_uid();
1288 	else
1289 		max_data->uid = task_uid(tsk);
1290 
1291 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1292 	max_data->policy = tsk->policy;
1293 	max_data->rt_priority = tsk->rt_priority;
1294 
1295 	/* record this tasks comm */
1296 	tracing_record_cmdline(tsk);
1297 }
1298 
1299 /**
1300  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1301  * @tr: tracer
1302  * @tsk: the task with the latency
1303  * @cpu: The cpu that initiated the trace.
1304  *
1305  * Flip the buffers between the @tr and the max_tr and record information
1306  * about which task was the cause of this latency.
1307  */
1308 void
1309 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 	struct ring_buffer *buf;
1312 
1313 	if (tr->stop_count)
1314 		return;
1315 
1316 	WARN_ON_ONCE(!irqs_disabled());
1317 
1318 	if (!tr->allocated_snapshot) {
1319 		/* Only the nop tracer should hit this when disabling */
1320 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1321 		return;
1322 	}
1323 
1324 	arch_spin_lock(&tr->max_lock);
1325 
1326 	buf = tr->trace_buffer.buffer;
1327 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1328 	tr->max_buffer.buffer = buf;
1329 
1330 	__update_max_tr(tr, tsk, cpu);
1331 	arch_spin_unlock(&tr->max_lock);
1332 }
1333 
1334 /**
1335  * update_max_tr_single - only copy one trace over, and reset the rest
1336  * @tr: tracer
1337  * @tsk: the task with the latency
1338  * @cpu: the cpu of the buffer to copy.
1339  *
1340  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1341  */
1342 void
1343 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1344 {
1345 	int ret;
1346 
1347 	if (tr->stop_count)
1348 		return;
1349 
1350 	WARN_ON_ONCE(!irqs_disabled());
1351 	if (!tr->allocated_snapshot) {
1352 		/* Only the nop tracer should hit this when disabling */
1353 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1354 		return;
1355 	}
1356 
1357 	arch_spin_lock(&tr->max_lock);
1358 
1359 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1360 
1361 	if (ret == -EBUSY) {
1362 		/*
1363 		 * We failed to swap the buffer due to a commit taking
1364 		 * place on this CPU. We fail to record, but we reset
1365 		 * the max trace buffer (no one writes directly to it)
1366 		 * and flag that it failed.
1367 		 */
1368 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1369 			"Failed to swap buffers due to commit in progress\n");
1370 	}
1371 
1372 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1373 
1374 	__update_max_tr(tr, tsk, cpu);
1375 	arch_spin_unlock(&tr->max_lock);
1376 }
1377 #endif /* CONFIG_TRACER_MAX_TRACE */
1378 
1379 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1380 {
1381 	/* Iterators are static, they should be filled or empty */
1382 	if (trace_buffer_iter(iter, iter->cpu_file))
1383 		return 0;
1384 
1385 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1386 				full);
1387 }
1388 
1389 #ifdef CONFIG_FTRACE_STARTUP_TEST
1390 static int run_tracer_selftest(struct tracer *type)
1391 {
1392 	struct trace_array *tr = &global_trace;
1393 	struct tracer *saved_tracer = tr->current_trace;
1394 	int ret;
1395 
1396 	if (!type->selftest || tracing_selftest_disabled)
1397 		return 0;
1398 
1399 	/*
1400 	 * Run a selftest on this tracer.
1401 	 * Here we reset the trace buffer, and set the current
1402 	 * tracer to be this tracer. The tracer can then run some
1403 	 * internal tracing to verify that everything is in order.
1404 	 * If we fail, we do not register this tracer.
1405 	 */
1406 	tracing_reset_online_cpus(&tr->trace_buffer);
1407 
1408 	tr->current_trace = type;
1409 
1410 #ifdef CONFIG_TRACER_MAX_TRACE
1411 	if (type->use_max_tr) {
1412 		/* If we expanded the buffers, make sure the max is expanded too */
1413 		if (ring_buffer_expanded)
1414 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1415 					   RING_BUFFER_ALL_CPUS);
1416 		tr->allocated_snapshot = true;
1417 	}
1418 #endif
1419 
1420 	/* the test is responsible for initializing and enabling */
1421 	pr_info("Testing tracer %s: ", type->name);
1422 	ret = type->selftest(type, tr);
1423 	/* the test is responsible for resetting too */
1424 	tr->current_trace = saved_tracer;
1425 	if (ret) {
1426 		printk(KERN_CONT "FAILED!\n");
1427 		/* Add the warning after printing 'FAILED' */
1428 		WARN_ON(1);
1429 		return -1;
1430 	}
1431 	/* Only reset on passing, to avoid touching corrupted buffers */
1432 	tracing_reset_online_cpus(&tr->trace_buffer);
1433 
1434 #ifdef CONFIG_TRACER_MAX_TRACE
1435 	if (type->use_max_tr) {
1436 		tr->allocated_snapshot = false;
1437 
1438 		/* Shrink the max buffer again */
1439 		if (ring_buffer_expanded)
1440 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1441 					   RING_BUFFER_ALL_CPUS);
1442 	}
1443 #endif
1444 
1445 	printk(KERN_CONT "PASSED\n");
1446 	return 0;
1447 }
1448 #else
1449 static inline int run_tracer_selftest(struct tracer *type)
1450 {
1451 	return 0;
1452 }
1453 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1454 
1455 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1456 
1457 static void __init apply_trace_boot_options(void);
1458 
1459 /**
1460  * register_tracer - register a tracer with the ftrace system.
1461  * @type: the plugin for the tracer
1462  *
1463  * Register a new plugin tracer.
1464  */
1465 int __init register_tracer(struct tracer *type)
1466 {
1467 	struct tracer *t;
1468 	int ret = 0;
1469 
1470 	if (!type->name) {
1471 		pr_info("Tracer must have a name\n");
1472 		return -1;
1473 	}
1474 
1475 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1476 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1477 		return -1;
1478 	}
1479 
1480 	mutex_lock(&trace_types_lock);
1481 
1482 	tracing_selftest_running = true;
1483 
1484 	for (t = trace_types; t; t = t->next) {
1485 		if (strcmp(type->name, t->name) == 0) {
1486 			/* already found */
1487 			pr_info("Tracer %s already registered\n",
1488 				type->name);
1489 			ret = -1;
1490 			goto out;
1491 		}
1492 	}
1493 
1494 	if (!type->set_flag)
1495 		type->set_flag = &dummy_set_flag;
1496 	if (!type->flags) {
1497 		/* allocate a dummy tracer_flags */
1498 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1499 		if (!type->flags) {
1500 			ret = -ENOMEM;
1501 			goto out;
1502 		}
1503 		type->flags->val = 0;
1504 		type->flags->opts = dummy_tracer_opt;
1505 	} else
1506 		if (!type->flags->opts)
1507 			type->flags->opts = dummy_tracer_opt;
1508 
1509 	/* store the tracer for __set_tracer_option */
1510 	type->flags->trace = type;
1511 
1512 	ret = run_tracer_selftest(type);
1513 	if (ret < 0)
1514 		goto out;
1515 
1516 	type->next = trace_types;
1517 	trace_types = type;
1518 	add_tracer_options(&global_trace, type);
1519 
1520  out:
1521 	tracing_selftest_running = false;
1522 	mutex_unlock(&trace_types_lock);
1523 
1524 	if (ret || !default_bootup_tracer)
1525 		goto out_unlock;
1526 
1527 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1528 		goto out_unlock;
1529 
1530 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1531 	/* Do we want this tracer to start on bootup? */
1532 	tracing_set_tracer(&global_trace, type->name);
1533 	default_bootup_tracer = NULL;
1534 
1535 	apply_trace_boot_options();
1536 
1537 	/* disable other selftests, since this will break them. */
1538 	tracing_selftest_disabled = true;
1539 #ifdef CONFIG_FTRACE_STARTUP_TEST
1540 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1541 	       type->name);
1542 #endif
1543 
1544  out_unlock:
1545 	return ret;
1546 }
1547 
1548 void tracing_reset(struct trace_buffer *buf, int cpu)
1549 {
1550 	struct ring_buffer *buffer = buf->buffer;
1551 
1552 	if (!buffer)
1553 		return;
1554 
1555 	ring_buffer_record_disable(buffer);
1556 
1557 	/* Make sure all commits have finished */
1558 	synchronize_sched();
1559 	ring_buffer_reset_cpu(buffer, cpu);
1560 
1561 	ring_buffer_record_enable(buffer);
1562 }
1563 
1564 void tracing_reset_online_cpus(struct trace_buffer *buf)
1565 {
1566 	struct ring_buffer *buffer = buf->buffer;
1567 	int cpu;
1568 
1569 	if (!buffer)
1570 		return;
1571 
1572 	ring_buffer_record_disable(buffer);
1573 
1574 	/* Make sure all commits have finished */
1575 	synchronize_sched();
1576 
1577 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1578 
1579 	for_each_online_cpu(cpu)
1580 		ring_buffer_reset_cpu(buffer, cpu);
1581 
1582 	ring_buffer_record_enable(buffer);
1583 }
1584 
1585 /* Must have trace_types_lock held */
1586 void tracing_reset_all_online_cpus(void)
1587 {
1588 	struct trace_array *tr;
1589 
1590 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1591 		tracing_reset_online_cpus(&tr->trace_buffer);
1592 #ifdef CONFIG_TRACER_MAX_TRACE
1593 		tracing_reset_online_cpus(&tr->max_buffer);
1594 #endif
1595 	}
1596 }
1597 
1598 #define SAVED_CMDLINES_DEFAULT 128
1599 #define NO_CMDLINE_MAP UINT_MAX
1600 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1601 struct saved_cmdlines_buffer {
1602 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1603 	unsigned *map_cmdline_to_pid;
1604 	unsigned cmdline_num;
1605 	int cmdline_idx;
1606 	char *saved_cmdlines;
1607 };
1608 static struct saved_cmdlines_buffer *savedcmd;
1609 
1610 /* temporarily disable recording */
1611 static atomic_t trace_record_cmdline_disabled __read_mostly;
1612 
1613 static inline char *get_saved_cmdlines(int idx)
1614 {
1615 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1616 }
1617 
1618 static inline void set_cmdline(int idx, const char *cmdline)
1619 {
1620 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1621 }
1622 
1623 static int allocate_cmdlines_buffer(unsigned int val,
1624 				    struct saved_cmdlines_buffer *s)
1625 {
1626 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1627 					GFP_KERNEL);
1628 	if (!s->map_cmdline_to_pid)
1629 		return -ENOMEM;
1630 
1631 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1632 	if (!s->saved_cmdlines) {
1633 		kfree(s->map_cmdline_to_pid);
1634 		return -ENOMEM;
1635 	}
1636 
1637 	s->cmdline_idx = 0;
1638 	s->cmdline_num = val;
1639 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1640 	       sizeof(s->map_pid_to_cmdline));
1641 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1642 	       val * sizeof(*s->map_cmdline_to_pid));
1643 
1644 	return 0;
1645 }
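/*
 * With the default of 128 saved cmdlines and a TASK_COMM_LEN of 16,
 * saved_cmdlines is a 2KB array and map_cmdline_to_pid holds 128 entries,
 * while map_pid_to_cmdline is always sized for PID_MAX_DEFAULT + 1 pids.
 */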
1646 
1647 static int trace_create_savedcmd(void)
1648 {
1649 	int ret;
1650 
1651 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1652 	if (!savedcmd)
1653 		return -ENOMEM;
1654 
1655 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1656 	if (ret < 0) {
1657 		kfree(savedcmd);
1658 		savedcmd = NULL;
1659 		return -ENOMEM;
1660 	}
1661 
1662 	return 0;
1663 }
1664 
1665 int is_tracing_stopped(void)
1666 {
1667 	return global_trace.stop_count;
1668 }
1669 
1670 /**
1671  * tracing_start - quick start of the tracer
1672  *
1673  * If tracing is enabled but was stopped by tracing_stop,
1674  * this will start the tracer back up.
1675  */
1676 void tracing_start(void)
1677 {
1678 	struct ring_buffer *buffer;
1679 	unsigned long flags;
1680 
1681 	if (tracing_disabled)
1682 		return;
1683 
1684 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1685 	if (--global_trace.stop_count) {
1686 		if (global_trace.stop_count < 0) {
1687 			/* Someone screwed up their debugging */
1688 			WARN_ON_ONCE(1);
1689 			global_trace.stop_count = 0;
1690 		}
1691 		goto out;
1692 	}
1693 
1694 	/* Prevent the buffers from switching */
1695 	arch_spin_lock(&global_trace.max_lock);
1696 
1697 	buffer = global_trace.trace_buffer.buffer;
1698 	if (buffer)
1699 		ring_buffer_record_enable(buffer);
1700 
1701 #ifdef CONFIG_TRACER_MAX_TRACE
1702 	buffer = global_trace.max_buffer.buffer;
1703 	if (buffer)
1704 		ring_buffer_record_enable(buffer);
1705 #endif
1706 
1707 	arch_spin_unlock(&global_trace.max_lock);
1708 
1709  out:
1710 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1711 }
1712 
1713 static void tracing_start_tr(struct trace_array *tr)
1714 {
1715 	struct ring_buffer *buffer;
1716 	unsigned long flags;
1717 
1718 	if (tracing_disabled)
1719 		return;
1720 
1721 	/* If global, we need to also start the max tracer */
1722 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1723 		return tracing_start();
1724 
1725 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1726 
1727 	if (--tr->stop_count) {
1728 		if (tr->stop_count < 0) {
1729 			/* Someone screwed up their debugging */
1730 			WARN_ON_ONCE(1);
1731 			tr->stop_count = 0;
1732 		}
1733 		goto out;
1734 	}
1735 
1736 	buffer = tr->trace_buffer.buffer;
1737 	if (buffer)
1738 		ring_buffer_record_enable(buffer);
1739 
1740  out:
1741 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1742 }
1743 
1744 /**
1745  * tracing_stop - quick stop of the tracer
1746  *
1747  * Lightweight way to stop tracing. Use in conjunction with
1748  * tracing_start.
1749  */
1750 void tracing_stop(void)
1751 {
1752 	struct ring_buffer *buffer;
1753 	unsigned long flags;
1754 
1755 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1756 	if (global_trace.stop_count++)
1757 		goto out;
1758 
1759 	/* Prevent the buffers from switching */
1760 	arch_spin_lock(&global_trace.max_lock);
1761 
1762 	buffer = global_trace.trace_buffer.buffer;
1763 	if (buffer)
1764 		ring_buffer_record_disable(buffer);
1765 
1766 #ifdef CONFIG_TRACER_MAX_TRACE
1767 	buffer = global_trace.max_buffer.buffer;
1768 	if (buffer)
1769 		ring_buffer_record_disable(buffer);
1770 #endif
1771 
1772 	arch_spin_unlock(&global_trace.max_lock);
1773 
1774  out:
1775 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1776 }
1777 
1778 static void tracing_stop_tr(struct trace_array *tr)
1779 {
1780 	struct ring_buffer *buffer;
1781 	unsigned long flags;
1782 
1783 	/* If global, we need to also stop the max tracer */
1784 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1785 		return tracing_stop();
1786 
1787 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1788 	if (tr->stop_count++)
1789 		goto out;
1790 
1791 	buffer = tr->trace_buffer.buffer;
1792 	if (buffer)
1793 		ring_buffer_record_disable(buffer);
1794 
1795  out:
1796 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1797 }
1798 
1799 void trace_stop_cmdline_recording(void);
1800 
1801 static int trace_save_cmdline(struct task_struct *tsk)
1802 {
1803 	unsigned pid, idx;
1804 
1805 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1806 		return 0;
1807 
1808 	/*
1809 	 * It's not the end of the world if we don't get
1810 	 * the lock, but we also don't want to spin
1811 	 * nor do we want to disable interrupts,
1812 	 * so if we miss here, then better luck next time.
1813 	 */
1814 	if (!arch_spin_trylock(&trace_cmdline_lock))
1815 		return 0;
1816 
1817 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1818 	if (idx == NO_CMDLINE_MAP) {
1819 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1820 
1821 		/*
1822 		 * Check whether the cmdline buffer at idx has a pid
1823 		 * mapped. We are going to overwrite that entry so we
1824 		 * need to clear the map_pid_to_cmdline. Otherwise we
1825 		 * would read the new comm for the old pid.
1826 		 */
1827 		pid = savedcmd->map_cmdline_to_pid[idx];
1828 		if (pid != NO_CMDLINE_MAP)
1829 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1830 
1831 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1832 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1833 
1834 		savedcmd->cmdline_idx = idx;
1835 	}
1836 
1837 	set_cmdline(idx, tsk->comm);
1838 
1839 	arch_spin_unlock(&trace_cmdline_lock);
1840 
1841 	return 1;
1842 }
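/*
 * E.g. if pid 1234 is saved in slot 5, then map_pid_to_cmdline[1234] == 5,
 * map_cmdline_to_pid[5] == 1234 and get_saved_cmdlines(5) holds its comm;
 * a later pid that reuses slot 5 unmaps 1234 first.
 */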
1843 
1844 static void __trace_find_cmdline(int pid, char comm[])
1845 {
1846 	unsigned map;
1847 
1848 	if (!pid) {
1849 		strcpy(comm, "<idle>");
1850 		return;
1851 	}
1852 
1853 	if (WARN_ON_ONCE(pid < 0)) {
1854 		strcpy(comm, "<XXX>");
1855 		return;
1856 	}
1857 
1858 	if (pid > PID_MAX_DEFAULT) {
1859 		strcpy(comm, "<...>");
1860 		return;
1861 	}
1862 
1863 	map = savedcmd->map_pid_to_cmdline[pid];
1864 	if (map != NO_CMDLINE_MAP)
1865 		strcpy(comm, get_saved_cmdlines(map));
1866 	else
1867 		strcpy(comm, "<...>");
1868 }
1869 
1870 void trace_find_cmdline(int pid, char comm[])
1871 {
1872 	preempt_disable();
1873 	arch_spin_lock(&trace_cmdline_lock);
1874 
1875 	__trace_find_cmdline(pid, comm);
1876 
1877 	arch_spin_unlock(&trace_cmdline_lock);
1878 	preempt_enable();
1879 }
1880 
1881 void tracing_record_cmdline(struct task_struct *tsk)
1882 {
1883 	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1884 		return;
1885 
1886 	if (!__this_cpu_read(trace_cmdline_save))
1887 		return;
1888 
1889 	if (trace_save_cmdline(tsk))
1890 		__this_cpu_write(trace_cmdline_save, false);
1891 }
1892 
1893 void
1894 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1895 			     int pc)
1896 {
1897 	struct task_struct *tsk = current;
1898 
1899 	entry->preempt_count		= pc & 0xff;
1900 	entry->pid			= (tsk) ? tsk->pid : 0;
1901 	entry->flags =
1902 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1903 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1904 #else
1905 		TRACE_FLAG_IRQS_NOSUPPORT |
1906 #endif
1907 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1908 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1909 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1910 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1911 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1912 }
1913 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1914 
1915 static __always_inline void
1916 trace_event_setup(struct ring_buffer_event *event,
1917 		  int type, unsigned long flags, int pc)
1918 {
1919 	struct trace_entry *ent = ring_buffer_event_data(event);
1920 
1921 	tracing_generic_entry_update(ent, flags, pc);
1922 	ent->type = type;
1923 }
1924 
1925 struct ring_buffer_event *
1926 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1927 			  int type,
1928 			  unsigned long len,
1929 			  unsigned long flags, int pc)
1930 {
1931 	struct ring_buffer_event *event;
1932 
1933 	event = ring_buffer_lock_reserve(buffer, len);
1934 	if (event != NULL)
1935 		trace_event_setup(event, type, flags, pc);
1936 
1937 	return event;
1938 }
1939 
1940 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1941 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1942 static int trace_buffered_event_ref;
1943 
1944 /**
1945  * trace_buffered_event_enable - enable buffering events
1946  *
1947  * When events are being filtered, it is quicker to use a temporary
1948  * buffer to write the event data into if there's a likely chance
1949  * that it will not be committed. Discarding an event from the ring
1950  * buffer is not as fast as committing one, and is much slower than
1951  * copying the data and committing the copy.
1952  *
1953  * When an event is to be filtered, allocate per-CPU buffers to
1954  * write the event data into. If the event is filtered and discarded,
1955  * it is simply dropped; otherwise, the entire data is committed
1956  * in one shot.
1957  */
1958 void trace_buffered_event_enable(void)
1959 {
1960 	struct ring_buffer_event *event;
1961 	struct page *page;
1962 	int cpu;
1963 
1964 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1965 
1966 	if (trace_buffered_event_ref++)
1967 		return;
1968 
1969 	for_each_tracing_cpu(cpu) {
1970 		page = alloc_pages_node(cpu_to_node(cpu),
1971 					GFP_KERNEL | __GFP_NORETRY, 0);
1972 		if (!page)
1973 			goto failed;
1974 
1975 		event = page_address(page);
1976 		memset(event, 0, sizeof(*event));
1977 
1978 		per_cpu(trace_buffered_event, cpu) = event;
1979 
1980 		preempt_disable();
1981 		if (cpu == smp_processor_id() &&
1982 		    this_cpu_read(trace_buffered_event) !=
1983 		    per_cpu(trace_buffered_event, cpu))
1984 			WARN_ON_ONCE(1);
1985 		preempt_enable();
1986 	}
1987 
1988 	return;
1989  failed:
1990 	trace_buffered_event_disable();
1991 }
1992 
1993 static void enable_trace_buffered_event(void *data)
1994 {
1995 	/* Probably not needed, but do it anyway */
1996 	smp_rmb();
1997 	this_cpu_dec(trace_buffered_event_cnt);
1998 }
1999 
2000 static void disable_trace_buffered_event(void *data)
2001 {
2002 	this_cpu_inc(trace_buffered_event_cnt);
2003 }
2004 
2005 /**
2006  * trace_buffered_event_disable - disable buffering events
2007  *
2008  * When a filter is removed, it is faster to not use the buffered
2009  * events, and to commit directly into the ring buffer. Free up
2010  * the temp buffers when there are no more users. This requires
2011  * special synchronization with current events.
2012  */
2013 void trace_buffered_event_disable(void)
2014 {
2015 	int cpu;
2016 
2017 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2018 
2019 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2020 		return;
2021 
2022 	if (--trace_buffered_event_ref)
2023 		return;
2024 
2025 	preempt_disable();
2026 	/* For each CPU, set the buffer as used. */
2027 	smp_call_function_many(tracing_buffer_mask,
2028 			       disable_trace_buffered_event, NULL, 1);
2029 	preempt_enable();
2030 
2031 	/* Wait for all current users to finish */
2032 	synchronize_sched();
2033 
2034 	for_each_tracing_cpu(cpu) {
2035 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2036 		per_cpu(trace_buffered_event, cpu) = NULL;
2037 	}
2038 	/*
2039 	 * Make sure trace_buffered_event is NULL before clearing
2040 	 * trace_buffered_event_cnt.
2041 	 */
2042 	smp_wmb();
2043 
2044 	preempt_disable();
2045 	/* Put the per-CPU counters back now that the buffers are freed */
2046 	smp_call_function_many(tracing_buffer_mask,
2047 			       enable_trace_buffered_event, NULL, 1);
2048 	preempt_enable();
2049 }
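
/*
 * Pairing sketch: trace_buffered_event_enable() is meant to be called
 * when a filter is attached and trace_buffered_event_disable() when it
 * is removed, always under event_mutex (both WARN if it is not held),
 * since the refcount above is not atomic.  attach_filter() and
 * detach_filter() below are hypothetical placeholders:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();	// first user allocates the pages
 *	attach_filter(file);
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	detach_filter(file);
 *	trace_buffered_event_disable();	// last user frees the pages
 *	mutex_unlock(&event_mutex);
 */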
2050 
2051 void
2052 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2053 {
2054 	__this_cpu_write(trace_cmdline_save, true);
2055 
2056 	/* If this is the temp buffer, we need to commit fully */
2057 	if (this_cpu_read(trace_buffered_event) == event) {
2058 		/* Length is in event->array[0] */
2059 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
2060 		/* Release the temp buffer */
2061 		this_cpu_dec(trace_buffered_event_cnt);
2062 	} else
2063 		ring_buffer_unlock_commit(buffer, event);
2064 }
2065 
2066 static struct ring_buffer *temp_buffer;
2067 
2068 struct ring_buffer_event *
2069 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2070 			  struct trace_event_file *trace_file,
2071 			  int type, unsigned long len,
2072 			  unsigned long flags, int pc)
2073 {
2074 	struct ring_buffer_event *entry;
2075 	int val;
2076 
2077 	*current_rb = trace_file->tr->trace_buffer.buffer;
2078 
2079 	if ((trace_file->flags &
2080 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2081 	    (entry = this_cpu_read(trace_buffered_event))) {
2082 		/* Try to use the per cpu buffer first */
2083 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2084 		if (val == 1) {
2085 			trace_event_setup(entry, type, flags, pc);
2086 			entry->array[0] = len;
2087 			return entry;
2088 		}
2089 		this_cpu_dec(trace_buffered_event_cnt);
2090 	}
2091 
2092 	entry = trace_buffer_lock_reserve(*current_rb,
2093 					 type, len, flags, pc);
2094 	/*
2095 	 * If tracing is off, but we have triggers enabled,
2096 	 * we still need to look at the event data. Use the temp_buffer
2097 	 * to store the trace event for the trigger to use. It's recursion
2098 	 * safe and will not be recorded anywhere.
2099 	 */
2100 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2101 		*current_rb = temp_buffer;
2102 		entry = trace_buffer_lock_reserve(*current_rb,
2103 						  type, len, flags, pc);
2104 	}
2105 	return entry;
2106 }
2107 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
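
/*
 * Note on the fast path above: when the per-CPU trace_buffered_event
 * is handed back instead of a real ring-buffer slot, the payload
 * length is stashed in event->array[0].  __buffer_unlock_commit()
 * recognizes such an event by pointer comparison and copies it into
 * the ring buffer with ring_buffer_write(), so callers fill in the
 * event data the same way in both cases.
 */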
2108 
2109 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2110 				     struct ring_buffer *buffer,
2111 				     struct ring_buffer_event *event,
2112 				     unsigned long flags, int pc,
2113 				     struct pt_regs *regs)
2114 {
2115 	__buffer_unlock_commit(buffer, event);
2116 
2117 	/*
2118 	 * If regs is not set, then skip the following callers:
2119 	 *   trace_buffer_unlock_commit_regs
2120 	 *   event_trigger_unlock_commit
2121 	 *   trace_event_buffer_commit
2122 	 *   trace_event_raw_event_sched_switch
2123 	 * Note, we can still get here via blktrace, wakeup tracer
2124 	 * and mmiotrace, but that's ok if they lose a function or
2125 	 * two. They are not that meaningful.
2126 	 */
2127 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2128 	ftrace_trace_userstack(buffer, flags, pc);
2129 }
2130 
2131 void
2132 trace_function(struct trace_array *tr,
2133 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2134 	       int pc)
2135 {
2136 	struct trace_event_call *call = &event_function;
2137 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2138 	struct ring_buffer_event *event;
2139 	struct ftrace_entry *entry;
2140 
2141 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2142 					  flags, pc);
2143 	if (!event)
2144 		return;
2145 	entry	= ring_buffer_event_data(event);
2146 	entry->ip			= ip;
2147 	entry->parent_ip		= parent_ip;
2148 
2149 	if (!call_filter_check_discard(call, entry, buffer, event))
2150 		__buffer_unlock_commit(buffer, event);
2151 }
2152 
2153 #ifdef CONFIG_STACKTRACE
2154 
2155 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2156 struct ftrace_stack {
2157 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2158 };
2159 
2160 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2161 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2162 
2163 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2164 				 unsigned long flags,
2165 				 int skip, int pc, struct pt_regs *regs)
2166 {
2167 	struct trace_event_call *call = &event_kernel_stack;
2168 	struct ring_buffer_event *event;
2169 	struct stack_entry *entry;
2170 	struct stack_trace trace;
2171 	int use_stack;
2172 	int size = FTRACE_STACK_ENTRIES;
2173 
2174 	trace.nr_entries	= 0;
2175 	trace.skip		= skip;
2176 
2177 	/*
2178 	 * Add two, for this function and the call to save_stack_trace().
2179 	 * If regs is set, then these functions will not be in the way.
2180 	 */
2181 	if (!regs)
2182 		trace.skip += 2;
2183 
2184 	/*
2185 	 * Since events can happen in NMIs, there's no safe way to
2186 	 * use the per-CPU ftrace_stacks. We reserve it, and if an interrupt
2187 	 * or NMI comes in, it will just have to use the default
2188 	 * FTRACE_STACK_SIZE.
2189 	 */
2190 	preempt_disable_notrace();
2191 
2192 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2193 	/*
2194 	 * We don't need any atomic variables, just a barrier.
2195 	 * If an interrupt comes in, we don't care, because it would
2196 	 * have exited and put the counter back to what we want.
2197 	 * We just need a barrier to keep gcc from moving things
2198 	 * around.
2199 	 */
2200 	barrier();
2201 	if (use_stack == 1) {
2202 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2203 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2204 
2205 		if (regs)
2206 			save_stack_trace_regs(regs, &trace);
2207 		else
2208 			save_stack_trace(&trace);
2209 
2210 		if (trace.nr_entries > size)
2211 			size = trace.nr_entries;
2212 	} else
2213 		/* From now on, use_stack is a boolean */
2214 		use_stack = 0;
2215 
2216 	size *= sizeof(unsigned long);
2217 
2218 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2219 					  sizeof(*entry) + size, flags, pc);
2220 	if (!event)
2221 		goto out;
2222 	entry = ring_buffer_event_data(event);
2223 
2224 	memset(&entry->caller, 0, size);
2225 
2226 	if (use_stack)
2227 		memcpy(&entry->caller, trace.entries,
2228 		       trace.nr_entries * sizeof(unsigned long));
2229 	else {
2230 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2231 		trace.entries		= entry->caller;
2232 		if (regs)
2233 			save_stack_trace_regs(regs, &trace);
2234 		else
2235 			save_stack_trace(&trace);
2236 	}
2237 
2238 	entry->size = trace.nr_entries;
2239 
2240 	if (!call_filter_check_discard(call, entry, buffer, event))
2241 		__buffer_unlock_commit(buffer, event);
2242 
2243  out:
2244 	/* Again, don't let gcc optimize things here */
2245 	barrier();
2246 	__this_cpu_dec(ftrace_stack_reserve);
2247 	preempt_enable_notrace();
2248 
2249 }
2250 
2251 static inline void ftrace_trace_stack(struct trace_array *tr,
2252 				      struct ring_buffer *buffer,
2253 				      unsigned long flags,
2254 				      int skip, int pc, struct pt_regs *regs)
2255 {
2256 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2257 		return;
2258 
2259 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2260 }
2261 
2262 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2263 		   int pc)
2264 {
2265 	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2266 }
2267 
2268 /**
2269  * trace_dump_stack - record a stack back trace in the trace buffer
2270  * @skip: Number of functions to skip (helper handlers)
2271  */
2272 void trace_dump_stack(int skip)
2273 {
2274 	unsigned long flags;
2275 
2276 	if (tracing_disabled || tracing_selftest_running)
2277 		return;
2278 
2279 	local_save_flags(flags);
2280 
2281 	/*
2282 	 * Skip 3 more; that seems to get us to the caller of
2283 	 * this function.
2284 	 */
2285 	skip += 3;
2286 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2287 			     flags, skip, preempt_count(), NULL);
2288 }
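
/*
 * Usage sketch: trace_dump_stack(0) records a backtrace of its caller
 * into the global trace buffer.  A helper that calls it on someone
 * else's behalf skips its own frame (my_debug_hook() is hypothetical):
 *
 *	static void my_debug_hook(void)
 *	{
 *		trace_dump_stack(1);
 *	}
 */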
2289 
2290 static DEFINE_PER_CPU(int, user_stack_count);
2291 
2292 void
2293 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2294 {
2295 	struct trace_event_call *call = &event_user_stack;
2296 	struct ring_buffer_event *event;
2297 	struct userstack_entry *entry;
2298 	struct stack_trace trace;
2299 
2300 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2301 		return;
2302 
2303 	/*
2304 	 * NMIs cannot handle page faults, even with fixups.
2305 	 * Saving the user stack can (and often does) fault.
2306 	 */
2307 	if (unlikely(in_nmi()))
2308 		return;
2309 
2310 	/*
2311 	 * prevent recursion, since the user stack tracing may
2312 	 * trigger other kernel events.
2313 	 */
2314 	preempt_disable();
2315 	if (__this_cpu_read(user_stack_count))
2316 		goto out;
2317 
2318 	__this_cpu_inc(user_stack_count);
2319 
2320 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2321 					  sizeof(*entry), flags, pc);
2322 	if (!event)
2323 		goto out_drop_count;
2324 	entry	= ring_buffer_event_data(event);
2325 
2326 	entry->tgid		= current->tgid;
2327 	memset(&entry->caller, 0, sizeof(entry->caller));
2328 
2329 	trace.nr_entries	= 0;
2330 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2331 	trace.skip		= 0;
2332 	trace.entries		= entry->caller;
2333 
2334 	save_stack_trace_user(&trace);
2335 	if (!call_filter_check_discard(call, entry, buffer, event))
2336 		__buffer_unlock_commit(buffer, event);
2337 
2338  out_drop_count:
2339 	__this_cpu_dec(user_stack_count);
2340  out:
2341 	preempt_enable();
2342 }
2343 
2344 #ifdef UNUSED
2345 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2346 {
2347 	ftrace_trace_userstack(tr, flags, preempt_count());
2348 }
2349 #endif /* UNUSED */
2350 
2351 #endif /* CONFIG_STACKTRACE */
2352 
2353 /* created for use with alloc_percpu */
2354 struct trace_buffer_struct {
2355 	int nesting;
2356 	char buffer[4][TRACE_BUF_SIZE];
2357 };
2358 
2359 static struct trace_buffer_struct *trace_percpu_buffer;
2360 
2361 /*
2362  * This allows for lockless recording.  If we're nested too deeply, then
2363  * this returns NULL.
2364  */
2365 static char *get_trace_buf(void)
2366 {
2367 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2368 
2369 	if (!buffer || buffer->nesting >= 4)
2370 		return NULL;
2371 
2372 	return &buffer->buffer[buffer->nesting++][0];
2373 }
2374 
2375 static void put_trace_buf(void)
2376 {
2377 	this_cpu_dec(trace_percpu_buffer->nesting);
2378 }
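
/*
 * Nesting sketch (this mirrors trace_vbprintk() below): the per-CPU
 * scratch buffer allows up to four nested users on a CPU, so callers
 * must pair get/put, keep preemption disabled in between, and tolerate
 * a NULL return:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		... format up to TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */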
2379 
2380 static int alloc_percpu_trace_buffer(void)
2381 {
2382 	struct trace_buffer_struct *buffers;
2383 
2384 	buffers = alloc_percpu(struct trace_buffer_struct);
2385 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2386 		return -ENOMEM;
2387 
2388 	trace_percpu_buffer = buffers;
2389 	return 0;
2390 }
2391 
2392 static int buffers_allocated;
2393 
2394 void trace_printk_init_buffers(void)
2395 {
2396 	if (buffers_allocated)
2397 		return;
2398 
2399 	if (alloc_percpu_trace_buffer())
2400 		return;
2401 
2402 	/* trace_printk() is for debug use only. Don't use it in production. */
2403 
2404 	pr_warn("\n");
2405 	pr_warn("**********************************************************\n");
2406 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2407 	pr_warn("**                                                      **\n");
2408 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2409 	pr_warn("**                                                      **\n");
2410 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2411 	pr_warn("** unsafe for production use.                           **\n");
2412 	pr_warn("**                                                      **\n");
2413 	pr_warn("** If you see this message and you are not debugging    **\n");
2414 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2415 	pr_warn("**                                                      **\n");
2416 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2417 	pr_warn("**********************************************************\n");
2418 
2419 	/* Expand the buffers to the set size */
2420 	tracing_update_buffers();
2421 
2422 	buffers_allocated = 1;
2423 
2424 	/*
2425 	 * trace_printk_init_buffers() can be called by modules.
2426 	 * If that happens, then we need to start cmdline recording
2427 	 * directly here. If the global_trace.buffer is already
2428 	 * allocated here, then this was called by module code.
2429 	 */
2430 	if (global_trace.trace_buffer.buffer)
2431 		tracing_start_cmdline_record();
2432 }
2433 
2434 void trace_printk_start_comm(void)
2435 {
2436 	/* Start tracing comms if trace printk is set */
2437 	if (!buffers_allocated)
2438 		return;
2439 	tracing_start_cmdline_record();
2440 }
2441 
2442 static void trace_printk_start_stop_comm(int enabled)
2443 {
2444 	if (!buffers_allocated)
2445 		return;
2446 
2447 	if (enabled)
2448 		tracing_start_cmdline_record();
2449 	else
2450 		tracing_stop_cmdline_record();
2451 }
2452 
2453 /**
2454  * trace_vbprintk - write a binary message to the tracing buffer
2455  * @ip: caller address, @fmt: format string, @args: arguments for @fmt
2456  */
2457 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2458 {
2459 	struct trace_event_call *call = &event_bprint;
2460 	struct ring_buffer_event *event;
2461 	struct ring_buffer *buffer;
2462 	struct trace_array *tr = &global_trace;
2463 	struct bprint_entry *entry;
2464 	unsigned long flags;
2465 	char *tbuffer;
2466 	int len = 0, size, pc;
2467 
2468 	if (unlikely(tracing_selftest_running || tracing_disabled))
2469 		return 0;
2470 
2471 	/* Don't pollute graph traces with trace_vprintk internals */
2472 	pause_graph_tracing();
2473 
2474 	pc = preempt_count();
2475 	preempt_disable_notrace();
2476 
2477 	tbuffer = get_trace_buf();
2478 	if (!tbuffer) {
2479 		len = 0;
2480 		goto out_nobuffer;
2481 	}
2482 
2483 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2484 
2485 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2486 		goto out;
2487 
2488 	local_save_flags(flags);
2489 	size = sizeof(*entry) + sizeof(u32) * len;
2490 	buffer = tr->trace_buffer.buffer;
2491 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2492 					  flags, pc);
2493 	if (!event)
2494 		goto out;
2495 	entry = ring_buffer_event_data(event);
2496 	entry->ip			= ip;
2497 	entry->fmt			= fmt;
2498 
2499 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2500 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2501 		__buffer_unlock_commit(buffer, event);
2502 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2503 	}
2504 
2505 out:
2506 	put_trace_buf();
2507 
2508 out_nobuffer:
2509 	preempt_enable_notrace();
2510 	unpause_graph_tracing();
2511 
2512 	return len;
2513 }
2514 EXPORT_SYMBOL_GPL(trace_vbprintk);
2515 
2516 static int
2517 __trace_array_vprintk(struct ring_buffer *buffer,
2518 		      unsigned long ip, const char *fmt, va_list args)
2519 {
2520 	struct trace_event_call *call = &event_print;
2521 	struct ring_buffer_event *event;
2522 	int len = 0, size, pc;
2523 	struct print_entry *entry;
2524 	unsigned long flags;
2525 	char *tbuffer;
2526 
2527 	if (tracing_disabled || tracing_selftest_running)
2528 		return 0;
2529 
2530 	/* Don't pollute graph traces with trace_vprintk internals */
2531 	pause_graph_tracing();
2532 
2533 	pc = preempt_count();
2534 	preempt_disable_notrace();
2535 
2536 
2537 	tbuffer = get_trace_buf();
2538 	if (!tbuffer) {
2539 		len = 0;
2540 		goto out_nobuffer;
2541 	}
2542 
2543 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2544 
2545 	local_save_flags(flags);
2546 	size = sizeof(*entry) + len + 1;
2547 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2548 					  flags, pc);
2549 	if (!event)
2550 		goto out;
2551 	entry = ring_buffer_event_data(event);
2552 	entry->ip = ip;
2553 
2554 	memcpy(&entry->buf, tbuffer, len + 1);
2555 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2556 		__buffer_unlock_commit(buffer, event);
2557 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2558 	}
2559 
2560 out:
2561 	put_trace_buf();
2562 
2563 out_nobuffer:
2564 	preempt_enable_notrace();
2565 	unpause_graph_tracing();
2566 
2567 	return len;
2568 }
2569 
2570 int trace_array_vprintk(struct trace_array *tr,
2571 			unsigned long ip, const char *fmt, va_list args)
2572 {
2573 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2574 }
2575 
2576 int trace_array_printk(struct trace_array *tr,
2577 		       unsigned long ip, const char *fmt, ...)
2578 {
2579 	int ret;
2580 	va_list ap;
2581 
2582 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2583 		return 0;
2584 
2585 	va_start(ap, fmt);
2586 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2587 	va_end(ap);
2588 	return ret;
2589 }
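
/*
 * Usage sketch: instance-aware code can direct a printk-style message
 * at a specific trace_array instead of the global buffer; _THIS_IP_ is
 * the conventional value for @ip so the record points at the caller:
 *
 *	trace_array_printk(tr, _THIS_IP_, "reset count=%d\n", count);
 */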
2590 
2591 int trace_array_printk_buf(struct ring_buffer *buffer,
2592 			   unsigned long ip, const char *fmt, ...)
2593 {
2594 	int ret;
2595 	va_list ap;
2596 
2597 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2598 		return 0;
2599 
2600 	va_start(ap, fmt);
2601 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2602 	va_end(ap);
2603 	return ret;
2604 }
2605 
2606 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2607 {
2608 	return trace_array_vprintk(&global_trace, ip, fmt, args);
2609 }
2610 EXPORT_SYMBOL_GPL(trace_vprintk);
2611 
2612 static void trace_iterator_increment(struct trace_iterator *iter)
2613 {
2614 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2615 
2616 	iter->idx++;
2617 	if (buf_iter)
2618 		ring_buffer_read(buf_iter, NULL);
2619 }
2620 
2621 static struct trace_entry *
2622 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2623 		unsigned long *lost_events)
2624 {
2625 	struct ring_buffer_event *event;
2626 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2627 
2628 	if (buf_iter)
2629 		event = ring_buffer_iter_peek(buf_iter, ts);
2630 	else
2631 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2632 					 lost_events);
2633 
2634 	if (event) {
2635 		iter->ent_size = ring_buffer_event_length(event);
2636 		return ring_buffer_event_data(event);
2637 	}
2638 	iter->ent_size = 0;
2639 	return NULL;
2640 }
2641 
2642 static struct trace_entry *
2643 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2644 		  unsigned long *missing_events, u64 *ent_ts)
2645 {
2646 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2647 	struct trace_entry *ent, *next = NULL;
2648 	unsigned long lost_events = 0, next_lost = 0;
2649 	int cpu_file = iter->cpu_file;
2650 	u64 next_ts = 0, ts;
2651 	int next_cpu = -1;
2652 	int next_size = 0;
2653 	int cpu;
2654 
2655 	/*
2656 	 * If we are in a per_cpu trace file, don't bother iterating over
2657 	 * all CPUs; just peek at that CPU directly.
2658 	 */
2659 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2660 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2661 			return NULL;
2662 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2663 		if (ent_cpu)
2664 			*ent_cpu = cpu_file;
2665 
2666 		return ent;
2667 	}
2668 
2669 	for_each_tracing_cpu(cpu) {
2670 
2671 		if (ring_buffer_empty_cpu(buffer, cpu))
2672 			continue;
2673 
2674 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2675 
2676 		/*
2677 		 * Pick the entry with the smallest timestamp:
2678 		 */
2679 		if (ent && (!next || ts < next_ts)) {
2680 			next = ent;
2681 			next_cpu = cpu;
2682 			next_ts = ts;
2683 			next_lost = lost_events;
2684 			next_size = iter->ent_size;
2685 		}
2686 	}
2687 
2688 	iter->ent_size = next_size;
2689 
2690 	if (ent_cpu)
2691 		*ent_cpu = next_cpu;
2692 
2693 	if (ent_ts)
2694 		*ent_ts = next_ts;
2695 
2696 	if (missing_events)
2697 		*missing_events = next_lost;
2698 
2699 	return next;
2700 }
2701 
2702 /* Find the next real entry, without updating the iterator itself */
2703 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2704 					  int *ent_cpu, u64 *ent_ts)
2705 {
2706 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2707 }
2708 
2709 /* Find the next real entry, and increment the iterator to the next entry */
2710 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2711 {
2712 	iter->ent = __find_next_entry(iter, &iter->cpu,
2713 				      &iter->lost_events, &iter->ts);
2714 
2715 	if (iter->ent)
2716 		trace_iterator_increment(iter);
2717 
2718 	return iter->ent ? iter : NULL;
2719 }
2720 
2721 static void trace_consume(struct trace_iterator *iter)
2722 {
2723 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2724 			    &iter->lost_events);
2725 }
2726 
2727 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2728 {
2729 	struct trace_iterator *iter = m->private;
2730 	int i = (int)*pos;
2731 	void *ent;
2732 
2733 	WARN_ON_ONCE(iter->leftover);
2734 
2735 	(*pos)++;
2736 
2737 	/* can't go backwards */
2738 	if (iter->idx > i)
2739 		return NULL;
2740 
2741 	if (iter->idx < 0)
2742 		ent = trace_find_next_entry_inc(iter);
2743 	else
2744 		ent = iter;
2745 
2746 	while (ent && iter->idx < i)
2747 		ent = trace_find_next_entry_inc(iter);
2748 
2749 	iter->pos = *pos;
2750 
2751 	return ent;
2752 }
2753 
2754 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2755 {
2756 	struct ring_buffer_event *event;
2757 	struct ring_buffer_iter *buf_iter;
2758 	unsigned long entries = 0;
2759 	u64 ts;
2760 
2761 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2762 
2763 	buf_iter = trace_buffer_iter(iter, cpu);
2764 	if (!buf_iter)
2765 		return;
2766 
2767 	ring_buffer_iter_reset(buf_iter);
2768 
2769 	/*
2770 	 * We could have the case with the max latency tracers
2771 	 * that a reset never took place on a cpu. This is evident
2772 	 * from the timestamp being before the start of the buffer.
2773 	 */
2774 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2775 		if (ts >= iter->trace_buffer->time_start)
2776 			break;
2777 		entries++;
2778 		ring_buffer_read(buf_iter, NULL);
2779 	}
2780 
2781 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2782 }
2783 
2784 /*
2785  * The current tracer is copied to avoid taking a global lock
2786  * all around.
2787  */
2788 static void *s_start(struct seq_file *m, loff_t *pos)
2789 {
2790 	struct trace_iterator *iter = m->private;
2791 	struct trace_array *tr = iter->tr;
2792 	int cpu_file = iter->cpu_file;
2793 	void *p = NULL;
2794 	loff_t l = 0;
2795 	int cpu;
2796 
2797 	/*
2798 	 * copy the tracer to avoid using a global lock all around.
2799 	 * iter->trace is a copy of current_trace; the pointer to the
2800 	 * name may be used instead of a strcmp(), as iter->trace->name
2801 	 * will point to the same string as current_trace->name.
2802 	 */
2803 	mutex_lock(&trace_types_lock);
2804 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2805 		*iter->trace = *tr->current_trace;
2806 	mutex_unlock(&trace_types_lock);
2807 
2808 #ifdef CONFIG_TRACER_MAX_TRACE
2809 	if (iter->snapshot && iter->trace->use_max_tr)
2810 		return ERR_PTR(-EBUSY);
2811 #endif
2812 
2813 	if (!iter->snapshot)
2814 		atomic_inc(&trace_record_cmdline_disabled);
2815 
2816 	if (*pos != iter->pos) {
2817 		iter->ent = NULL;
2818 		iter->cpu = 0;
2819 		iter->idx = -1;
2820 
2821 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2822 			for_each_tracing_cpu(cpu)
2823 				tracing_iter_reset(iter, cpu);
2824 		} else
2825 			tracing_iter_reset(iter, cpu_file);
2826 
2827 		iter->leftover = 0;
2828 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2829 			;
2830 
2831 	} else {
2832 		/*
2833 		 * If we overflowed the seq_file before, then we want
2834 		 * to just reuse the trace_seq buffer again.
2835 		 */
2836 		if (iter->leftover)
2837 			p = iter;
2838 		else {
2839 			l = *pos - 1;
2840 			p = s_next(m, p, &l);
2841 		}
2842 	}
2843 
2844 	trace_event_read_lock();
2845 	trace_access_lock(cpu_file);
2846 	return p;
2847 }
2848 
2849 static void s_stop(struct seq_file *m, void *p)
2850 {
2851 	struct trace_iterator *iter = m->private;
2852 
2853 #ifdef CONFIG_TRACER_MAX_TRACE
2854 	if (iter->snapshot && iter->trace->use_max_tr)
2855 		return;
2856 #endif
2857 
2858 	if (!iter->snapshot)
2859 		atomic_dec(&trace_record_cmdline_disabled);
2860 
2861 	trace_access_unlock(iter->cpu_file);
2862 	trace_event_read_unlock();
2863 }
2864 
2865 static void
2866 get_total_entries(struct trace_buffer *buf,
2867 		  unsigned long *total, unsigned long *entries)
2868 {
2869 	unsigned long count;
2870 	int cpu;
2871 
2872 	*total = 0;
2873 	*entries = 0;
2874 
2875 	for_each_tracing_cpu(cpu) {
2876 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2877 		/*
2878 		 * If this buffer has skipped entries, then we hold all
2879 		 * entries for the trace and we need to ignore the
2880 		 * ones before the timestamp.
2881 		 */
2882 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2883 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2884 			/* total is the same as the entries */
2885 			*total += count;
2886 		} else
2887 			*total += count +
2888 				ring_buffer_overrun_cpu(buf->buffer, cpu);
2889 		*entries += count;
2890 	}
2891 }
2892 
2893 static void print_lat_help_header(struct seq_file *m)
2894 {
2895 	seq_puts(m, "#                  _------=> CPU#            \n"
2896 		    "#                 / _-----=> irqs-off        \n"
2897 		    "#                | / _----=> need-resched    \n"
2898 		    "#                || / _---=> hardirq/softirq \n"
2899 		    "#                ||| / _--=> preempt-depth   \n"
2900 		    "#                |||| /     delay            \n"
2901 		    "#  cmd     pid   ||||| time  |   caller      \n"
2902 		    "#     \\   /      |||||  \\    |   /         \n");
2903 }
2904 
2905 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2906 {
2907 	unsigned long total;
2908 	unsigned long entries;
2909 
2910 	get_total_entries(buf, &total, &entries);
2911 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2912 		   entries, total, num_online_cpus());
2913 	seq_puts(m, "#\n");
2914 }
2915 
2916 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2917 {
2918 	print_event_info(buf, m);
2919 	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2920 		    "#              | |       |          |         |\n");
2921 }
2922 
2923 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2924 {
2925 	print_event_info(buf, m);
2926 	seq_puts(m, "#                              _-----=> irqs-off\n"
2927 		    "#                             / _----=> need-resched\n"
2928 		    "#                            | / _---=> hardirq/softirq\n"
2929 		    "#                            || / _--=> preempt-depth\n"
2930 		    "#                            ||| /     delay\n"
2931 		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2932 		    "#              | |       |   ||||       |         |\n");
2933 }
2934 
2935 void
2936 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2937 {
2938 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2939 	struct trace_buffer *buf = iter->trace_buffer;
2940 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2941 	struct tracer *type = iter->trace;
2942 	unsigned long entries;
2943 	unsigned long total;
2944 	const char *name = "preemption";
2945 
2946 	name = type->name;
2947 
2948 	get_total_entries(buf, &total, &entries);
2949 
2950 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2951 		   name, UTS_RELEASE);
2952 	seq_puts(m, "# -----------------------------------"
2953 		 "---------------------------------\n");
2954 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2955 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2956 		   nsecs_to_usecs(data->saved_latency),
2957 		   entries,
2958 		   total,
2959 		   buf->cpu,
2960 #if defined(CONFIG_PREEMPT_NONE)
2961 		   "server",
2962 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2963 		   "desktop",
2964 #elif defined(CONFIG_PREEMPT)
2965 		   "preempt",
2966 #else
2967 		   "unknown",
2968 #endif
2969 		   /* These are reserved for later use */
2970 		   0, 0, 0, 0);
2971 #ifdef CONFIG_SMP
2972 	seq_printf(m, " #P:%d)\n", num_online_cpus());
2973 #else
2974 	seq_puts(m, ")\n");
2975 #endif
2976 	seq_puts(m, "#    -----------------\n");
2977 	seq_printf(m, "#    | task: %.16s-%d "
2978 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2979 		   data->comm, data->pid,
2980 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2981 		   data->policy, data->rt_priority);
2982 	seq_puts(m, "#    -----------------\n");
2983 
2984 	if (data->critical_start) {
2985 		seq_puts(m, "#  => started at: ");
2986 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2987 		trace_print_seq(m, &iter->seq);
2988 		seq_puts(m, "\n#  => ended at:   ");
2989 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2990 		trace_print_seq(m, &iter->seq);
2991 		seq_puts(m, "\n#\n");
2992 	}
2993 
2994 	seq_puts(m, "#\n");
2995 }
2996 
2997 static void test_cpu_buff_start(struct trace_iterator *iter)
2998 {
2999 	struct trace_seq *s = &iter->seq;
3000 	struct trace_array *tr = iter->tr;
3001 
3002 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3003 		return;
3004 
3005 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3006 		return;
3007 
3008 	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3009 		return;
3010 
3011 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3012 		return;
3013 
3014 	if (iter->started)
3015 		cpumask_set_cpu(iter->cpu, iter->started);
3016 
3017 	/* Don't print started cpu buffer for the first entry of the trace */
3018 	if (iter->idx > 1)
3019 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3020 				iter->cpu);
3021 }
3022 
3023 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3024 {
3025 	struct trace_array *tr = iter->tr;
3026 	struct trace_seq *s = &iter->seq;
3027 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3028 	struct trace_entry *entry;
3029 	struct trace_event *event;
3030 
3031 	entry = iter->ent;
3032 
3033 	test_cpu_buff_start(iter);
3034 
3035 	event = ftrace_find_event(entry->type);
3036 
3037 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3038 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3039 			trace_print_lat_context(iter);
3040 		else
3041 			trace_print_context(iter);
3042 	}
3043 
3044 	if (trace_seq_has_overflowed(s))
3045 		return TRACE_TYPE_PARTIAL_LINE;
3046 
3047 	if (event)
3048 		return event->funcs->trace(iter, sym_flags, event);
3049 
3050 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3051 
3052 	return trace_handle_return(s);
3053 }
3054 
3055 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3056 {
3057 	struct trace_array *tr = iter->tr;
3058 	struct trace_seq *s = &iter->seq;
3059 	struct trace_entry *entry;
3060 	struct trace_event *event;
3061 
3062 	entry = iter->ent;
3063 
3064 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3065 		trace_seq_printf(s, "%d %d %llu ",
3066 				 entry->pid, iter->cpu, iter->ts);
3067 
3068 	if (trace_seq_has_overflowed(s))
3069 		return TRACE_TYPE_PARTIAL_LINE;
3070 
3071 	event = ftrace_find_event(entry->type);
3072 	if (event)
3073 		return event->funcs->raw(iter, 0, event);
3074 
3075 	trace_seq_printf(s, "%d ?\n", entry->type);
3076 
3077 	return trace_handle_return(s);
3078 }
3079 
3080 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3081 {
3082 	struct trace_array *tr = iter->tr;
3083 	struct trace_seq *s = &iter->seq;
3084 	unsigned char newline = '\n';
3085 	struct trace_entry *entry;
3086 	struct trace_event *event;
3087 
3088 	entry = iter->ent;
3089 
3090 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3091 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3092 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3093 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3094 		if (trace_seq_has_overflowed(s))
3095 			return TRACE_TYPE_PARTIAL_LINE;
3096 	}
3097 
3098 	event = ftrace_find_event(entry->type);
3099 	if (event) {
3100 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3101 		if (ret != TRACE_TYPE_HANDLED)
3102 			return ret;
3103 	}
3104 
3105 	SEQ_PUT_FIELD(s, newline);
3106 
3107 	return trace_handle_return(s);
3108 }
3109 
3110 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3111 {
3112 	struct trace_array *tr = iter->tr;
3113 	struct trace_seq *s = &iter->seq;
3114 	struct trace_entry *entry;
3115 	struct trace_event *event;
3116 
3117 	entry = iter->ent;
3118 
3119 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3120 		SEQ_PUT_FIELD(s, entry->pid);
3121 		SEQ_PUT_FIELD(s, iter->cpu);
3122 		SEQ_PUT_FIELD(s, iter->ts);
3123 		if (trace_seq_has_overflowed(s))
3124 			return TRACE_TYPE_PARTIAL_LINE;
3125 	}
3126 
3127 	event = ftrace_find_event(entry->type);
3128 	return event ? event->funcs->binary(iter, 0, event) :
3129 		TRACE_TYPE_HANDLED;
3130 }
3131 
3132 int trace_empty(struct trace_iterator *iter)
3133 {
3134 	struct ring_buffer_iter *buf_iter;
3135 	int cpu;
3136 
3137 	/* If we are looking at one CPU buffer, only check that one */
3138 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3139 		cpu = iter->cpu_file;
3140 		buf_iter = trace_buffer_iter(iter, cpu);
3141 		if (buf_iter) {
3142 			if (!ring_buffer_iter_empty(buf_iter))
3143 				return 0;
3144 		} else {
3145 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3146 				return 0;
3147 		}
3148 		return 1;
3149 	}
3150 
3151 	for_each_tracing_cpu(cpu) {
3152 		buf_iter = trace_buffer_iter(iter, cpu);
3153 		if (buf_iter) {
3154 			if (!ring_buffer_iter_empty(buf_iter))
3155 				return 0;
3156 		} else {
3157 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3158 				return 0;
3159 		}
3160 	}
3161 
3162 	return 1;
3163 }
3164 
3165 /*  Called with trace_event_read_lock() held. */
3166 enum print_line_t print_trace_line(struct trace_iterator *iter)
3167 {
3168 	struct trace_array *tr = iter->tr;
3169 	unsigned long trace_flags = tr->trace_flags;
3170 	enum print_line_t ret;
3171 
3172 	if (iter->lost_events) {
3173 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3174 				 iter->cpu, iter->lost_events);
3175 		if (trace_seq_has_overflowed(&iter->seq))
3176 			return TRACE_TYPE_PARTIAL_LINE;
3177 	}
3178 
3179 	if (iter->trace && iter->trace->print_line) {
3180 		ret = iter->trace->print_line(iter);
3181 		if (ret != TRACE_TYPE_UNHANDLED)
3182 			return ret;
3183 	}
3184 
3185 	if (iter->ent->type == TRACE_BPUTS &&
3186 			trace_flags & TRACE_ITER_PRINTK &&
3187 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3188 		return trace_print_bputs_msg_only(iter);
3189 
3190 	if (iter->ent->type == TRACE_BPRINT &&
3191 			trace_flags & TRACE_ITER_PRINTK &&
3192 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3193 		return trace_print_bprintk_msg_only(iter);
3194 
3195 	if (iter->ent->type == TRACE_PRINT &&
3196 			trace_flags & TRACE_ITER_PRINTK &&
3197 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3198 		return trace_print_printk_msg_only(iter);
3199 
3200 	if (trace_flags & TRACE_ITER_BIN)
3201 		return print_bin_fmt(iter);
3202 
3203 	if (trace_flags & TRACE_ITER_HEX)
3204 		return print_hex_fmt(iter);
3205 
3206 	if (trace_flags & TRACE_ITER_RAW)
3207 		return print_raw_fmt(iter);
3208 
3209 	return print_trace_fmt(iter);
3210 }
3211 
3212 void trace_latency_header(struct seq_file *m)
3213 {
3214 	struct trace_iterator *iter = m->private;
3215 	struct trace_array *tr = iter->tr;
3216 
3217 	/* print nothing if the buffers are empty */
3218 	if (trace_empty(iter))
3219 		return;
3220 
3221 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3222 		print_trace_header(m, iter);
3223 
3224 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3225 		print_lat_help_header(m);
3226 }
3227 
3228 void trace_default_header(struct seq_file *m)
3229 {
3230 	struct trace_iterator *iter = m->private;
3231 	struct trace_array *tr = iter->tr;
3232 	unsigned long trace_flags = tr->trace_flags;
3233 
3234 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3235 		return;
3236 
3237 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3238 		/* print nothing if the buffers are empty */
3239 		if (trace_empty(iter))
3240 			return;
3241 		print_trace_header(m, iter);
3242 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3243 			print_lat_help_header(m);
3244 	} else {
3245 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3246 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3247 				print_func_help_header_irq(iter->trace_buffer, m);
3248 			else
3249 				print_func_help_header(iter->trace_buffer, m);
3250 		}
3251 	}
3252 }
3253 
3254 static void test_ftrace_alive(struct seq_file *m)
3255 {
3256 	if (!ftrace_is_dead())
3257 		return;
3258 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3259 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3260 }
3261 
3262 #ifdef CONFIG_TRACER_MAX_TRACE
3263 static void show_snapshot_main_help(struct seq_file *m)
3264 {
3265 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3266 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3267 		    "#                      Takes a snapshot of the main buffer.\n"
3268 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3269 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3270 		    "#                       is not a '0' or '1')\n");
3271 }
3272 
3273 static void show_snapshot_percpu_help(struct seq_file *m)
3274 {
3275 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3276 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3277 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3278 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3279 #else
3280 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3281 		    "#                     Must use main snapshot file to allocate.\n");
3282 #endif
3283 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3284 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3285 		    "#                       is not a '0' or '1')\n");
3286 }
3287 
3288 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3289 {
3290 	if (iter->tr->allocated_snapshot)
3291 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3292 	else
3293 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3294 
3295 	seq_puts(m, "# Snapshot commands:\n");
3296 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3297 		show_snapshot_main_help(m);
3298 	else
3299 		show_snapshot_percpu_help(m);
3300 }
3301 #else
3302 /* Should never be called */
3303 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3304 #endif
3305 
3306 static int s_show(struct seq_file *m, void *v)
3307 {
3308 	struct trace_iterator *iter = v;
3309 	int ret;
3310 
3311 	if (iter->ent == NULL) {
3312 		if (iter->tr) {
3313 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3314 			seq_puts(m, "#\n");
3315 			test_ftrace_alive(m);
3316 		}
3317 		if (iter->snapshot && trace_empty(iter))
3318 			print_snapshot_help(m, iter);
3319 		else if (iter->trace && iter->trace->print_header)
3320 			iter->trace->print_header(m);
3321 		else
3322 			trace_default_header(m);
3323 
3324 	} else if (iter->leftover) {
3325 		/*
3326 		 * If we filled the seq_file buffer earlier, we
3327 		 * want to just show it now.
3328 		 */
3329 		ret = trace_print_seq(m, &iter->seq);
3330 
3331 		/* ret should this time be zero, but you never know */
3332 		iter->leftover = ret;
3333 
3334 	} else {
3335 		print_trace_line(iter);
3336 		ret = trace_print_seq(m, &iter->seq);
3337 		/*
3338 		 * If we overflow the seq_file buffer, then it will
3339 		 * ask us for this data again at start up.
3340 		 * Use that instead.
3341 		 *  ret is 0 if seq_file write succeeded.
3342 		 *        -1 otherwise.
3343 		 */
3344 		iter->leftover = ret;
3345 	}
3346 
3347 	return 0;
3348 }
3349 
3350 /*
3351  * Should be used after trace_array_get(); trace_types_lock
3352  * ensures that i_cdev was already initialized.
3353  */
3354 static inline int tracing_get_cpu(struct inode *inode)
3355 {
3356 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3357 		return (long)inode->i_cdev - 1;
3358 	return RING_BUFFER_ALL_CPUS;
3359 }
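
/*
 * Note: the per-CPU files evidently stash "cpu + 1" in i_cdev (hence
 * the "- 1" above), so that CPU 0 remains distinguishable from the
 * NULL i_cdev used by the all-CPUs files.
 */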
3360 
3361 static const struct seq_operations tracer_seq_ops = {
3362 	.start		= s_start,
3363 	.next		= s_next,
3364 	.stop		= s_stop,
3365 	.show		= s_show,
3366 };
3367 
3368 static struct trace_iterator *
3369 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3370 {
3371 	struct trace_array *tr = inode->i_private;
3372 	struct trace_iterator *iter;
3373 	int cpu;
3374 
3375 	if (tracing_disabled)
3376 		return ERR_PTR(-ENODEV);
3377 
3378 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3379 	if (!iter)
3380 		return ERR_PTR(-ENOMEM);
3381 
3382 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3383 				    GFP_KERNEL);
3384 	if (!iter->buffer_iter)
3385 		goto release;
3386 
3387 	/*
3388 	 * We make a copy of the current tracer to avoid concurrent
3389 	 * changes on it while we are reading.
3390 	 */
3391 	mutex_lock(&trace_types_lock);
3392 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3393 	if (!iter->trace)
3394 		goto fail;
3395 
3396 	*iter->trace = *tr->current_trace;
3397 
3398 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3399 		goto fail;
3400 
3401 	iter->tr = tr;
3402 
3403 #ifdef CONFIG_TRACER_MAX_TRACE
3404 	/* Currently only the top directory has a snapshot */
3405 	if (tr->current_trace->print_max || snapshot)
3406 		iter->trace_buffer = &tr->max_buffer;
3407 	else
3408 #endif
3409 		iter->trace_buffer = &tr->trace_buffer;
3410 	iter->snapshot = snapshot;
3411 	iter->pos = -1;
3412 	iter->cpu_file = tracing_get_cpu(inode);
3413 	mutex_init(&iter->mutex);
3414 
3415 	/* Notify the tracer early, before we stop tracing. */
3416 	if (iter->trace && iter->trace->open)
3417 		iter->trace->open(iter);
3418 
3419 	/* Annotate start of buffers if we had overruns */
3420 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3421 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3422 
3423 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3424 	if (trace_clocks[tr->clock_id].in_ns)
3425 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3426 
3427 	/* stop the trace while dumping if we are not opening "snapshot" */
3428 	if (!iter->snapshot)
3429 		tracing_stop_tr(tr);
3430 
3431 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3432 		for_each_tracing_cpu(cpu) {
3433 			iter->buffer_iter[cpu] =
3434 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3435 		}
3436 		ring_buffer_read_prepare_sync();
3437 		for_each_tracing_cpu(cpu) {
3438 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3439 			tracing_iter_reset(iter, cpu);
3440 		}
3441 	} else {
3442 		cpu = iter->cpu_file;
3443 		iter->buffer_iter[cpu] =
3444 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3445 		ring_buffer_read_prepare_sync();
3446 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3447 		tracing_iter_reset(iter, cpu);
3448 	}
3449 
3450 	mutex_unlock(&trace_types_lock);
3451 
3452 	return iter;
3453 
3454  fail:
3455 	mutex_unlock(&trace_types_lock);
3456 	kfree(iter->trace);
3457 	kfree(iter->buffer_iter);
3458 release:
3459 	seq_release_private(inode, file);
3460 	return ERR_PTR(-ENOMEM);
3461 }
3462 
3463 int tracing_open_generic(struct inode *inode, struct file *filp)
3464 {
3465 	if (tracing_disabled)
3466 		return -ENODEV;
3467 
3468 	filp->private_data = inode->i_private;
3469 	return 0;
3470 }
3471 
3472 bool tracing_is_disabled(void)
3473 {
3474 	return (tracing_disabled) ? true : false;
3475 }
3476 
3477 /*
3478  * Open and update trace_array ref count.
3479  * Must have the current trace_array passed to it.
3480  */
3481 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3482 {
3483 	struct trace_array *tr = inode->i_private;
3484 
3485 	if (tracing_disabled)
3486 		return -ENODEV;
3487 
3488 	if (trace_array_get(tr) < 0)
3489 		return -ENODEV;
3490 
3491 	filp->private_data = inode->i_private;
3492 
3493 	return 0;
3494 }
3495 
3496 static int tracing_release(struct inode *inode, struct file *file)
3497 {
3498 	struct trace_array *tr = inode->i_private;
3499 	struct seq_file *m = file->private_data;
3500 	struct trace_iterator *iter;
3501 	int cpu;
3502 
3503 	if (!(file->f_mode & FMODE_READ)) {
3504 		trace_array_put(tr);
3505 		return 0;
3506 	}
3507 
3508 	/* Writes do not use seq_file */
3509 	iter = m->private;
3510 	mutex_lock(&trace_types_lock);
3511 
3512 	for_each_tracing_cpu(cpu) {
3513 		if (iter->buffer_iter[cpu])
3514 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3515 	}
3516 
3517 	if (iter->trace && iter->trace->close)
3518 		iter->trace->close(iter);
3519 
3520 	if (!iter->snapshot)
3521 		/* reenable tracing if it was previously enabled */
3522 		tracing_start_tr(tr);
3523 
3524 	__trace_array_put(tr);
3525 
3526 	mutex_unlock(&trace_types_lock);
3527 
3528 	mutex_destroy(&iter->mutex);
3529 	free_cpumask_var(iter->started);
3530 	kfree(iter->trace);
3531 	kfree(iter->buffer_iter);
3532 	seq_release_private(inode, file);
3533 
3534 	return 0;
3535 }
3536 
3537 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3538 {
3539 	struct trace_array *tr = inode->i_private;
3540 
3541 	trace_array_put(tr);
3542 	return 0;
3543 }
3544 
3545 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3546 {
3547 	struct trace_array *tr = inode->i_private;
3548 
3549 	trace_array_put(tr);
3550 
3551 	return single_release(inode, file);
3552 }
3553 
3554 static int tracing_open(struct inode *inode, struct file *file)
3555 {
3556 	struct trace_array *tr = inode->i_private;
3557 	struct trace_iterator *iter;
3558 	int ret = 0;
3559 
3560 	if (trace_array_get(tr) < 0)
3561 		return -ENODEV;
3562 
3563 	/* If this file was open for write, then erase contents */
3564 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3565 		int cpu = tracing_get_cpu(inode);
3566 
3567 		if (cpu == RING_BUFFER_ALL_CPUS)
3568 			tracing_reset_online_cpus(&tr->trace_buffer);
3569 		else
3570 			tracing_reset(&tr->trace_buffer, cpu);
3571 	}
3572 
3573 	if (file->f_mode & FMODE_READ) {
3574 		iter = __tracing_open(inode, file, false);
3575 		if (IS_ERR(iter))
3576 			ret = PTR_ERR(iter);
3577 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3578 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3579 	}
3580 
3581 	if (ret < 0)
3582 		trace_array_put(tr);
3583 
3584 	return ret;
3585 }
3586 
3587 /*
3588  * Some tracers are not suitable for instance buffers.
3589  * A tracer is always available for the global array (toplevel)
3590  * or if it explicitly states that it is.
3591  */
3592 static bool
3593 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3594 {
3595 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3596 }
3597 
3598 /* Find the next tracer that this trace array may use */
3599 static struct tracer *
3600 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3601 {
3602 	while (t && !trace_ok_for_array(t, tr))
3603 		t = t->next;
3604 
3605 	return t;
3606 }
3607 
3608 static void *
3609 t_next(struct seq_file *m, void *v, loff_t *pos)
3610 {
3611 	struct trace_array *tr = m->private;
3612 	struct tracer *t = v;
3613 
3614 	(*pos)++;
3615 
3616 	if (t)
3617 		t = get_tracer_for_array(tr, t->next);
3618 
3619 	return t;
3620 }
3621 
3622 static void *t_start(struct seq_file *m, loff_t *pos)
3623 {
3624 	struct trace_array *tr = m->private;
3625 	struct tracer *t;
3626 	loff_t l = 0;
3627 
3628 	mutex_lock(&trace_types_lock);
3629 
3630 	t = get_tracer_for_array(tr, trace_types);
3631 	for (; t && l < *pos; t = t_next(m, t, &l))
3632 			;
3633 
3634 	return t;
3635 }
3636 
3637 static void t_stop(struct seq_file *m, void *p)
3638 {
3639 	mutex_unlock(&trace_types_lock);
3640 }
3641 
3642 static int t_show(struct seq_file *m, void *v)
3643 {
3644 	struct tracer *t = v;
3645 
3646 	if (!t)
3647 		return 0;
3648 
3649 	seq_puts(m, t->name);
3650 	if (t->next)
3651 		seq_putc(m, ' ');
3652 	else
3653 		seq_putc(m, '\n');
3654 
3655 	return 0;
3656 }
3657 
3658 static const struct seq_operations show_traces_seq_ops = {
3659 	.start		= t_start,
3660 	.next		= t_next,
3661 	.stop		= t_stop,
3662 	.show		= t_show,
3663 };
3664 
3665 static int show_traces_open(struct inode *inode, struct file *file)
3666 {
3667 	struct trace_array *tr = inode->i_private;
3668 	struct seq_file *m;
3669 	int ret;
3670 
3671 	if (tracing_disabled)
3672 		return -ENODEV;
3673 
3674 	ret = seq_open(file, &show_traces_seq_ops);
3675 	if (ret)
3676 		return ret;
3677 
3678 	m = file->private_data;
3679 	m->private = tr;
3680 
3681 	return 0;
3682 }
3683 
3684 static ssize_t
3685 tracing_write_stub(struct file *filp, const char __user *ubuf,
3686 		   size_t count, loff_t *ppos)
3687 {
3688 	return count;
3689 }
3690 
3691 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3692 {
3693 	int ret;
3694 
3695 	if (file->f_mode & FMODE_READ)
3696 		ret = seq_lseek(file, offset, whence);
3697 	else
3698 		file->f_pos = ret = 0;
3699 
3700 	return ret;
3701 }
3702 
3703 static const struct file_operations tracing_fops = {
3704 	.open		= tracing_open,
3705 	.read		= seq_read,
3706 	.write		= tracing_write_stub,
3707 	.llseek		= tracing_lseek,
3708 	.release	= tracing_release,
3709 };
3710 
3711 static const struct file_operations show_traces_fops = {
3712 	.open		= show_traces_open,
3713 	.read		= seq_read,
3714 	.release	= seq_release,
3715 	.llseek		= seq_lseek,
3716 };
3717 
3718 /*
3719  * The tracer itself will not take this lock, but we still want
3720  * to provide a consistent cpumask to user-space:
3721  */
3722 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3723 
3724 /*
3725  * Temporary storage for the character representation of the
3726  * CPU bitmask (and one more byte for the newline):
3727  */
3728 static char mask_str[NR_CPUS + 1];
3729 
3730 static ssize_t
3731 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3732 		     size_t count, loff_t *ppos)
3733 {
3734 	struct trace_array *tr = file_inode(filp)->i_private;
3735 	int len;
3736 
3737 	mutex_lock(&tracing_cpumask_update_lock);
3738 
3739 	len = snprintf(mask_str, count, "%*pb\n",
3740 		       cpumask_pr_args(tr->tracing_cpumask));
3741 	if (len >= count) {
3742 		count = -EINVAL;
3743 		goto out_err;
3744 	}
3745 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3746 
3747 out_err:
3748 	mutex_unlock(&tracing_cpumask_update_lock);
3749 
3750 	return count;
3751 }
3752 
3753 static ssize_t
3754 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3755 		      size_t count, loff_t *ppos)
3756 {
3757 	struct trace_array *tr = file_inode(filp)->i_private;
3758 	cpumask_var_t tracing_cpumask_new;
3759 	int err, cpu;
3760 
3761 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3762 		return -ENOMEM;
3763 
3764 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3765 	if (err)
3766 		goto err_unlock;
3767 
3768 	mutex_lock(&tracing_cpumask_update_lock);
3769 
3770 	local_irq_disable();
3771 	arch_spin_lock(&tr->max_lock);
3772 	for_each_tracing_cpu(cpu) {
3773 		/*
3774 		 * Increase/decrease the disabled counter if we are
3775 		 * about to flip a bit in the cpumask:
3776 		 */
3777 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3778 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3779 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3780 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3781 		}
3782 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3783 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3784 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3785 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3786 		}
3787 	}
3788 	arch_spin_unlock(&tr->max_lock);
3789 	local_irq_enable();
3790 
3791 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3792 
3793 	mutex_unlock(&tracing_cpumask_update_lock);
3794 	free_cpumask_var(tracing_cpumask_new);
3795 
3796 	return count;
3797 
3798 err_unlock:
3799 	free_cpumask_var(tracing_cpumask_new);
3800 
3801 	return err;
3802 }
3803 
3804 static const struct file_operations tracing_cpumask_fops = {
3805 	.open		= tracing_open_generic_tr,
3806 	.read		= tracing_cpumask_read,
3807 	.write		= tracing_cpumask_write,
3808 	.release	= tracing_release_generic_tr,
3809 	.llseek		= generic_file_llseek,
3810 };
3811 
3812 static int tracing_trace_options_show(struct seq_file *m, void *v)
3813 {
3814 	struct tracer_opt *trace_opts;
3815 	struct trace_array *tr = m->private;
3816 	u32 tracer_flags;
3817 	int i;
3818 
3819 	mutex_lock(&trace_types_lock);
3820 	tracer_flags = tr->current_trace->flags->val;
3821 	trace_opts = tr->current_trace->flags->opts;
3822 
3823 	for (i = 0; trace_options[i]; i++) {
3824 		if (tr->trace_flags & (1 << i))
3825 			seq_printf(m, "%s\n", trace_options[i]);
3826 		else
3827 			seq_printf(m, "no%s\n", trace_options[i]);
3828 	}
3829 
3830 	for (i = 0; trace_opts[i].name; i++) {
3831 		if (tracer_flags & trace_opts[i].bit)
3832 			seq_printf(m, "%s\n", trace_opts[i].name);
3833 		else
3834 			seq_printf(m, "no%s\n", trace_opts[i].name);
3835 	}
3836 	mutex_unlock(&trace_types_lock);
3837 
3838 	return 0;
3839 }
3840 
3841 static int __set_tracer_option(struct trace_array *tr,
3842 			       struct tracer_flags *tracer_flags,
3843 			       struct tracer_opt *opts, int neg)
3844 {
3845 	struct tracer *trace = tracer_flags->trace;
3846 	int ret;
3847 
3848 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3849 	if (ret)
3850 		return ret;
3851 
3852 	if (neg)
3853 		tracer_flags->val &= ~opts->bit;
3854 	else
3855 		tracer_flags->val |= opts->bit;
3856 	return 0;
3857 }
3858 
3859 /* Try to assign a tracer-specific option */
3860 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3861 {
3862 	struct tracer *trace = tr->current_trace;
3863 	struct tracer_flags *tracer_flags = trace->flags;
3864 	struct tracer_opt *opts = NULL;
3865 	int i;
3866 
3867 	for (i = 0; tracer_flags->opts[i].name; i++) {
3868 		opts = &tracer_flags->opts[i];
3869 
3870 		if (strcmp(cmp, opts->name) == 0)
3871 			return __set_tracer_option(tr, trace->flags, opts, neg);
3872 	}
3873 
3874 	return -EINVAL;
3875 }
3876 
3877 /* Some tracers require overwrite to stay enabled */
3878 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3879 {
3880 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3881 		return -1;
3882 
3883 	return 0;
3884 }
3885 
3886 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3887 {
3888 	/* do nothing if flag is already set */
3889 	if (!!(tr->trace_flags & mask) == !!enabled)
3890 		return 0;
3891 
3892 	/* Give the tracer a chance to approve the change */
3893 	if (tr->current_trace->flag_changed)
3894 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3895 			return -EINVAL;
3896 
3897 	if (enabled)
3898 		tr->trace_flags |= mask;
3899 	else
3900 		tr->trace_flags &= ~mask;
3901 
3902 	if (mask == TRACE_ITER_RECORD_CMD)
3903 		trace_event_enable_cmd_record(enabled);
3904 
3905 	if (mask == TRACE_ITER_EVENT_FORK)
3906 		trace_event_follow_fork(tr, enabled);
3907 
3908 	if (mask == TRACE_ITER_OVERWRITE) {
3909 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3910 #ifdef CONFIG_TRACER_MAX_TRACE
3911 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3912 #endif
3913 	}
3914 
3915 	if (mask == TRACE_ITER_PRINTK) {
3916 		trace_printk_start_stop_comm(enabled);
3917 		trace_printk_control(enabled);
3918 	}
3919 
3920 	return 0;
3921 }
3922 
3923 static int trace_set_options(struct trace_array *tr, char *option)
3924 {
3925 	char *cmp;
3926 	int neg = 0;
3927 	int ret = -ENODEV;
3928 	int i;
3929 	size_t orig_len = strlen(option);
3930 
3931 	cmp = strstrip(option);
3932 
3933 	if (strncmp(cmp, "no", 2) == 0) {
3934 		neg = 1;
3935 		cmp += 2;
3936 	}
3937 
3938 	mutex_lock(&trace_types_lock);
3939 
3940 	for (i = 0; trace_options[i]; i++) {
3941 		if (strcmp(cmp, trace_options[i]) == 0) {
3942 			ret = set_tracer_flag(tr, 1 << i, !neg);
3943 			break;
3944 		}
3945 	}
3946 
3947 	/* If no option could be set, test the specific tracer options */
3948 	if (!trace_options[i])
3949 		ret = set_tracer_option(tr, cmp, neg);
3950 
3951 	mutex_unlock(&trace_types_lock);
3952 
3953 	/*
3954 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
3955 	 * turn it back into a space.
3956 	 */
3957 	if (orig_len > strlen(option))
3958 		option[strlen(option)] = ' ';
3959 
3960 	return ret;
3961 }
3962 
3963 static void __init apply_trace_boot_options(void)
3964 {
3965 	char *buf = trace_boot_options_buf;
3966 	char *option;
3967 
3968 	while (true) {
3969 		option = strsep(&buf, ",");
3970 
3971 		if (!option)
3972 			break;
3973 
3974 		if (*option)
3975 			trace_set_options(&global_trace, option);
3976 
3977 		/* Put back the comma to allow this to be called again */
3978 		if (buf)
3979 			*(buf - 1) = ',';
3980 	}
3981 }
3982 
3983 static ssize_t
3984 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3985 			size_t cnt, loff_t *ppos)
3986 {
3987 	struct seq_file *m = filp->private_data;
3988 	struct trace_array *tr = m->private;
3989 	char buf[64];
3990 	int ret;
3991 
3992 	if (cnt >= sizeof(buf))
3993 		return -EINVAL;
3994 
3995 	if (copy_from_user(buf, ubuf, cnt))
3996 		return -EFAULT;
3997 
3998 	buf[cnt] = 0;
3999 
4000 	ret = trace_set_options(tr, buf);
4001 	if (ret < 0)
4002 		return ret;
4003 
4004 	*ppos += cnt;
4005 
4006 	return cnt;
4007 }
4008 
4009 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4010 {
4011 	struct trace_array *tr = inode->i_private;
4012 	int ret;
4013 
4014 	if (tracing_disabled)
4015 		return -ENODEV;
4016 
4017 	if (trace_array_get(tr) < 0)
4018 		return -ENODEV;
4019 
4020 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4021 	if (ret < 0)
4022 		trace_array_put(tr);
4023 
4024 	return ret;
4025 }
4026 
4027 static const struct file_operations tracing_iter_fops = {
4028 	.open		= tracing_trace_options_open,
4029 	.read		= seq_read,
4030 	.llseek		= seq_lseek,
4031 	.release	= tracing_single_release_tr,
4032 	.write		= tracing_trace_options_write,
4033 };
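
/*
 * Usage sketch for the trace_options file (illustrative).  Each line of
 * the file is an option name, prefixed with "no" when the option is off,
 * and writing a name toggles it the same way:
 *
 *   # echo noprint-parent > trace_options     # clear a core option
 *   # echo print-parent > trace_options       # set it again
 *
 * Tracer-specific options listed by tracing_trace_options_show() are
 * written the same way and are routed through set_tracer_option().
 */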
4034 
4035 static const char readme_msg[] =
4036 	"tracing mini-HOWTO:\n\n"
4037 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4038 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4039 	" Important files:\n"
4040 	"  trace\t\t\t- The static contents of the buffer\n"
4041 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4042 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4043 	"  current_tracer\t- function and latency tracers\n"
4044 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4045 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4046 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4047 	"  trace_clock\t\t-change the clock used to order events\n"
4048 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4049 	"      global:   Synced across CPUs but slows tracing down.\n"
4050 	"     counter:   Not a clock, but just an increment\n"
4051 	"      uptime:   Jiffy counter from time of boot\n"
4052 	"        perf:   Same clock that perf events use\n"
4053 #ifdef CONFIG_X86_64
4054 	"     x86-tsc:   TSC cycle counter\n"
4055 #endif
4056 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4057 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4058 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4059 	"\t\t\t  Remove sub-buffer with rmdir\n"
4060 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4061 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4062 	"\t\t\t  option name\n"
4063 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4064 #ifdef CONFIG_DYNAMIC_FTRACE
4065 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4066 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4067 	"\t\t\t  functions\n"
4068 	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4069 	"\t     modules: Can select a group via module\n"
4070 	"\t      Format: :mod:<module-name>\n"
4071 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4072 	"\t    triggers: a command to perform when function is hit\n"
4073 	"\t      Format: <function>:<trigger>[:count]\n"
4074 	"\t     trigger: traceon, traceoff\n"
4075 	"\t\t      enable_event:<system>:<event>\n"
4076 	"\t\t      disable_event:<system>:<event>\n"
4077 #ifdef CONFIG_STACKTRACE
4078 	"\t\t      stacktrace\n"
4079 #endif
4080 #ifdef CONFIG_TRACER_SNAPSHOT
4081 	"\t\t      snapshot\n"
4082 #endif
4083 	"\t\t      dump\n"
4084 	"\t\t      cpudump\n"
4085 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4086 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4087 	"\t     The first one will disable tracing every time do_fault is hit\n"
4088 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4089 	"\t       The first time do trap is hit and it disables tracing, the\n"
4090 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4091 	"\t       the counter will not decrement. It only decrements when the\n"
4092 	"\t       trigger did work\n"
4093 	"\t     To remove trigger without count:\n"
4094 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4095 	"\t     To remove trigger with a count:\n"
4096 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4097 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4098 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4099 	"\t    modules: Can select a group via module command :mod:\n"
4100 	"\t    Does not accept triggers\n"
4101 #endif /* CONFIG_DYNAMIC_FTRACE */
4102 #ifdef CONFIG_FUNCTION_TRACER
4103 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4104 	"\t\t    (function)\n"
4105 #endif
4106 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4107 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4108 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4109 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4110 #endif
4111 #ifdef CONFIG_TRACER_SNAPSHOT
4112 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4113 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4114 	"\t\t\t  information\n"
4115 #endif
4116 #ifdef CONFIG_STACK_TRACER
4117 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4118 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4119 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4120 	"\t\t\t  new trace)\n"
4121 #ifdef CONFIG_DYNAMIC_FTRACE
4122 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4123 	"\t\t\t  traces\n"
4124 #endif
4125 #endif /* CONFIG_STACK_TRACER */
4126 #ifdef CONFIG_KPROBE_EVENT
4127 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4128 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4129 #endif
4130 #ifdef CONFIG_UPROBE_EVENT
4131 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4132 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4133 #endif
4134 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4135 	"\t  accepts: event-definitions (one definition per line)\n"
4136 	"\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4137 	"\t           -:[<group>/]<event>\n"
4138 #ifdef CONFIG_KPROBE_EVENT
4139 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4140 #endif
4141 #ifdef CONFIG_UPROBE_EVENT
4142 	"\t    place: <path>:<offset>\n"
4143 #endif
4144 	"\t     args: <name>=fetcharg[:type]\n"
4145 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4146 	"\t           $stack<index>, $stack, $retval, $comm\n"
4147 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4148 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4149 #endif
4150 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4151 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4152 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4153 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4154 	"\t\t\t  events\n"
4155 	"      filter\t\t- If set, only events passing filter are traced\n"
4156 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4157 	"\t\t\t  <event>:\n"
4158 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4159 	"      filter\t\t- If set, only events passing filter are traced\n"
4160 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4161 	"\t    Format: <trigger>[:count][if <filter>]\n"
4162 	"\t   trigger: traceon, traceoff\n"
4163 	"\t            enable_event:<system>:<event>\n"
4164 	"\t            disable_event:<system>:<event>\n"
4165 #ifdef CONFIG_HIST_TRIGGERS
4166 	"\t            enable_hist:<system>:<event>\n"
4167 	"\t            disable_hist:<system>:<event>\n"
4168 #endif
4169 #ifdef CONFIG_STACKTRACE
4170 	"\t\t    stacktrace\n"
4171 #endif
4172 #ifdef CONFIG_TRACER_SNAPSHOT
4173 	"\t\t    snapshot\n"
4174 #endif
4175 #ifdef CONFIG_HIST_TRIGGERS
4176 	"\t\t    hist (see below)\n"
4177 #endif
4178 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4179 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4180 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4181 	"\t                  events/block/block_unplug/trigger\n"
4182 	"\t   The first disables tracing every time block_unplug is hit.\n"
4183 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4184 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4185 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4186 	"\t   Like function triggers, the counter is only decremented if it\n"
4187 	"\t    enabled or disabled tracing.\n"
4188 	"\t   To remove a trigger without a count:\n"
4189 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4190 	"\t   To remove a trigger with a count:\n"
4191 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4192 	"\t   Filters can be ignored when removing a trigger.\n"
4193 #ifdef CONFIG_HIST_TRIGGERS
4194 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4195 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4196 	"\t            [:values=<field1[,field2,...]>]\n"
4197 	"\t            [:sort=<field1[,field2,...]>]\n"
4198 	"\t            [:size=#entries]\n"
4199 	"\t            [:pause][:continue][:clear]\n"
4200 	"\t            [:name=histname1]\n"
4201 	"\t            [if <filter>]\n\n"
4202 	"\t    When a matching event is hit, an entry is added to a hash\n"
4203 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4204 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4205 	"\t    correspond to fields in the event's format description.  Keys\n"
4206 	"\t    can be any field, or the special string 'stacktrace'.\n"
4207 	"\t    Compound keys consisting of up to two fields can be specified\n"
4208 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4209 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4210 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4211 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4212 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4213 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4214 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4215 	"\t    its histogram data will be shared with other triggers of the\n"
4216 	"\t    same name, and trigger hits will update this common data.\n\n"
4217 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4218 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4219 	"\t    triggers attached to an event, there will be a table for each\n"
4220 	"\t    trigger in the output.  The table displayed for a named\n"
4221 	"\t    trigger will be the same as any other instance having the\n"
4222 	"\t    same name.  The default format used to display a given field\n"
4223 	"\t    can be modified by appending any of the following modifiers\n"
4224 	"\t    to the field name, as applicable:\n\n"
4225 	"\t            .hex        display a number as a hex value\n"
4226 	"\t            .sym        display an address as a symbol\n"
4227 	"\t            .sym-offset display an address as a symbol and offset\n"
4228 	"\t            .execname   display a common_pid as a program name\n"
4229 	"\t            .syscall    display a syscall id as a syscall name\n\n"
4230 	"\t            .log2       display log2 value rather than raw number\n\n"
4231 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4232 	"\t    trigger or to start a hist trigger but not log any events\n"
4233 	"\t    until told to do so.  'continue' can be used to start or\n"
4234 	"\t    restart a paused hist trigger.\n\n"
4235 	"\t    The 'clear' parameter will clear the contents of a running\n"
4236 	"\t    hist trigger and leave its current paused/active state\n"
4237 	"\t    unchanged.\n\n"
4238 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4239 	"\t    have one event conditionally start and stop another event's\n"
4240 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4241 	"\t    the enable_event and disable_event triggers.\n"
4242 #endif
4243 ;
4244 
4245 static ssize_t
4246 tracing_readme_read(struct file *filp, char __user *ubuf,
4247 		       size_t cnt, loff_t *ppos)
4248 {
4249 	return simple_read_from_buffer(ubuf, cnt, ppos,
4250 					readme_msg, strlen(readme_msg));
4251 }
4252 
4253 static const struct file_operations tracing_readme_fops = {
4254 	.open		= tracing_open_generic,
4255 	.read		= tracing_readme_read,
4256 	.llseek		= generic_file_llseek,
4257 };
4258 
4259 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4260 {
4261 	unsigned int *ptr = v;
4262 
4263 	if (*pos || m->count)
4264 		ptr++;
4265 
4266 	(*pos)++;
4267 
4268 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4269 	     ptr++) {
4270 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4271 			continue;
4272 
4273 		return ptr;
4274 	}
4275 
4276 	return NULL;
4277 }
4278 
4279 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4280 {
4281 	void *v;
4282 	loff_t l = 0;
4283 
4284 	preempt_disable();
4285 	arch_spin_lock(&trace_cmdline_lock);
4286 
4287 	v = &savedcmd->map_cmdline_to_pid[0];
4288 	while (l <= *pos) {
4289 		v = saved_cmdlines_next(m, v, &l);
4290 		if (!v)
4291 			return NULL;
4292 	}
4293 
4294 	return v;
4295 }
4296 
4297 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4298 {
4299 	arch_spin_unlock(&trace_cmdline_lock);
4300 	preempt_enable();
4301 }
4302 
4303 static int saved_cmdlines_show(struct seq_file *m, void *v)
4304 {
4305 	char buf[TASK_COMM_LEN];
4306 	unsigned int *pid = v;
4307 
4308 	__trace_find_cmdline(*pid, buf);
4309 	seq_printf(m, "%d %s\n", *pid, buf);
4310 	return 0;
4311 }
4312 
4313 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4314 	.start		= saved_cmdlines_start,
4315 	.next		= saved_cmdlines_next,
4316 	.stop		= saved_cmdlines_stop,
4317 	.show		= saved_cmdlines_show,
4318 };
4319 
4320 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4321 {
4322 	if (tracing_disabled)
4323 		return -ENODEV;
4324 
4325 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4326 }
4327 
4328 static const struct file_operations tracing_saved_cmdlines_fops = {
4329 	.open		= tracing_saved_cmdlines_open,
4330 	.read		= seq_read,
4331 	.llseek		= seq_lseek,
4332 	.release	= seq_release,
4333 };
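
/*
 * Illustrative output of the saved_cmdlines file: one "<pid> <comm>"
 * pair per line, as emitted by saved_cmdlines_show() above, e.g.:
 *
 *   # cat saved_cmdlines
 *   1 systemd
 *   27 kworker/0:1
 *
 * (The pids and comms shown are examples only.)
 */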
4334 
4335 static ssize_t
4336 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4337 				 size_t cnt, loff_t *ppos)
4338 {
4339 	char buf[64];
4340 	int r;
4341 
4342 	arch_spin_lock(&trace_cmdline_lock);
4343 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4344 	arch_spin_unlock(&trace_cmdline_lock);
4345 
4346 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4347 }
4348 
4349 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4350 {
4351 	kfree(s->saved_cmdlines);
4352 	kfree(s->map_cmdline_to_pid);
4353 	kfree(s);
4354 }
4355 
4356 static int tracing_resize_saved_cmdlines(unsigned int val)
4357 {
4358 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4359 
4360 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4361 	if (!s)
4362 		return -ENOMEM;
4363 
4364 	if (allocate_cmdlines_buffer(val, s) < 0) {
4365 		kfree(s);
4366 		return -ENOMEM;
4367 	}
4368 
4369 	arch_spin_lock(&trace_cmdline_lock);
4370 	savedcmd_temp = savedcmd;
4371 	savedcmd = s;
4372 	arch_spin_unlock(&trace_cmdline_lock);
4373 	free_saved_cmdlines_buffer(savedcmd_temp);
4374 
4375 	return 0;
4376 }
4377 
4378 static ssize_t
4379 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4380 				  size_t cnt, loff_t *ppos)
4381 {
4382 	unsigned long val;
4383 	int ret;
4384 
4385 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4386 	if (ret)
4387 		return ret;
4388 
4389 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4390 	if (!val || val > PID_MAX_DEFAULT)
4391 		return -EINVAL;
4392 
4393 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4394 	if (ret < 0)
4395 		return ret;
4396 
4397 	*ppos += cnt;
4398 
4399 	return cnt;
4400 }
4401 
4402 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4403 	.open		= tracing_open_generic,
4404 	.read		= tracing_saved_cmdlines_size_read,
4405 	.write		= tracing_saved_cmdlines_size_write,
4406 };
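
/*
 * Usage sketch for saved_cmdlines_size (illustrative).  The value is the
 * number of comm-pid entries kept; it must be at least 1 and no larger
 * than PID_MAX_DEFAULT, and writing it reallocates the cache via
 * tracing_resize_saved_cmdlines():
 *
 *   # cat saved_cmdlines_size
 *   128
 *   # echo 1024 > saved_cmdlines_size
 */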
4407 
4408 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4409 static union trace_enum_map_item *
4410 update_enum_map(union trace_enum_map_item *ptr)
4411 {
4412 	if (!ptr->map.enum_string) {
4413 		if (ptr->tail.next) {
4414 			ptr = ptr->tail.next;
4415 			/* Set ptr to the next real item (skip head) */
4416 			ptr++;
4417 		} else
4418 			return NULL;
4419 	}
4420 	return ptr;
4421 }
4422 
4423 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4424 {
4425 	union trace_enum_map_item *ptr = v;
4426 
4427 	/*
4428 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4429 	 * This really should never happen.
4430 	 */
4431 	ptr = update_enum_map(ptr);
4432 	if (WARN_ON_ONCE(!ptr))
4433 		return NULL;
4434 
4435 	ptr++;
4436 
4437 	(*pos)++;
4438 
4439 	ptr = update_enum_map(ptr);
4440 
4441 	return ptr;
4442 }
4443 
4444 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4445 {
4446 	union trace_enum_map_item *v;
4447 	loff_t l = 0;
4448 
4449 	mutex_lock(&trace_enum_mutex);
4450 
4451 	v = trace_enum_maps;
4452 	if (v)
4453 		v++;
4454 
4455 	while (v && l < *pos) {
4456 		v = enum_map_next(m, v, &l);
4457 	}
4458 
4459 	return v;
4460 }
4461 
4462 static void enum_map_stop(struct seq_file *m, void *v)
4463 {
4464 	mutex_unlock(&trace_enum_mutex);
4465 }
4466 
4467 static int enum_map_show(struct seq_file *m, void *v)
4468 {
4469 	union trace_enum_map_item *ptr = v;
4470 
4471 	seq_printf(m, "%s %ld (%s)\n",
4472 		   ptr->map.enum_string, ptr->map.enum_value,
4473 		   ptr->map.system);
4474 
4475 	return 0;
4476 }
4477 
4478 static const struct seq_operations tracing_enum_map_seq_ops = {
4479 	.start		= enum_map_start,
4480 	.next		= enum_map_next,
4481 	.stop		= enum_map_stop,
4482 	.show		= enum_map_show,
4483 };
4484 
4485 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4486 {
4487 	if (tracing_disabled)
4488 		return -ENODEV;
4489 
4490 	return seq_open(filp, &tracing_enum_map_seq_ops);
4491 }
4492 
4493 static const struct file_operations tracing_enum_map_fops = {
4494 	.open		= tracing_enum_map_open,
4495 	.read		= seq_read,
4496 	.llseek		= seq_lseek,
4497 	.release	= seq_release,
4498 };
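
/*
 * Illustrative enum_map output: enum_map_show() prints one
 * "<enum-string> <value> (<system>)" line per registered enum, e.g.:
 *
 *   # cat enum_map
 *   ZONE_NORMAL 1 (vmscan)
 *
 * (The entry shown is an example only; the real contents depend on the
 * TRACE_DEFINE_ENUM() users built into the kernel.)
 */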
4499 
4500 static inline union trace_enum_map_item *
4501 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4502 {
4503 	/* Return tail of array given the head */
4504 	return ptr + ptr->head.length + 1;
4505 }
4506 
4507 static void
4508 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4509 			   int len)
4510 {
4511 	struct trace_enum_map **stop;
4512 	struct trace_enum_map **map;
4513 	union trace_enum_map_item *map_array;
4514 	union trace_enum_map_item *ptr;
4515 
4516 	stop = start + len;
4517 
4518 	/*
4519 	 * The trace_enum_maps list stores each map array plus a head and a
4520 	 * tail item, where the head holds the module and the length of the
4521 	 * array, and the tail holds a pointer to the next array.
4522 	 */
4523 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4524 	if (!map_array) {
4525 		pr_warn("Unable to allocate trace enum mapping\n");
4526 		return;
4527 	}
4528 
4529 	mutex_lock(&trace_enum_mutex);
4530 
4531 	if (!trace_enum_maps)
4532 		trace_enum_maps = map_array;
4533 	else {
4534 		ptr = trace_enum_maps;
4535 		for (;;) {
4536 			ptr = trace_enum_jmp_to_tail(ptr);
4537 			if (!ptr->tail.next)
4538 				break;
4539 			ptr = ptr->tail.next;
4540 
4541 		}
4542 		ptr->tail.next = map_array;
4543 	}
4544 	map_array->head.mod = mod;
4545 	map_array->head.length = len;
4546 	map_array++;
4547 
4548 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4549 		map_array->map = **map;
4550 		map_array++;
4551 	}
4552 	memset(map_array, 0, sizeof(*map_array));
4553 
4554 	mutex_unlock(&trace_enum_mutex);
4555 }
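
/*
 * Layout of one map_array chunk built above (a sketch of the code flow;
 * indices are relative to the kmalloc'ed array of len + 2 items):
 *
 *   [0]          head: .mod and .length = len
 *   [1..len]     one trace_enum_map copied per entry in start[]
 *   [len + 1]    zeroed item; its tail.next links to the next chunk
 *
 * trace_enum_jmp_to_tail() returns the [len + 1] slot given the head.
 */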
4556 
4557 static void trace_create_enum_file(struct dentry *d_tracer)
4558 {
4559 	trace_create_file("enum_map", 0444, d_tracer,
4560 			  NULL, &tracing_enum_map_fops);
4561 }
4562 
4563 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4564 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4565 static inline void trace_insert_enum_map_file(struct module *mod,
4566 			      struct trace_enum_map **start, int len) { }
4567 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4568 
4569 static void trace_insert_enum_map(struct module *mod,
4570 				  struct trace_enum_map **start, int len)
4571 {
4572 	struct trace_enum_map **map;
4573 
4574 	if (len <= 0)
4575 		return;
4576 
4577 	map = start;
4578 
4579 	trace_event_enum_update(map, len);
4580 
4581 	trace_insert_enum_map_file(mod, start, len);
4582 }
4583 
4584 static ssize_t
4585 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4586 		       size_t cnt, loff_t *ppos)
4587 {
4588 	struct trace_array *tr = filp->private_data;
4589 	char buf[MAX_TRACER_SIZE+2];
4590 	int r;
4591 
4592 	mutex_lock(&trace_types_lock);
4593 	r = sprintf(buf, "%s\n", tr->current_trace->name);
4594 	mutex_unlock(&trace_types_lock);
4595 
4596 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4597 }
4598 
4599 int tracer_init(struct tracer *t, struct trace_array *tr)
4600 {
4601 	tracing_reset_online_cpus(&tr->trace_buffer);
4602 	return t->init(tr);
4603 }
4604 
4605 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4606 {
4607 	int cpu;
4608 
4609 	for_each_tracing_cpu(cpu)
4610 		per_cpu_ptr(buf->data, cpu)->entries = val;
4611 }
4612 
4613 #ifdef CONFIG_TRACER_MAX_TRACE
4614 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4615 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4616 					struct trace_buffer *size_buf, int cpu_id)
4617 {
4618 	int cpu, ret = 0;
4619 
4620 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4621 		for_each_tracing_cpu(cpu) {
4622 			ret = ring_buffer_resize(trace_buf->buffer,
4623 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4624 			if (ret < 0)
4625 				break;
4626 			per_cpu_ptr(trace_buf->data, cpu)->entries =
4627 				per_cpu_ptr(size_buf->data, cpu)->entries;
4628 		}
4629 	} else {
4630 		ret = ring_buffer_resize(trace_buf->buffer,
4631 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4632 		if (ret == 0)
4633 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4634 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4635 	}
4636 
4637 	return ret;
4638 }
4639 #endif /* CONFIG_TRACER_MAX_TRACE */
4640 
4641 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4642 					unsigned long size, int cpu)
4643 {
4644 	int ret;
4645 
4646 	/*
4647 	 * If the kernel or the user changes the size of the ring buffer,
4648 	 * we use the size that was given and can forget about
4649 	 * expanding it later.
4650 	 */
4651 	ring_buffer_expanded = true;
4652 
4653 	/* May be called before buffers are initialized */
4654 	if (!tr->trace_buffer.buffer)
4655 		return 0;
4656 
4657 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4658 	if (ret < 0)
4659 		return ret;
4660 
4661 #ifdef CONFIG_TRACER_MAX_TRACE
4662 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4663 	    !tr->current_trace->use_max_tr)
4664 		goto out;
4665 
4666 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4667 	if (ret < 0) {
4668 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4669 						     &tr->trace_buffer, cpu);
4670 		if (r < 0) {
4671 			/*
4672 			 * AARGH! We are left with a different-sized
4673 			 * max buffer!!!!
4674 			 * The max buffer is our "snapshot" buffer.
4675 			 * When a tracer needs a snapshot (one of the
4676 			 * latency tracers), it swaps the max buffer
4677 			 * with the saved snap shot. We succeeded to
4678 			 * update the size of the main buffer, but failed to
4679 			 * update the size of the max buffer. But when we tried
4680 			 * to reset the main buffer to the original size, we
4681 			 * failed there too. This is very unlikely to
4682 			 * happen, but if it does, warn and kill all
4683 			 * tracing.
4684 			 */
4685 			WARN_ON(1);
4686 			tracing_disabled = 1;
4687 		}
4688 		return ret;
4689 	}
4690 
4691 	if (cpu == RING_BUFFER_ALL_CPUS)
4692 		set_buffer_entries(&tr->max_buffer, size);
4693 	else
4694 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4695 
4696  out:
4697 #endif /* CONFIG_TRACER_MAX_TRACE */
4698 
4699 	if (cpu == RING_BUFFER_ALL_CPUS)
4700 		set_buffer_entries(&tr->trace_buffer, size);
4701 	else
4702 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4703 
4704 	return ret;
4705 }
4706 
4707 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4708 					  unsigned long size, int cpu_id)
4709 {
4710 	int ret = size;
4711 
4712 	mutex_lock(&trace_types_lock);
4713 
4714 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4715 		/* make sure this cpu is enabled in the mask */
4716 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4717 			ret = -EINVAL;
4718 			goto out;
4719 		}
4720 	}
4721 
4722 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4723 	if (ret < 0)
4724 		ret = -ENOMEM;
4725 
4726 out:
4727 	mutex_unlock(&trace_types_lock);
4728 
4729 	return ret;
4730 }
4731 
4732 
4733 /**
4734  * tracing_update_buffers - used by tracing facility to expand ring buffers
4735  *
4736  * To save memory when tracing is never used on a system that has it
4737  * configured in, the ring buffers are set to a minimum size. Once a
4738  * user starts to use the tracing facility, they need to grow to
4739  * their default size.
4740  *
4741  * This function is to be called when a tracer is about to be used.
4742  */
4743 int tracing_update_buffers(void)
4744 {
4745 	int ret = 0;
4746 
4747 	mutex_lock(&trace_types_lock);
4748 	if (!ring_buffer_expanded)
4749 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4750 						RING_BUFFER_ALL_CPUS);
4751 	mutex_unlock(&trace_types_lock);
4752 
4753 	return ret;
4754 }
4755 
4756 struct trace_option_dentry;
4757 
4758 static void
4759 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4760 
4761 /*
4762  * Used to clear out the tracer before deletion of an instance.
4763  * Must have trace_types_lock held.
4764  */
4765 static void tracing_set_nop(struct trace_array *tr)
4766 {
4767 	if (tr->current_trace == &nop_trace)
4768 		return;
4769 
4770 	tr->current_trace->enabled--;
4771 
4772 	if (tr->current_trace->reset)
4773 		tr->current_trace->reset(tr);
4774 
4775 	tr->current_trace = &nop_trace;
4776 }
4777 
4778 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4779 {
4780 	/* Only enable if the directory has been created already. */
4781 	if (!tr->dir)
4782 		return;
4783 
4784 	create_trace_option_files(tr, t);
4785 }
4786 
4787 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4788 {
4789 	struct tracer *t;
4790 #ifdef CONFIG_TRACER_MAX_TRACE
4791 	bool had_max_tr;
4792 #endif
4793 	int ret = 0;
4794 
4795 	mutex_lock(&trace_types_lock);
4796 
4797 	if (!ring_buffer_expanded) {
4798 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4799 						RING_BUFFER_ALL_CPUS);
4800 		if (ret < 0)
4801 			goto out;
4802 		ret = 0;
4803 	}
4804 
4805 	for (t = trace_types; t; t = t->next) {
4806 		if (strcmp(t->name, buf) == 0)
4807 			break;
4808 	}
4809 	if (!t) {
4810 		ret = -EINVAL;
4811 		goto out;
4812 	}
4813 	if (t == tr->current_trace)
4814 		goto out;
4815 
4816 	/* Some tracers are only allowed for the top level buffer */
4817 	if (!trace_ok_for_array(t, tr)) {
4818 		ret = -EINVAL;
4819 		goto out;
4820 	}
4821 
4822 	/* If trace pipe files are being read, we can't change the tracer */
4823 	if (tr->current_trace->ref) {
4824 		ret = -EBUSY;
4825 		goto out;
4826 	}
4827 
4828 	trace_branch_disable();
4829 
4830 	tr->current_trace->enabled--;
4831 
4832 	if (tr->current_trace->reset)
4833 		tr->current_trace->reset(tr);
4834 
4835 	/* Current trace needs to be nop_trace before synchronize_sched */
4836 	tr->current_trace = &nop_trace;
4837 
4838 #ifdef CONFIG_TRACER_MAX_TRACE
4839 	had_max_tr = tr->allocated_snapshot;
4840 
4841 	if (had_max_tr && !t->use_max_tr) {
4842 		/*
4843 		 * We need to make sure that the update_max_tr sees that
4844 		 * current_trace changed to nop_trace to keep it from
4845 		 * swapping the buffers after we resize it.
4846 		 * update_max_tr() is called with interrupts disabled,
4847 		 * so a synchronize_sched() is sufficient.
4848 		 */
4849 		synchronize_sched();
4850 		free_snapshot(tr);
4851 	}
4852 #endif
4853 
4854 #ifdef CONFIG_TRACER_MAX_TRACE
4855 	if (t->use_max_tr && !had_max_tr) {
4856 		ret = alloc_snapshot(tr);
4857 		if (ret < 0)
4858 			goto out;
4859 	}
4860 #endif
4861 
4862 	if (t->init) {
4863 		ret = tracer_init(t, tr);
4864 		if (ret)
4865 			goto out;
4866 	}
4867 
4868 	tr->current_trace = t;
4869 	tr->current_trace->enabled++;
4870 	trace_branch_enable(tr);
4871  out:
4872 	mutex_unlock(&trace_types_lock);
4873 
4874 	return ret;
4875 }
4876 
4877 static ssize_t
4878 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4879 			size_t cnt, loff_t *ppos)
4880 {
4881 	struct trace_array *tr = filp->private_data;
4882 	char buf[MAX_TRACER_SIZE+1];
4883 	int i;
4884 	size_t ret;
4885 	int err;
4886 
4887 	ret = cnt;
4888 
4889 	if (cnt > MAX_TRACER_SIZE)
4890 		cnt = MAX_TRACER_SIZE;
4891 
4892 	if (copy_from_user(buf, ubuf, cnt))
4893 		return -EFAULT;
4894 
4895 	buf[cnt] = 0;
4896 
4897 	/* strip trailing whitespace. */
4898 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4899 		buf[i] = 0;
4900 
4901 	err = tracing_set_tracer(tr, buf);
4902 	if (err)
4903 		return err;
4904 
4905 	*ppos += ret;
4906 
4907 	return ret;
4908 }
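
/*
 * Usage sketch for the current_tracer file (illustrative):
 *
 *   # cat available_tracers
 *   blk function_graph function nop
 *   # echo function > current_tracer
 *   # echo nop > current_tracer          # back to no tracer
 *
 * An unknown name makes tracing_set_tracer() return -EINVAL, and the
 * tracer cannot be changed while trace_pipe readers hold a reference
 * (-EBUSY).
 */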
4909 
4910 static ssize_t
4911 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4912 		   size_t cnt, loff_t *ppos)
4913 {
4914 	char buf[64];
4915 	int r;
4916 
4917 	r = snprintf(buf, sizeof(buf), "%ld\n",
4918 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4919 	if (r > sizeof(buf))
4920 		r = sizeof(buf);
4921 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4922 }
4923 
4924 static ssize_t
4925 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4926 		    size_t cnt, loff_t *ppos)
4927 {
4928 	unsigned long val;
4929 	int ret;
4930 
4931 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4932 	if (ret)
4933 		return ret;
4934 
4935 	*ptr = val * 1000;
4936 
4937 	return cnt;
4938 }
4939 
4940 static ssize_t
4941 tracing_thresh_read(struct file *filp, char __user *ubuf,
4942 		    size_t cnt, loff_t *ppos)
4943 {
4944 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4945 }
4946 
4947 static ssize_t
4948 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4949 		     size_t cnt, loff_t *ppos)
4950 {
4951 	struct trace_array *tr = filp->private_data;
4952 	int ret;
4953 
4954 	mutex_lock(&trace_types_lock);
4955 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4956 	if (ret < 0)
4957 		goto out;
4958 
4959 	if (tr->current_trace->update_thresh) {
4960 		ret = tr->current_trace->update_thresh(tr);
4961 		if (ret < 0)
4962 			goto out;
4963 	}
4964 
4965 	ret = cnt;
4966 out:
4967 	mutex_unlock(&trace_types_lock);
4968 
4969 	return ret;
4970 }
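
/*
 * Note on units (illustrative): tracing_thresh is stored in nanoseconds
 * but exposed in microseconds -- tracing_nsecs_write() multiplies the
 * written value by 1000 and tracing_nsecs_read() converts back, so:
 *
 *   # echo 100 > tracing_thresh     # 100 microseconds
 */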
4971 
4972 #ifdef CONFIG_TRACER_MAX_TRACE
4973 
4974 static ssize_t
4975 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4976 		     size_t cnt, loff_t *ppos)
4977 {
4978 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4979 }
4980 
4981 static ssize_t
4982 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4983 		      size_t cnt, loff_t *ppos)
4984 {
4985 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4986 }
4987 
4988 #endif
4989 
4990 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4991 {
4992 	struct trace_array *tr = inode->i_private;
4993 	struct trace_iterator *iter;
4994 	int ret = 0;
4995 
4996 	if (tracing_disabled)
4997 		return -ENODEV;
4998 
4999 	if (trace_array_get(tr) < 0)
5000 		return -ENODEV;
5001 
5002 	mutex_lock(&trace_types_lock);
5003 
5004 	/* create a buffer to store the information to pass to userspace */
5005 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5006 	if (!iter) {
5007 		ret = -ENOMEM;
5008 		__trace_array_put(tr);
5009 		goto out;
5010 	}
5011 
5012 	trace_seq_init(&iter->seq);
5013 	iter->trace = tr->current_trace;
5014 
5015 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5016 		ret = -ENOMEM;
5017 		goto fail;
5018 	}
5019 
5020 	/* trace pipe does not show start of buffer */
5021 	cpumask_setall(iter->started);
5022 
5023 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5024 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5025 
5026 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5027 	if (trace_clocks[tr->clock_id].in_ns)
5028 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5029 
5030 	iter->tr = tr;
5031 	iter->trace_buffer = &tr->trace_buffer;
5032 	iter->cpu_file = tracing_get_cpu(inode);
5033 	mutex_init(&iter->mutex);
5034 	filp->private_data = iter;
5035 
5036 	if (iter->trace->pipe_open)
5037 		iter->trace->pipe_open(iter);
5038 
5039 	nonseekable_open(inode, filp);
5040 
5041 	tr->current_trace->ref++;
5042 out:
5043 	mutex_unlock(&trace_types_lock);
5044 	return ret;
5045 
5046 fail:
5047 	kfree(iter->trace);
5048 	kfree(iter);
5049 	__trace_array_put(tr);
5050 	mutex_unlock(&trace_types_lock);
5051 	return ret;
5052 }
5053 
5054 static int tracing_release_pipe(struct inode *inode, struct file *file)
5055 {
5056 	struct trace_iterator *iter = file->private_data;
5057 	struct trace_array *tr = inode->i_private;
5058 
5059 	mutex_lock(&trace_types_lock);
5060 
5061 	tr->current_trace->ref--;
5062 
5063 	if (iter->trace->pipe_close)
5064 		iter->trace->pipe_close(iter);
5065 
5066 	mutex_unlock(&trace_types_lock);
5067 
5068 	free_cpumask_var(iter->started);
5069 	mutex_destroy(&iter->mutex);
5070 	kfree(iter);
5071 
5072 	trace_array_put(tr);
5073 
5074 	return 0;
5075 }
5076 
5077 static unsigned int
5078 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5079 {
5080 	struct trace_array *tr = iter->tr;
5081 
5082 	/* Iterators are static; they should be either filled or empty */
5083 	if (trace_buffer_iter(iter, iter->cpu_file))
5084 		return POLLIN | POLLRDNORM;
5085 
5086 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5087 		/*
5088 		 * Always select as readable when in blocking mode
5089 		 */
5090 		return POLLIN | POLLRDNORM;
5091 	else
5092 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5093 					     filp, poll_table);
5094 }
5095 
5096 static unsigned int
5097 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5098 {
5099 	struct trace_iterator *iter = filp->private_data;
5100 
5101 	return trace_poll(iter, filp, poll_table);
5102 }
5103 
5104 /* Must be called with iter->mutex held. */
5105 static int tracing_wait_pipe(struct file *filp)
5106 {
5107 	struct trace_iterator *iter = filp->private_data;
5108 	int ret;
5109 
5110 	while (trace_empty(iter)) {
5111 
5112 		if ((filp->f_flags & O_NONBLOCK)) {
5113 			return -EAGAIN;
5114 		}
5115 
5116 		/*
5117 		 * We block while the buffer is empty. If tracing is disabled
5118 		 * but we have never read anything, we keep blocking; this
5119 		 * allows a user to cat this file and then enable tracing.
5120 		 * But once we have read something, we give an EOF when
5121 		 * tracing is disabled again.
5122 		 *
5123 		 * iter->pos will be 0 if we haven't read anything.
5124 		 */
5125 		if (!tracing_is_on() && iter->pos)
5126 			break;
5127 
5128 		mutex_unlock(&iter->mutex);
5129 
5130 		ret = wait_on_pipe(iter, false);
5131 
5132 		mutex_lock(&iter->mutex);
5133 
5134 		if (ret)
5135 			return ret;
5136 	}
5137 
5138 	return 1;
5139 }
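
/*
 * Behavioural sketch of the trace_pipe read path (illustrative):
 * "cat trace_pipe" performs a consuming read and blocks while the
 * buffer is empty; opening with O_NONBLOCK returns -EAGAIN instead.
 * EOF is only reported once something has been read and tracing is
 * turned off again (see the iter->pos check above).
 */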
5140 
5141 /*
5142  * Consumer reader.
5143  */
5144 static ssize_t
5145 tracing_read_pipe(struct file *filp, char __user *ubuf,
5146 		  size_t cnt, loff_t *ppos)
5147 {
5148 	struct trace_iterator *iter = filp->private_data;
5149 	ssize_t sret;
5150 
5151 	/* return any leftover data */
5152 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5153 	if (sret != -EBUSY)
5154 		return sret;
5155 
5156 	trace_seq_init(&iter->seq);
5157 
5158 	/*
5159 	 * Avoid more than one consumer on a single file descriptor.
5160 	 * This is just a matter of trace coherency; the ring buffer itself
5161 	 * is protected.
5162 	 */
5163 	mutex_lock(&iter->mutex);
5164 	if (iter->trace->read) {
5165 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5166 		if (sret)
5167 			goto out;
5168 	}
5169 
5170 waitagain:
5171 	sret = tracing_wait_pipe(filp);
5172 	if (sret <= 0)
5173 		goto out;
5174 
5175 	/* stop when tracing is finished */
5176 	if (trace_empty(iter)) {
5177 		sret = 0;
5178 		goto out;
5179 	}
5180 
5181 	if (cnt >= PAGE_SIZE)
5182 		cnt = PAGE_SIZE - 1;
5183 
5184 	/* reset all but tr, trace, and overruns */
5185 	memset(&iter->seq, 0,
5186 	       sizeof(struct trace_iterator) -
5187 	       offsetof(struct trace_iterator, seq));
5188 	cpumask_clear(iter->started);
5189 	iter->pos = -1;
5190 
5191 	trace_event_read_lock();
5192 	trace_access_lock(iter->cpu_file);
5193 	while (trace_find_next_entry_inc(iter) != NULL) {
5194 		enum print_line_t ret;
5195 		int save_len = iter->seq.seq.len;
5196 
5197 		ret = print_trace_line(iter);
5198 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5199 			/* don't print partial lines */
5200 			iter->seq.seq.len = save_len;
5201 			break;
5202 		}
5203 		if (ret != TRACE_TYPE_NO_CONSUME)
5204 			trace_consume(iter);
5205 
5206 		if (trace_seq_used(&iter->seq) >= cnt)
5207 			break;
5208 
5209 		/*
5210 		 * The full flag being set means we reached the trace_seq buffer
5211 		 * size and should have left via the partial-line condition above;
5212 		 * one of the trace_seq_* functions is not being used properly.
5213 		 */
5214 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5215 			  iter->ent->type);
5216 	}
5217 	trace_access_unlock(iter->cpu_file);
5218 	trace_event_read_unlock();
5219 
5220 	/* Now copy what we have to the user */
5221 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5222 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5223 		trace_seq_init(&iter->seq);
5224 
5225 	/*
5226 	 * If there was nothing to send to the user, in spite of consuming trace
5227 	 * entries, go back to wait for more entries.
5228 	 */
5229 	if (sret == -EBUSY)
5230 		goto waitagain;
5231 
5232 out:
5233 	mutex_unlock(&iter->mutex);
5234 
5235 	return sret;
5236 }
5237 
5238 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5239 				     unsigned int idx)
5240 {
5241 	__free_page(spd->pages[idx]);
5242 }
5243 
5244 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5245 	.can_merge		= 0,
5246 	.confirm		= generic_pipe_buf_confirm,
5247 	.release		= generic_pipe_buf_release,
5248 	.steal			= generic_pipe_buf_steal,
5249 	.get			= generic_pipe_buf_get,
5250 };
5251 
5252 static size_t
5253 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5254 {
5255 	size_t count;
5256 	int save_len;
5257 	int ret;
5258 
5259 	/* Seq buffer is page-sized, exactly what we need. */
5260 	for (;;) {
5261 		save_len = iter->seq.seq.len;
5262 		ret = print_trace_line(iter);
5263 
5264 		if (trace_seq_has_overflowed(&iter->seq)) {
5265 			iter->seq.seq.len = save_len;
5266 			break;
5267 		}
5268 
5269 		/*
5270 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
5271 		 * only be returned if the iter->seq overflowed. But check it
5272 		 * anyway to be safe.
5273 		 */
5274 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5275 			iter->seq.seq.len = save_len;
5276 			break;
5277 		}
5278 
5279 		count = trace_seq_used(&iter->seq) - save_len;
5280 		if (rem < count) {
5281 			rem = 0;
5282 			iter->seq.seq.len = save_len;
5283 			break;
5284 		}
5285 
5286 		if (ret != TRACE_TYPE_NO_CONSUME)
5287 			trace_consume(iter);
5288 		rem -= count;
5289 		if (!trace_find_next_entry_inc(iter))	{
5290 			rem = 0;
5291 			iter->ent = NULL;
5292 			break;
5293 		}
5294 	}
5295 
5296 	return rem;
5297 }
5298 
5299 static ssize_t tracing_splice_read_pipe(struct file *filp,
5300 					loff_t *ppos,
5301 					struct pipe_inode_info *pipe,
5302 					size_t len,
5303 					unsigned int flags)
5304 {
5305 	struct page *pages_def[PIPE_DEF_BUFFERS];
5306 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5307 	struct trace_iterator *iter = filp->private_data;
5308 	struct splice_pipe_desc spd = {
5309 		.pages		= pages_def,
5310 		.partial	= partial_def,
5311 		.nr_pages	= 0, /* This gets updated below. */
5312 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5313 		.flags		= flags,
5314 		.ops		= &tracing_pipe_buf_ops,
5315 		.spd_release	= tracing_spd_release_pipe,
5316 	};
5317 	ssize_t ret;
5318 	size_t rem;
5319 	unsigned int i;
5320 
5321 	if (splice_grow_spd(pipe, &spd))
5322 		return -ENOMEM;
5323 
5324 	mutex_lock(&iter->mutex);
5325 
5326 	if (iter->trace->splice_read) {
5327 		ret = iter->trace->splice_read(iter, filp,
5328 					       ppos, pipe, len, flags);
5329 		if (ret)
5330 			goto out_err;
5331 	}
5332 
5333 	ret = tracing_wait_pipe(filp);
5334 	if (ret <= 0)
5335 		goto out_err;
5336 
5337 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5338 		ret = -EFAULT;
5339 		goto out_err;
5340 	}
5341 
5342 	trace_event_read_lock();
5343 	trace_access_lock(iter->cpu_file);
5344 
5345 	/* Fill as many pages as possible. */
5346 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5347 		spd.pages[i] = alloc_page(GFP_KERNEL);
5348 		if (!spd.pages[i])
5349 			break;
5350 
5351 		rem = tracing_fill_pipe_page(rem, iter);
5352 
5353 		/* Copy the data into the page, so we can start over. */
5354 		ret = trace_seq_to_buffer(&iter->seq,
5355 					  page_address(spd.pages[i]),
5356 					  trace_seq_used(&iter->seq));
5357 		if (ret < 0) {
5358 			__free_page(spd.pages[i]);
5359 			break;
5360 		}
5361 		spd.partial[i].offset = 0;
5362 		spd.partial[i].len = trace_seq_used(&iter->seq);
5363 
5364 		trace_seq_init(&iter->seq);
5365 	}
5366 
5367 	trace_access_unlock(iter->cpu_file);
5368 	trace_event_read_unlock();
5369 	mutex_unlock(&iter->mutex);
5370 
5371 	spd.nr_pages = i;
5372 
5373 	if (i)
5374 		ret = splice_to_pipe(pipe, &spd);
5375 	else
5376 		ret = 0;
5377 out:
5378 	splice_shrink_spd(&spd);
5379 	return ret;
5380 
5381 out_err:
5382 	mutex_unlock(&iter->mutex);
5383 	goto out;
5384 }
5385 
5386 static ssize_t
5387 tracing_entries_read(struct file *filp, char __user *ubuf,
5388 		     size_t cnt, loff_t *ppos)
5389 {
5390 	struct inode *inode = file_inode(filp);
5391 	struct trace_array *tr = inode->i_private;
5392 	int cpu = tracing_get_cpu(inode);
5393 	char buf[64];
5394 	int r = 0;
5395 	ssize_t ret;
5396 
5397 	mutex_lock(&trace_types_lock);
5398 
5399 	if (cpu == RING_BUFFER_ALL_CPUS) {
5400 		int cpu, buf_size_same;
5401 		unsigned long size;
5402 
5403 		size = 0;
5404 		buf_size_same = 1;
5405 		/* check if all cpu sizes are same */
5406 		for_each_tracing_cpu(cpu) {
5407 			/* fill in the size from first enabled cpu */
5408 			if (size == 0)
5409 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5410 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5411 				buf_size_same = 0;
5412 				break;
5413 			}
5414 		}
5415 
5416 		if (buf_size_same) {
5417 			if (!ring_buffer_expanded)
5418 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5419 					    size >> 10,
5420 					    trace_buf_size >> 10);
5421 			else
5422 				r = sprintf(buf, "%lu\n", size >> 10);
5423 		} else
5424 			r = sprintf(buf, "X\n");
5425 	} else
5426 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5427 
5428 	mutex_unlock(&trace_types_lock);
5429 
5430 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5431 	return ret;
5432 }
5433 
5434 static ssize_t
5435 tracing_entries_write(struct file *filp, const char __user *ubuf,
5436 		      size_t cnt, loff_t *ppos)
5437 {
5438 	struct inode *inode = file_inode(filp);
5439 	struct trace_array *tr = inode->i_private;
5440 	unsigned long val;
5441 	int ret;
5442 
5443 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5444 	if (ret)
5445 		return ret;
5446 
5447 	/* must have at least 1 entry */
5448 	if (!val)
5449 		return -EINVAL;
5450 
5451 	/* value is in KB */
5452 	val <<= 10;
5453 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5454 	if (ret < 0)
5455 		return ret;
5456 
5457 	*ppos += cnt;
5458 
5459 	return cnt;
5460 }
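
/*
 * Usage sketch for buffer_size_kb (illustrative).  The value is per-CPU
 * and in KB; the top-level file resizes every CPU, while the per_cpu/
 * cpuN/buffer_size_kb instances resize a single CPU:
 *
 *   # echo 4096 > buffer_size_kb
 *   # cat buffer_size_kb
 *   4096
 *
 * Before the first expansion the read shows "N (expanded: M)", and "X"
 * is shown when the per-CPU sizes differ.
 */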
5461 
5462 static ssize_t
5463 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5464 				size_t cnt, loff_t *ppos)
5465 {
5466 	struct trace_array *tr = filp->private_data;
5467 	char buf[64];
5468 	int r, cpu;
5469 	unsigned long size = 0, expanded_size = 0;
5470 
5471 	mutex_lock(&trace_types_lock);
5472 	for_each_tracing_cpu(cpu) {
5473 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5474 		if (!ring_buffer_expanded)
5475 			expanded_size += trace_buf_size >> 10;
5476 	}
5477 	if (ring_buffer_expanded)
5478 		r = sprintf(buf, "%lu\n", size);
5479 	else
5480 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5481 	mutex_unlock(&trace_types_lock);
5482 
5483 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5484 }
5485 
5486 static ssize_t
5487 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5488 			  size_t cnt, loff_t *ppos)
5489 {
5490 	/*
5491 	 * There is no need to read what the user has written; this function
5492 	 * exists just to make sure that "echo" does not return an error.
5493 	 */
5494 
5495 	*ppos += cnt;
5496 
5497 	return cnt;
5498 }
5499 
5500 static int
5501 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5502 {
5503 	struct trace_array *tr = inode->i_private;
5504 
5505 	/* disable tracing? */
5506 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5507 		tracer_tracing_off(tr);
5508 	/* resize the ring buffer to 0 */
5509 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5510 
5511 	trace_array_put(tr);
5512 
5513 	return 0;
5514 }
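
/*
 * Illustrative semantics of the free_buffer file: the write itself is a
 * no-op, and the buffers are shrunk to zero when the file is released
 * (closed).  With the TRACE_ITER_STOP_ON_FREE option set, tracing is
 * switched off first:
 *
 *   # echo > free_buffer      # echo opens, writes, then closes the file
 */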
5515 
5516 static ssize_t
5517 tracing_mark_write(struct file *filp, const char __user *ubuf,
5518 					size_t cnt, loff_t *fpos)
5519 {
5520 	unsigned long addr = (unsigned long)ubuf;
5521 	struct trace_array *tr = filp->private_data;
5522 	struct ring_buffer_event *event;
5523 	struct ring_buffer *buffer;
5524 	struct print_entry *entry;
5525 	unsigned long irq_flags;
5526 	struct page *pages[2];
5527 	void *map_page[2];
5528 	int nr_pages = 1;
5529 	ssize_t written;
5530 	int offset;
5531 	int size;
5532 	int len;
5533 	int ret;
5534 	int i;
5535 
5536 	if (tracing_disabled)
5537 		return -EINVAL;
5538 
5539 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5540 		return -EINVAL;
5541 
5542 	if (cnt > TRACE_BUF_SIZE)
5543 		cnt = TRACE_BUF_SIZE;
5544 
5545 	/*
5546 	 * Userspace is injecting traces into the kernel trace buffer.
5547 	 * We want to be as non-intrusive as possible.
5548 	 * To do so, we do not want to allocate any special buffers
5549 	 * or take any locks, but instead write the userspace data
5550 	 * straight into the ring buffer.
5551 	 *
5552 	 * First we need to pin the userspace buffer into memory, which
5553 	 * it most likely already is, because userspace just referenced it.
5554 	 * But there's no guarantee of that. By using get_user_pages_fast()
5555 	 * and kmap_atomic/kunmap_atomic() we can get access to the
5556 	 * pages directly. We then write the data directly into the
5557 	 * ring buffer.
5558 	 */
5559 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5560 
5561 	/* check if we cross pages */
5562 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5563 		nr_pages = 2;
5564 
5565 	offset = addr & (PAGE_SIZE - 1);
5566 	addr &= PAGE_MASK;
5567 
5568 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5569 	if (ret < nr_pages) {
5570 		while (--ret >= 0)
5571 			put_page(pages[ret]);
5572 		written = -EFAULT;
5573 		goto out;
5574 	}
5575 
5576 	for (i = 0; i < nr_pages; i++)
5577 		map_page[i] = kmap_atomic(pages[i]);
5578 
5579 	local_save_flags(irq_flags);
5580 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5581 	buffer = tr->trace_buffer.buffer;
5582 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5583 					  irq_flags, preempt_count());
5584 	if (!event) {
5585 		/* Ring buffer disabled, return as if not open for write */
5586 		written = -EBADF;
5587 		goto out_unlock;
5588 	}
5589 
5590 	entry = ring_buffer_event_data(event);
5591 	entry->ip = _THIS_IP_;
5592 
5593 	if (nr_pages == 2) {
5594 		len = PAGE_SIZE - offset;
5595 		memcpy(&entry->buf, map_page[0] + offset, len);
5596 		memcpy(&entry->buf[len], map_page[1], cnt - len);
5597 	} else
5598 		memcpy(&entry->buf, map_page[0] + offset, cnt);
5599 
5600 	if (entry->buf[cnt - 1] != '\n') {
5601 		entry->buf[cnt] = '\n';
5602 		entry->buf[cnt + 1] = '\0';
5603 	} else
5604 		entry->buf[cnt] = '\0';
5605 
5606 	__buffer_unlock_commit(buffer, event);
5607 
5608 	written = cnt;
5609 
5610 	*fpos += written;
5611 
5612  out_unlock:
5613 	for (i = nr_pages - 1; i >= 0; i--) {
5614 		kunmap_atomic(map_page[i]);
5615 		put_page(pages[i]);
5616 	}
5617  out:
5618 	return written;
5619 }
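
/*
 * Usage sketch for trace_marker (illustrative, userspace side).  A write
 * to the file becomes a TRACE_PRINT entry in the ring buffer, as set up
 * above.  The path assumes tracefs is mounted at /sys/kernel/tracing.
 */
#if 0	/* illustrative userspace snippet, not built with the kernel */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void mark_trace(const char *msg)
{
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

	if (fd >= 0) {
		write(fd, msg, strlen(msg));	/* shows up as a print entry */
		close(fd);
	}
}
#endif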
5620 
5621 static int tracing_clock_show(struct seq_file *m, void *v)
5622 {
5623 	struct trace_array *tr = m->private;
5624 	int i;
5625 
5626 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5627 		seq_printf(m,
5628 			"%s%s%s%s", i ? " " : "",
5629 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5630 			i == tr->clock_id ? "]" : "");
5631 	seq_putc(m, '\n');
5632 
5633 	return 0;
5634 }
5635 
5636 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5637 {
5638 	int i;
5639 
5640 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5641 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5642 			break;
5643 	}
5644 	if (i == ARRAY_SIZE(trace_clocks))
5645 		return -EINVAL;
5646 
5647 	mutex_lock(&trace_types_lock);
5648 
5649 	tr->clock_id = i;
5650 
5651 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5652 
5653 	/*
5654 	 * The new clock may not be consistent with the previous clock.
5655 	 * Reset the buffer so that it doesn't have incomparable timestamps.
5656 	 */
5657 	tracing_reset_online_cpus(&tr->trace_buffer);
5658 
5659 #ifdef CONFIG_TRACER_MAX_TRACE
5660 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5661 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5662 	tracing_reset_online_cpus(&tr->max_buffer);
5663 #endif
5664 
5665 	mutex_unlock(&trace_types_lock);
5666 
5667 	return 0;
5668 }
5669 
5670 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5671 				   size_t cnt, loff_t *fpos)
5672 {
5673 	struct seq_file *m = filp->private_data;
5674 	struct trace_array *tr = m->private;
5675 	char buf[64];
5676 	const char *clockstr;
5677 	int ret;
5678 
5679 	if (cnt >= sizeof(buf))
5680 		return -EINVAL;
5681 
5682 	if (copy_from_user(buf, ubuf, cnt))
5683 		return -EFAULT;
5684 
5685 	buf[cnt] = 0;
5686 
5687 	clockstr = strstrip(buf);
5688 
5689 	ret = tracing_set_clock(tr, clockstr);
5690 	if (ret)
5691 		return ret;
5692 
5693 	*fpos += cnt;
5694 
5695 	return cnt;
5696 }
5697 
5698 static int tracing_clock_open(struct inode *inode, struct file *file)
5699 {
5700 	struct trace_array *tr = inode->i_private;
5701 	int ret;
5702 
5703 	if (tracing_disabled)
5704 		return -ENODEV;
5705 
5706 	if (trace_array_get(tr))
5707 		return -ENODEV;
5708 
5709 	ret = single_open(file, tracing_clock_show, inode->i_private);
5710 	if (ret < 0)
5711 		trace_array_put(tr);
5712 
5713 	return ret;
5714 }
5715 
5716 struct ftrace_buffer_info {
5717 	struct trace_iterator	iter;
5718 	void			*spare;
5719 	unsigned int		read;
5720 };
5721 
5722 #ifdef CONFIG_TRACER_SNAPSHOT
5723 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5724 {
5725 	struct trace_array *tr = inode->i_private;
5726 	struct trace_iterator *iter;
5727 	struct seq_file *m;
5728 	int ret = 0;
5729 
5730 	if (trace_array_get(tr) < 0)
5731 		return -ENODEV;
5732 
5733 	if (file->f_mode & FMODE_READ) {
5734 		iter = __tracing_open(inode, file, true);
5735 		if (IS_ERR(iter))
5736 			ret = PTR_ERR(iter);
5737 	} else {
5738 		/* Writes still need the seq_file to hold the private data */
5739 		ret = -ENOMEM;
5740 		m = kzalloc(sizeof(*m), GFP_KERNEL);
5741 		if (!m)
5742 			goto out;
5743 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5744 		if (!iter) {
5745 			kfree(m);
5746 			goto out;
5747 		}
5748 		ret = 0;
5749 
5750 		iter->tr = tr;
5751 		iter->trace_buffer = &tr->max_buffer;
5752 		iter->cpu_file = tracing_get_cpu(inode);
5753 		m->private = iter;
5754 		file->private_data = m;
5755 	}
5756 out:
5757 	if (ret < 0)
5758 		trace_array_put(tr);
5759 
5760 	return ret;
5761 }
5762 
5763 static ssize_t
5764 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5765 		       loff_t *ppos)
5766 {
5767 	struct seq_file *m = filp->private_data;
5768 	struct trace_iterator *iter = m->private;
5769 	struct trace_array *tr = iter->tr;
5770 	unsigned long val;
5771 	int ret;
5772 
5773 	ret = tracing_update_buffers();
5774 	if (ret < 0)
5775 		return ret;
5776 
5777 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5778 	if (ret)
5779 		return ret;
5780 
5781 	mutex_lock(&trace_types_lock);
5782 
5783 	if (tr->current_trace->use_max_tr) {
5784 		ret = -EBUSY;
5785 		goto out;
5786 	}
5787 
5788 	switch (val) {
5789 	case 0:
5790 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5791 			ret = -EINVAL;
5792 			break;
5793 		}
5794 		if (tr->allocated_snapshot)
5795 			free_snapshot(tr);
5796 		break;
5797 	case 1:
5798 /* Only allow per-cpu swap if the ring buffer supports it */
5799 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5800 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5801 			ret = -EINVAL;
5802 			break;
5803 		}
5804 #endif
5805 		if (!tr->allocated_snapshot) {
5806 			ret = alloc_snapshot(tr);
5807 			if (ret < 0)
5808 				break;
5809 		}
5810 		local_irq_disable();
5811 		/* Now, we're going to swap */
5812 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5813 			update_max_tr(tr, current, smp_processor_id());
5814 		else
5815 			update_max_tr_single(tr, current, iter->cpu_file);
5816 		local_irq_enable();
5817 		break;
5818 	default:
5819 		if (tr->allocated_snapshot) {
5820 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5821 				tracing_reset_online_cpus(&tr->max_buffer);
5822 			else
5823 				tracing_reset(&tr->max_buffer, iter->cpu_file);
5824 		}
5825 		break;
5826 	}
5827 
5828 	if (ret >= 0) {
5829 		*ppos += cnt;
5830 		ret = cnt;
5831 	}
5832 out:
5833 	mutex_unlock(&trace_types_lock);
5834 	return ret;
5835 }
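
/*
 * Summary of the values accepted by the "snapshot" file above:
 *   0    - free the snapshot buffer (only valid for the non-per-cpu file)
 *   1    - allocate the snapshot buffer if needed and swap it with the
 *          live buffer (i.e. take a snapshot)
 *   else - clear the snapshot contents without freeing the buffer
 *
 * Example, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot
 *   cat /sys/kernel/tracing/snapshot
 */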
5836 
5837 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5838 {
5839 	struct seq_file *m = file->private_data;
5840 	int ret;
5841 
5842 	ret = tracing_release(inode, file);
5843 
5844 	if (file->f_mode & FMODE_READ)
5845 		return ret;
5846 
5847 	/* If write only, the seq_file is just a stub */
5848 	if (m)
5849 		kfree(m->private);
5850 	kfree(m);
5851 
5852 	return 0;
5853 }
5854 
5855 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5856 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5857 				    size_t count, loff_t *ppos);
5858 static int tracing_buffers_release(struct inode *inode, struct file *file);
5859 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5860 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5861 
5862 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5863 {
5864 	struct ftrace_buffer_info *info;
5865 	int ret;
5866 
5867 	ret = tracing_buffers_open(inode, filp);
5868 	if (ret < 0)
5869 		return ret;
5870 
5871 	info = filp->private_data;
5872 
5873 	if (info->iter.trace->use_max_tr) {
5874 		tracing_buffers_release(inode, filp);
5875 		return -EBUSY;
5876 	}
5877 
5878 	info->iter.snapshot = true;
5879 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5880 
5881 	return ret;
5882 }
5883 
5884 #endif /* CONFIG_TRACER_SNAPSHOT */
5885 
5886 
5887 static const struct file_operations tracing_thresh_fops = {
5888 	.open		= tracing_open_generic,
5889 	.read		= tracing_thresh_read,
5890 	.write		= tracing_thresh_write,
5891 	.llseek		= generic_file_llseek,
5892 };
5893 
5894 #ifdef CONFIG_TRACER_MAX_TRACE
5895 static const struct file_operations tracing_max_lat_fops = {
5896 	.open		= tracing_open_generic,
5897 	.read		= tracing_max_lat_read,
5898 	.write		= tracing_max_lat_write,
5899 	.llseek		= generic_file_llseek,
5900 };
5901 #endif
5902 
5903 static const struct file_operations set_tracer_fops = {
5904 	.open		= tracing_open_generic,
5905 	.read		= tracing_set_trace_read,
5906 	.write		= tracing_set_trace_write,
5907 	.llseek		= generic_file_llseek,
5908 };
5909 
5910 static const struct file_operations tracing_pipe_fops = {
5911 	.open		= tracing_open_pipe,
5912 	.poll		= tracing_poll_pipe,
5913 	.read		= tracing_read_pipe,
5914 	.splice_read	= tracing_splice_read_pipe,
5915 	.release	= tracing_release_pipe,
5916 	.llseek		= no_llseek,
5917 };
5918 
5919 static const struct file_operations tracing_entries_fops = {
5920 	.open		= tracing_open_generic_tr,
5921 	.read		= tracing_entries_read,
5922 	.write		= tracing_entries_write,
5923 	.llseek		= generic_file_llseek,
5924 	.release	= tracing_release_generic_tr,
5925 };
5926 
5927 static const struct file_operations tracing_total_entries_fops = {
5928 	.open		= tracing_open_generic_tr,
5929 	.read		= tracing_total_entries_read,
5930 	.llseek		= generic_file_llseek,
5931 	.release	= tracing_release_generic_tr,
5932 };
5933 
5934 static const struct file_operations tracing_free_buffer_fops = {
5935 	.open		= tracing_open_generic_tr,
5936 	.write		= tracing_free_buffer_write,
5937 	.release	= tracing_free_buffer_release,
5938 };
5939 
5940 static const struct file_operations tracing_mark_fops = {
5941 	.open		= tracing_open_generic_tr,
5942 	.write		= tracing_mark_write,
5943 	.llseek		= generic_file_llseek,
5944 	.release	= tracing_release_generic_tr,
5945 };
5946 
5947 static const struct file_operations trace_clock_fops = {
5948 	.open		= tracing_clock_open,
5949 	.read		= seq_read,
5950 	.llseek		= seq_lseek,
5951 	.release	= tracing_single_release_tr,
5952 	.write		= tracing_clock_write,
5953 };
5954 
5955 #ifdef CONFIG_TRACER_SNAPSHOT
5956 static const struct file_operations snapshot_fops = {
5957 	.open		= tracing_snapshot_open,
5958 	.read		= seq_read,
5959 	.write		= tracing_snapshot_write,
5960 	.llseek		= tracing_lseek,
5961 	.release	= tracing_snapshot_release,
5962 };
5963 
5964 static const struct file_operations snapshot_raw_fops = {
5965 	.open		= snapshot_raw_open,
5966 	.read		= tracing_buffers_read,
5967 	.release	= tracing_buffers_release,
5968 	.splice_read	= tracing_buffers_splice_read,
5969 	.llseek		= no_llseek,
5970 };
5971 
5972 #endif /* CONFIG_TRACER_SNAPSHOT */
5973 
5974 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5975 {
5976 	struct trace_array *tr = inode->i_private;
5977 	struct ftrace_buffer_info *info;
5978 	int ret;
5979 
5980 	if (tracing_disabled)
5981 		return -ENODEV;
5982 
5983 	if (trace_array_get(tr) < 0)
5984 		return -ENODEV;
5985 
5986 	info = kzalloc(sizeof(*info), GFP_KERNEL);
5987 	if (!info) {
5988 		trace_array_put(tr);
5989 		return -ENOMEM;
5990 	}
5991 
5992 	mutex_lock(&trace_types_lock);
5993 
5994 	info->iter.tr		= tr;
5995 	info->iter.cpu_file	= tracing_get_cpu(inode);
5996 	info->iter.trace	= tr->current_trace;
5997 	info->iter.trace_buffer = &tr->trace_buffer;
5998 	info->spare		= NULL;
5999 	/* Force reading ring buffer for first read */
6000 	info->read		= (unsigned int)-1;
6001 
6002 	filp->private_data = info;
6003 
6004 	tr->current_trace->ref++;
6005 
6006 	mutex_unlock(&trace_types_lock);
6007 
6008 	ret = nonseekable_open(inode, filp);
6009 	if (ret < 0)
6010 		trace_array_put(tr);
6011 
6012 	return ret;
6013 }
6014 
6015 static unsigned int
6016 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6017 {
6018 	struct ftrace_buffer_info *info = filp->private_data;
6019 	struct trace_iterator *iter = &info->iter;
6020 
6021 	return trace_poll(iter, filp, poll_table);
6022 }
6023 
6024 static ssize_t
6025 tracing_buffers_read(struct file *filp, char __user *ubuf,
6026 		     size_t count, loff_t *ppos)
6027 {
6028 	struct ftrace_buffer_info *info = filp->private_data;
6029 	struct trace_iterator *iter = &info->iter;
6030 	ssize_t ret;
6031 	ssize_t size;
6032 
6033 	if (!count)
6034 		return 0;
6035 
6036 #ifdef CONFIG_TRACER_MAX_TRACE
6037 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6038 		return -EBUSY;
6039 #endif
6040 
6041 	if (!info->spare)
6042 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6043 							  iter->cpu_file);
6044 	if (!info->spare)
6045 		return -ENOMEM;
6046 
6047 	/* Do we have previous read data to read? */
6048 	if (info->read < PAGE_SIZE)
6049 		goto read;
6050 
6051  again:
6052 	trace_access_lock(iter->cpu_file);
6053 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6054 				    &info->spare,
6055 				    count,
6056 				    iter->cpu_file, 0);
6057 	trace_access_unlock(iter->cpu_file);
6058 
6059 	if (ret < 0) {
6060 		if (trace_empty(iter)) {
6061 			if ((filp->f_flags & O_NONBLOCK))
6062 				return -EAGAIN;
6063 
6064 			ret = wait_on_pipe(iter, false);
6065 			if (ret)
6066 				return ret;
6067 
6068 			goto again;
6069 		}
6070 		return 0;
6071 	}
6072 
6073 	info->read = 0;
6074  read:
6075 	size = PAGE_SIZE - info->read;
6076 	if (size > count)
6077 		size = count;
6078 
6079 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6080 	if (ret == size)
6081 		return -EFAULT;
6082 
6083 	size -= ret;
6084 
6085 	*ppos += size;
6086 	info->read += size;
6087 
6088 	return size;
6089 }
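
/*
 * tracing_buffers_read() backs the per-cpu "trace_pipe_raw" files: whole
 * ring-buffer pages are read into the spare page and handed to user space
 * in binary form, so readers normally consume them in page-sized chunks,
 * for example (assuming tracefs at /sys/kernel/tracing):
 *
 *   dd if=/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw bs=4096
 */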
6090 
6091 static int tracing_buffers_release(struct inode *inode, struct file *file)
6092 {
6093 	struct ftrace_buffer_info *info = file->private_data;
6094 	struct trace_iterator *iter = &info->iter;
6095 
6096 	mutex_lock(&trace_types_lock);
6097 
6098 	iter->tr->current_trace->ref--;
6099 
6100 	__trace_array_put(iter->tr);
6101 
6102 	if (info->spare)
6103 		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6104 	kfree(info);
6105 
6106 	mutex_unlock(&trace_types_lock);
6107 
6108 	return 0;
6109 }
6110 
6111 struct buffer_ref {
6112 	struct ring_buffer	*buffer;
6113 	void			*page;
6114 	int			ref;
6115 };
6116 
6117 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6118 				    struct pipe_buffer *buf)
6119 {
6120 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6121 
6122 	if (--ref->ref)
6123 		return;
6124 
6125 	ring_buffer_free_read_page(ref->buffer, ref->page);
6126 	kfree(ref);
6127 	buf->private = 0;
6128 }
6129 
6130 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6131 				struct pipe_buffer *buf)
6132 {
6133 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6134 
6135 	ref->ref++;
6136 }
6137 
6138 /* Pipe buffer operations for a buffer. */
6139 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6140 	.can_merge		= 0,
6141 	.confirm		= generic_pipe_buf_confirm,
6142 	.release		= buffer_pipe_buf_release,
6143 	.steal			= generic_pipe_buf_steal,
6144 	.get			= buffer_pipe_buf_get,
6145 };
6146 
6147 /*
6148  * Callback from splice_to_pipe(): release any pages still attached
6149  * to the spd in case we errored out while filling the pipe.
6150  */
6151 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6152 {
6153 	struct buffer_ref *ref =
6154 		(struct buffer_ref *)spd->partial[i].private;
6155 
6156 	if (--ref->ref)
6157 		return;
6158 
6159 	ring_buffer_free_read_page(ref->buffer, ref->page);
6160 	kfree(ref);
6161 	spd->partial[i].private = 0;
6162 }
6163 
6164 static ssize_t
6165 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6166 			    struct pipe_inode_info *pipe, size_t len,
6167 			    unsigned int flags)
6168 {
6169 	struct ftrace_buffer_info *info = file->private_data;
6170 	struct trace_iterator *iter = &info->iter;
6171 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6172 	struct page *pages_def[PIPE_DEF_BUFFERS];
6173 	struct splice_pipe_desc spd = {
6174 		.pages		= pages_def,
6175 		.partial	= partial_def,
6176 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6177 		.flags		= flags,
6178 		.ops		= &buffer_pipe_buf_ops,
6179 		.spd_release	= buffer_spd_release,
6180 	};
6181 	struct buffer_ref *ref;
6182 	int entries, size, i;
6183 	ssize_t ret = 0;
6184 
6185 #ifdef CONFIG_TRACER_MAX_TRACE
6186 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6187 		return -EBUSY;
6188 #endif
6189 
6190 	if (*ppos & (PAGE_SIZE - 1))
6191 		return -EINVAL;
6192 
6193 	if (len & (PAGE_SIZE - 1)) {
6194 		if (len < PAGE_SIZE)
6195 			return -EINVAL;
6196 		len &= PAGE_MASK;
6197 	}
6198 
6199 	if (splice_grow_spd(pipe, &spd))
6200 		return -ENOMEM;
6201 
6202  again:
6203 	trace_access_lock(iter->cpu_file);
6204 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6205 
6206 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6207 		struct page *page;
6208 		int r;
6209 
6210 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6211 		if (!ref) {
6212 			ret = -ENOMEM;
6213 			break;
6214 		}
6215 
6216 		ref->ref = 1;
6217 		ref->buffer = iter->trace_buffer->buffer;
6218 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6219 		if (!ref->page) {
6220 			ret = -ENOMEM;
6221 			kfree(ref);
6222 			break;
6223 		}
6224 
6225 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6226 					  len, iter->cpu_file, 1);
6227 		if (r < 0) {
6228 			ring_buffer_free_read_page(ref->buffer, ref->page);
6229 			kfree(ref);
6230 			break;
6231 		}
6232 
6233 		/*
6234 		 * Zero out any leftover data; this page is going
6235 		 * to user land.
6236 		 */
6237 		size = ring_buffer_page_len(ref->page);
6238 		if (size < PAGE_SIZE)
6239 			memset(ref->page + size, 0, PAGE_SIZE - size);
6240 
6241 		page = virt_to_page(ref->page);
6242 
6243 		spd.pages[i] = page;
6244 		spd.partial[i].len = PAGE_SIZE;
6245 		spd.partial[i].offset = 0;
6246 		spd.partial[i].private = (unsigned long)ref;
6247 		spd.nr_pages++;
6248 		*ppos += PAGE_SIZE;
6249 
6250 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6251 	}
6252 
6253 	trace_access_unlock(iter->cpu_file);
6254 	spd.nr_pages = i;
6255 
6256 	/* did we read anything? */
6257 	if (!spd.nr_pages) {
6258 		if (ret)
6259 			goto out;
6260 
6261 		ret = -EAGAIN;
6262 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6263 			goto out;
6264 
6265 		ret = wait_on_pipe(iter, true);
6266 		if (ret)
6267 			goto out;
6268 		goto again;
6269 	}
6270 
6271 	ret = splice_to_pipe(pipe, &spd);
6272  out:
6273 	splice_shrink_spd(&spd);
6274 	return ret;
6275 }
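
/*
 * The splice path above hands ring-buffer pages to the pipe without
 * copying them: each page carries a buffer_ref whose refcount keeps the
 * page alive until the pipe consumer drops it via buffer_pipe_buf_release()
 * or, on error, buffer_spd_release().
 */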
6276 
6277 static const struct file_operations tracing_buffers_fops = {
6278 	.open		= tracing_buffers_open,
6279 	.read		= tracing_buffers_read,
6280 	.poll		= tracing_buffers_poll,
6281 	.release	= tracing_buffers_release,
6282 	.splice_read	= tracing_buffers_splice_read,
6283 	.llseek		= no_llseek,
6284 };
6285 
6286 static ssize_t
6287 tracing_stats_read(struct file *filp, char __user *ubuf,
6288 		   size_t count, loff_t *ppos)
6289 {
6290 	struct inode *inode = file_inode(filp);
6291 	struct trace_array *tr = inode->i_private;
6292 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6293 	int cpu = tracing_get_cpu(inode);
6294 	struct trace_seq *s;
6295 	unsigned long cnt;
6296 	unsigned long long t;
6297 	unsigned long usec_rem;
6298 
6299 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6300 	if (!s)
6301 		return -ENOMEM;
6302 
6303 	trace_seq_init(s);
6304 
6305 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6306 	trace_seq_printf(s, "entries: %ld\n", cnt);
6307 
6308 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6309 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6310 
6311 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6312 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6313 
6314 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6315 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6316 
6317 	if (trace_clocks[tr->clock_id].in_ns) {
6318 		/* local or global for trace_clock */
6319 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6320 		usec_rem = do_div(t, USEC_PER_SEC);
6321 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6322 								t, usec_rem);
6323 
6324 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6325 		usec_rem = do_div(t, USEC_PER_SEC);
6326 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6327 	} else {
6328 		/* counter or tsc mode for trace_clock */
6329 		trace_seq_printf(s, "oldest event ts: %llu\n",
6330 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6331 
6332 		trace_seq_printf(s, "now ts: %llu\n",
6333 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6334 	}
6335 
6336 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6337 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6338 
6339 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6340 	trace_seq_printf(s, "read events: %ld\n", cnt);
6341 
6342 	count = simple_read_from_buffer(ubuf, count, ppos,
6343 					s->buffer, trace_seq_used(s));
6344 
6345 	kfree(s);
6346 
6347 	return count;
6348 }
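
/*
 * The per-cpu "stats" file produced above looks roughly like this
 * (the numbers are only illustrative):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 46080
 *   oldest event ts:   512.012345
 *   now ts:   512.345678
 *   dropped events: 0
 *   read events: 128
 */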
6349 
6350 static const struct file_operations tracing_stats_fops = {
6351 	.open		= tracing_open_generic_tr,
6352 	.read		= tracing_stats_read,
6353 	.llseek		= generic_file_llseek,
6354 	.release	= tracing_release_generic_tr,
6355 };
6356 
6357 #ifdef CONFIG_DYNAMIC_FTRACE
6358 
6359 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6360 {
6361 	return 0;
6362 }
6363 
6364 static ssize_t
6365 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6366 		  size_t cnt, loff_t *ppos)
6367 {
6368 	static char ftrace_dyn_info_buffer[1024];
6369 	static DEFINE_MUTEX(dyn_info_mutex);
6370 	unsigned long *p = filp->private_data;
6371 	char *buf = ftrace_dyn_info_buffer;
6372 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6373 	int r;
6374 
6375 	mutex_lock(&dyn_info_mutex);
6376 	r = sprintf(buf, "%ld ", *p);
6377 
6378 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6379 	buf[r++] = '\n';
6380 
6381 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6382 
6383 	mutex_unlock(&dyn_info_mutex);
6384 
6385 	return r;
6386 }
6387 
6388 static const struct file_operations tracing_dyn_info_fops = {
6389 	.open		= tracing_open_generic,
6390 	.read		= tracing_read_dyn_info,
6391 	.llseek		= generic_file_llseek,
6392 };
6393 #endif /* CONFIG_DYNAMIC_FTRACE */
6394 
6395 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6396 static void
6397 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6398 {
6399 	tracing_snapshot();
6400 }
6401 
6402 static void
6403 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6404 {
6405 	unsigned long *count = (unsigned long *)data;
6406 
6407 	if (!*count)
6408 		return;
6409 
6410 	if (*count != -1)
6411 		(*count)--;
6412 
6413 	tracing_snapshot();
6414 }
6415 
6416 static int
6417 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6418 		      struct ftrace_probe_ops *ops, void *data)
6419 {
6420 	long count = (long)data;
6421 
6422 	seq_printf(m, "%ps:", (void *)ip);
6423 
6424 	seq_puts(m, "snapshot");
6425 
6426 	if (count == -1)
6427 		seq_puts(m, ":unlimited\n");
6428 	else
6429 		seq_printf(m, ":count=%ld\n", count);
6430 
6431 	return 0;
6432 }
6433 
6434 static struct ftrace_probe_ops snapshot_probe_ops = {
6435 	.func			= ftrace_snapshot,
6436 	.print			= ftrace_snapshot_print,
6437 };
6438 
6439 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6440 	.func			= ftrace_count_snapshot,
6441 	.print			= ftrace_snapshot_print,
6442 };
6443 
6444 static int
6445 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6446 			       char *glob, char *cmd, char *param, int enable)
6447 {
6448 	struct ftrace_probe_ops *ops;
6449 	void *count = (void *)-1;
6450 	char *number;
6451 	int ret;
6452 
6453 	/* hash funcs only work with set_ftrace_filter */
6454 	if (!enable)
6455 		return -EINVAL;
6456 
6457 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6458 
6459 	if (glob[0] == '!') {
6460 		unregister_ftrace_function_probe_func(glob+1, ops);
6461 		return 0;
6462 	}
6463 
6464 	if (!param)
6465 		goto out_reg;
6466 
6467 	number = strsep(&param, ":");
6468 
6469 	if (!strlen(number))
6470 		goto out_reg;
6471 
6472 	/*
6473 	 * We use the callback data field (which is a pointer)
6474 	 * as our counter.
6475 	 */
6476 	ret = kstrtoul(number, 0, (unsigned long *)&count);
6477 	if (ret)
6478 		return ret;
6479 
6480  out_reg:
6481 	ret = register_ftrace_function_probe(glob, ops, count);
6482 
6483 	if (ret >= 0)
6484 		alloc_snapshot(&global_trace);
6485 
6486 	return ret < 0 ? ret : 0;
6487 }
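
/*
 * The callback above implements the "snapshot" command accepted by
 * set_ftrace_filter, i.e. "<function>:snapshot[:count]".  For example,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 'schedule:snapshot:1' > /sys/kernel/tracing/set_ftrace_filter
 *
 * takes one snapshot the next time schedule() is called; prefixing the
 * glob with '!' removes the probe again.
 */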
6488 
6489 static struct ftrace_func_command ftrace_snapshot_cmd = {
6490 	.name			= "snapshot",
6491 	.func			= ftrace_trace_snapshot_callback,
6492 };
6493 
6494 static __init int register_snapshot_cmd(void)
6495 {
6496 	return register_ftrace_command(&ftrace_snapshot_cmd);
6497 }
6498 #else
6499 static inline __init int register_snapshot_cmd(void) { return 0; }
6500 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6501 
6502 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6503 {
6504 	if (WARN_ON(!tr->dir))
6505 		return ERR_PTR(-ENODEV);
6506 
6507 	/* Top directory uses NULL as the parent */
6508 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6509 		return NULL;
6510 
6511 	/* All sub buffers have a descriptor */
6512 	return tr->dir;
6513 }
6514 
6515 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6516 {
6517 	struct dentry *d_tracer;
6518 
6519 	if (tr->percpu_dir)
6520 		return tr->percpu_dir;
6521 
6522 	d_tracer = tracing_get_dentry(tr);
6523 	if (IS_ERR(d_tracer))
6524 		return NULL;
6525 
6526 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6527 
6528 	WARN_ONCE(!tr->percpu_dir,
6529 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6530 
6531 	return tr->percpu_dir;
6532 }
6533 
6534 static struct dentry *
6535 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6536 		      void *data, long cpu, const struct file_operations *fops)
6537 {
6538 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6539 
6540 	if (ret) /* See tracing_get_cpu() */
6541 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6542 	return ret;
6543 }
6544 
6545 static void
6546 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6547 {
6548 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6549 	struct dentry *d_cpu;
6550 	char cpu_dir[30]; /* 30 characters should be more than enough */
6551 
6552 	if (!d_percpu)
6553 		return;
6554 
6555 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6556 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6557 	if (!d_cpu) {
6558 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6559 		return;
6560 	}
6561 
6562 	/* per cpu trace_pipe */
6563 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6564 				tr, cpu, &tracing_pipe_fops);
6565 
6566 	/* per cpu trace */
6567 	trace_create_cpu_file("trace", 0644, d_cpu,
6568 				tr, cpu, &tracing_fops);
6569 
6570 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6571 				tr, cpu, &tracing_buffers_fops);
6572 
6573 	trace_create_cpu_file("stats", 0444, d_cpu,
6574 				tr, cpu, &tracing_stats_fops);
6575 
6576 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6577 				tr, cpu, &tracing_entries_fops);
6578 
6579 #ifdef CONFIG_TRACER_SNAPSHOT
6580 	trace_create_cpu_file("snapshot", 0644, d_cpu,
6581 				tr, cpu, &snapshot_fops);
6582 
6583 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6584 				tr, cpu, &snapshot_raw_fops);
6585 #endif
6586 }
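
/*
 * The function above gives each CPU its own per_cpu/cpuN/ directory
 * containing trace, trace_pipe, trace_pipe_raw, stats, buffer_size_kb
 * and, with CONFIG_TRACER_SNAPSHOT, snapshot and snapshot_raw, e.g.
 * /sys/kernel/tracing/per_cpu/cpu0/trace_pipe (path assumes the usual
 * tracefs mount point).
 */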
6587 
6588 #ifdef CONFIG_FTRACE_SELFTEST
6589 /* Let selftest have access to static functions in this file */
6590 #include "trace_selftest.c"
6591 #endif
6592 
6593 static ssize_t
6594 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6595 			loff_t *ppos)
6596 {
6597 	struct trace_option_dentry *topt = filp->private_data;
6598 	char *buf;
6599 
6600 	if (topt->flags->val & topt->opt->bit)
6601 		buf = "1\n";
6602 	else
6603 		buf = "0\n";
6604 
6605 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6606 }
6607 
6608 static ssize_t
6609 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6610 			 loff_t *ppos)
6611 {
6612 	struct trace_option_dentry *topt = filp->private_data;
6613 	unsigned long val;
6614 	int ret;
6615 
6616 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6617 	if (ret)
6618 		return ret;
6619 
6620 	if (val != 0 && val != 1)
6621 		return -EINVAL;
6622 
6623 	if (!!(topt->flags->val & topt->opt->bit) != val) {
6624 		mutex_lock(&trace_types_lock);
6625 		ret = __set_tracer_option(topt->tr, topt->flags,
6626 					  topt->opt, !val);
6627 		mutex_unlock(&trace_types_lock);
6628 		if (ret)
6629 			return ret;
6630 	}
6631 
6632 	*ppos += cnt;
6633 
6634 	return cnt;
6635 }
6636 
6637 
6638 static const struct file_operations trace_options_fops = {
6639 	.open = tracing_open_generic,
6640 	.read = trace_options_read,
6641 	.write = trace_options_write,
6642 	.llseek	= generic_file_llseek,
6643 };
6644 
6645 /*
6646  * In order to pass in both the trace_array descriptor as well as the index
6647  * to the flag that the trace option file represents, the trace_array
6648  * has a character array of trace_flags_index[], which holds the index
6649  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6650  * The address of this character array is passed to the flag option file
6651  * read/write callbacks.
6652  *
6653  * In order to extract both the index and the trace_array descriptor,
6654  * get_tr_index() uses the following algorithm.
6655  *
6656  *   idx = *ptr;
6657  *
6658  * The pointer itself holds the address of one element of the index
6659  * array, and that element's value is its own index (index[1] == 1).
6660  *
6661  * Then, to get the trace_array descriptor, subtracting that index
6662  * from the ptr gets us to the start of the index array itself.
6663  *
6664  *   ptr - idx == &index[0]
6665  *
6666  * Then a simple container_of() from that pointer gets us to the
6667  * trace_array descriptor.
6668  */
6669 static void get_tr_index(void *data, struct trace_array **ptr,
6670 			 unsigned int *pindex)
6671 {
6672 	*pindex = *(unsigned char *)data;
6673 
6674 	*ptr = container_of(data - *pindex, struct trace_array,
6675 			    trace_flags_index);
6676 }
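
/*
 * Worked example: if an option file was created with
 * &tr->trace_flags_index[3], then *data == 3, so *pindex becomes 3 and
 * data - 3 points at &tr->trace_flags_index[0], from which container_of()
 * recovers the enclosing trace_array.
 */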
6677 
6678 static ssize_t
6679 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6680 			loff_t *ppos)
6681 {
6682 	void *tr_index = filp->private_data;
6683 	struct trace_array *tr;
6684 	unsigned int index;
6685 	char *buf;
6686 
6687 	get_tr_index(tr_index, &tr, &index);
6688 
6689 	if (tr->trace_flags & (1 << index))
6690 		buf = "1\n";
6691 	else
6692 		buf = "0\n";
6693 
6694 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6695 }
6696 
6697 static ssize_t
6698 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6699 			 loff_t *ppos)
6700 {
6701 	void *tr_index = filp->private_data;
6702 	struct trace_array *tr;
6703 	unsigned int index;
6704 	unsigned long val;
6705 	int ret;
6706 
6707 	get_tr_index(tr_index, &tr, &index);
6708 
6709 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6710 	if (ret)
6711 		return ret;
6712 
6713 	if (val != 0 && val != 1)
6714 		return -EINVAL;
6715 
6716 	mutex_lock(&trace_types_lock);
6717 	ret = set_tracer_flag(tr, 1 << index, val);
6718 	mutex_unlock(&trace_types_lock);
6719 
6720 	if (ret < 0)
6721 		return ret;
6722 
6723 	*ppos += cnt;
6724 
6725 	return cnt;
6726 }
6727 
6728 static const struct file_operations trace_options_core_fops = {
6729 	.open = tracing_open_generic,
6730 	.read = trace_options_core_read,
6731 	.write = trace_options_core_write,
6732 	.llseek = generic_file_llseek,
6733 };
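
/*
 * Both sets of fops above serve the files in the "options" directory:
 * reading returns "0\n" or "1\n", and writing either value flips the
 * corresponding flag, e.g. (assuming tracefs at /sys/kernel/tracing):
 *
 *   echo 0 > /sys/kernel/tracing/options/print-parent
 */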
6734 
6735 struct dentry *trace_create_file(const char *name,
6736 				 umode_t mode,
6737 				 struct dentry *parent,
6738 				 void *data,
6739 				 const struct file_operations *fops)
6740 {
6741 	struct dentry *ret;
6742 
6743 	ret = tracefs_create_file(name, mode, parent, data, fops);
6744 	if (!ret)
6745 		pr_warn("Could not create tracefs '%s' entry\n", name);
6746 
6747 	return ret;
6748 }
6749 
6750 
6751 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6752 {
6753 	struct dentry *d_tracer;
6754 
6755 	if (tr->options)
6756 		return tr->options;
6757 
6758 	d_tracer = tracing_get_dentry(tr);
6759 	if (IS_ERR(d_tracer))
6760 		return NULL;
6761 
6762 	tr->options = tracefs_create_dir("options", d_tracer);
6763 	if (!tr->options) {
6764 		pr_warn("Could not create tracefs directory 'options'\n");
6765 		return NULL;
6766 	}
6767 
6768 	return tr->options;
6769 }
6770 
6771 static void
6772 create_trace_option_file(struct trace_array *tr,
6773 			 struct trace_option_dentry *topt,
6774 			 struct tracer_flags *flags,
6775 			 struct tracer_opt *opt)
6776 {
6777 	struct dentry *t_options;
6778 
6779 	t_options = trace_options_init_dentry(tr);
6780 	if (!t_options)
6781 		return;
6782 
6783 	topt->flags = flags;
6784 	topt->opt = opt;
6785 	topt->tr = tr;
6786 
6787 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6788 				    &trace_options_fops);
6789 
6790 }
6791 
6792 static void
6793 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6794 {
6795 	struct trace_option_dentry *topts;
6796 	struct trace_options *tr_topts;
6797 	struct tracer_flags *flags;
6798 	struct tracer_opt *opts;
6799 	int cnt;
6800 	int i;
6801 
6802 	if (!tracer)
6803 		return;
6804 
6805 	flags = tracer->flags;
6806 
6807 	if (!flags || !flags->opts)
6808 		return;
6809 
6810 	/*
6811 	 * If this is an instance, only create flags for tracers
6812 	 * the instance may have.
6813 	 */
6814 	if (!trace_ok_for_array(tracer, tr))
6815 		return;
6816 
6817 	for (i = 0; i < tr->nr_topts; i++) {
6818 		/* Make sure there's no duplicate flags. */
6819 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6820 			return;
6821 	}
6822 
6823 	opts = flags->opts;
6824 
6825 	for (cnt = 0; opts[cnt].name; cnt++)
6826 		;
6827 
6828 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6829 	if (!topts)
6830 		return;
6831 
6832 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6833 			    GFP_KERNEL);
6834 	if (!tr_topts) {
6835 		kfree(topts);
6836 		return;
6837 	}
6838 
6839 	tr->topts = tr_topts;
6840 	tr->topts[tr->nr_topts].tracer = tracer;
6841 	tr->topts[tr->nr_topts].topts = topts;
6842 	tr->nr_topts++;
6843 
6844 	for (cnt = 0; opts[cnt].name; cnt++) {
6845 		create_trace_option_file(tr, &topts[cnt], flags,
6846 					 &opts[cnt]);
6847 		WARN_ONCE(topts[cnt].entry == NULL,
6848 			  "Failed to create trace option: %s",
6849 			  opts[cnt].name);
6850 	}
6851 }
6852 
6853 static struct dentry *
6854 create_trace_option_core_file(struct trace_array *tr,
6855 			      const char *option, long index)
6856 {
6857 	struct dentry *t_options;
6858 
6859 	t_options = trace_options_init_dentry(tr);
6860 	if (!t_options)
6861 		return NULL;
6862 
6863 	return trace_create_file(option, 0644, t_options,
6864 				 (void *)&tr->trace_flags_index[index],
6865 				 &trace_options_core_fops);
6866 }
6867 
6868 static void create_trace_options_dir(struct trace_array *tr)
6869 {
6870 	struct dentry *t_options;
6871 	bool top_level = tr == &global_trace;
6872 	int i;
6873 
6874 	t_options = trace_options_init_dentry(tr);
6875 	if (!t_options)
6876 		return;
6877 
6878 	for (i = 0; trace_options[i]; i++) {
6879 		if (top_level ||
6880 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6881 			create_trace_option_core_file(tr, trace_options[i], i);
6882 	}
6883 }
6884 
6885 static ssize_t
6886 rb_simple_read(struct file *filp, char __user *ubuf,
6887 	       size_t cnt, loff_t *ppos)
6888 {
6889 	struct trace_array *tr = filp->private_data;
6890 	char buf[64];
6891 	int r;
6892 
6893 	r = tracer_tracing_is_on(tr);
6894 	r = sprintf(buf, "%d\n", r);
6895 
6896 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6897 }
6898 
6899 static ssize_t
6900 rb_simple_write(struct file *filp, const char __user *ubuf,
6901 		size_t cnt, loff_t *ppos)
6902 {
6903 	struct trace_array *tr = filp->private_data;
6904 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6905 	unsigned long val;
6906 	int ret;
6907 
6908 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6909 	if (ret)
6910 		return ret;
6911 
6912 	if (buffer) {
6913 		mutex_lock(&trace_types_lock);
6914 		if (val) {
6915 			tracer_tracing_on(tr);
6916 			if (tr->current_trace->start)
6917 				tr->current_trace->start(tr);
6918 		} else {
6919 			tracer_tracing_off(tr);
6920 			if (tr->current_trace->stop)
6921 				tr->current_trace->stop(tr);
6922 		}
6923 		mutex_unlock(&trace_types_lock);
6924 	}
6925 
6926 	(*ppos)++;
6927 
6928 	return cnt;
6929 }
6930 
6931 static const struct file_operations rb_simple_fops = {
6932 	.open		= tracing_open_generic_tr,
6933 	.read		= rb_simple_read,
6934 	.write		= rb_simple_write,
6935 	.release	= tracing_release_generic_tr,
6936 	.llseek		= default_llseek,
6937 };
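
/*
 * rb_simple_fops backs the "tracing_on" file (created in
 * init_tracer_tracefs() below): writing 0 stops recording into the ring
 * buffer without tearing down the current tracer, and writing 1 resumes
 * it, e.g.:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on
 */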
6938 
6939 struct dentry *trace_instance_dir;
6940 
6941 static void
6942 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6943 
6944 static int
6945 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6946 {
6947 	enum ring_buffer_flags rb_flags;
6948 
6949 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6950 
6951 	buf->tr = tr;
6952 
6953 	buf->buffer = ring_buffer_alloc(size, rb_flags);
6954 	if (!buf->buffer)
6955 		return -ENOMEM;
6956 
6957 	buf->data = alloc_percpu(struct trace_array_cpu);
6958 	if (!buf->data) {
6959 		ring_buffer_free(buf->buffer);
6960 		return -ENOMEM;
6961 	}
6962 
6963 	/* Allocate the first page for all buffers */
6964 	set_buffer_entries(&tr->trace_buffer,
6965 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6966 
6967 	return 0;
6968 }
6969 
6970 static int allocate_trace_buffers(struct trace_array *tr, int size)
6971 {
6972 	int ret;
6973 
6974 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6975 	if (ret)
6976 		return ret;
6977 
6978 #ifdef CONFIG_TRACER_MAX_TRACE
6979 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6980 				    allocate_snapshot ? size : 1);
6981 	if (WARN_ON(ret)) {
6982 		ring_buffer_free(tr->trace_buffer.buffer);
6983 		free_percpu(tr->trace_buffer.data);
6984 		return -ENOMEM;
6985 	}
6986 	tr->allocated_snapshot = allocate_snapshot;
6987 
6988 	/*
6989 	 * Only the top level trace array gets its snapshot allocated
6990 	 * from the kernel command line.
6991 	 */
6992 	allocate_snapshot = false;
6993 #endif
6994 	return 0;
6995 }
6996 
6997 static void free_trace_buffer(struct trace_buffer *buf)
6998 {
6999 	if (buf->buffer) {
7000 		ring_buffer_free(buf->buffer);
7001 		buf->buffer = NULL;
7002 		free_percpu(buf->data);
7003 		buf->data = NULL;
7004 	}
7005 }
7006 
7007 static void free_trace_buffers(struct trace_array *tr)
7008 {
7009 	if (!tr)
7010 		return;
7011 
7012 	free_trace_buffer(&tr->trace_buffer);
7013 
7014 #ifdef CONFIG_TRACER_MAX_TRACE
7015 	free_trace_buffer(&tr->max_buffer);
7016 #endif
7017 }
7018 
7019 static void init_trace_flags_index(struct trace_array *tr)
7020 {
7021 	int i;
7022 
7023 	/* Used by the trace options files */
7024 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7025 		tr->trace_flags_index[i] = i;
7026 }
7027 
7028 static void __update_tracer_options(struct trace_array *tr)
7029 {
7030 	struct tracer *t;
7031 
7032 	for (t = trace_types; t; t = t->next)
7033 		add_tracer_options(tr, t);
7034 }
7035 
7036 static void update_tracer_options(struct trace_array *tr)
7037 {
7038 	mutex_lock(&trace_types_lock);
7039 	__update_tracer_options(tr);
7040 	mutex_unlock(&trace_types_lock);
7041 }
7042 
7043 static int instance_mkdir(const char *name)
7044 {
7045 	struct trace_array *tr;
7046 	int ret;
7047 
7048 	mutex_lock(&trace_types_lock);
7049 
7050 	ret = -EEXIST;
7051 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7052 		if (tr->name && strcmp(tr->name, name) == 0)
7053 			goto out_unlock;
7054 	}
7055 
7056 	ret = -ENOMEM;
7057 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7058 	if (!tr)
7059 		goto out_unlock;
7060 
7061 	tr->name = kstrdup(name, GFP_KERNEL);
7062 	if (!tr->name)
7063 		goto out_free_tr;
7064 
7065 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7066 		goto out_free_tr;
7067 
7068 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7069 
7070 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7071 
7072 	raw_spin_lock_init(&tr->start_lock);
7073 
7074 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7075 
7076 	tr->current_trace = &nop_trace;
7077 
7078 	INIT_LIST_HEAD(&tr->systems);
7079 	INIT_LIST_HEAD(&tr->events);
7080 
7081 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7082 		goto out_free_tr;
7083 
7084 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7085 	if (!tr->dir)
7086 		goto out_free_tr;
7087 
7088 	ret = event_trace_add_tracer(tr->dir, tr);
7089 	if (ret) {
7090 		tracefs_remove_recursive(tr->dir);
7091 		goto out_free_tr;
7092 	}
7093 
7094 	init_tracer_tracefs(tr, tr->dir);
7095 	init_trace_flags_index(tr);
7096 	__update_tracer_options(tr);
7097 
7098 	list_add(&tr->list, &ftrace_trace_arrays);
7099 
7100 	mutex_unlock(&trace_types_lock);
7101 
7102 	return 0;
7103 
7104  out_free_tr:
7105 	free_trace_buffers(tr);
7106 	free_cpumask_var(tr->tracing_cpumask);
7107 	kfree(tr->name);
7108 	kfree(tr);
7109 
7110  out_unlock:
7111 	mutex_unlock(&trace_types_lock);
7112 
7113 	return ret;
7114 
7115 }
7116 
7117 static int instance_rmdir(const char *name)
7118 {
7119 	struct trace_array *tr;
7120 	int found = 0;
7121 	int ret;
7122 	int i;
7123 
7124 	mutex_lock(&trace_types_lock);
7125 
7126 	ret = -ENODEV;
7127 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7128 		if (tr->name && strcmp(tr->name, name) == 0) {
7129 			found = 1;
7130 			break;
7131 		}
7132 	}
7133 	if (!found)
7134 		goto out_unlock;
7135 
7136 	ret = -EBUSY;
7137 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7138 		goto out_unlock;
7139 
7140 	list_del(&tr->list);
7141 
7142 	/* Disable all the flags that were enabled coming in */
7143 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7144 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7145 			set_tracer_flag(tr, 1 << i, 0);
7146 	}
7147 
7148 	tracing_set_nop(tr);
7149 	event_trace_del_tracer(tr);
7150 	ftrace_destroy_function_files(tr);
7151 	tracefs_remove_recursive(tr->dir);
7152 	free_trace_buffers(tr);
7153 
7154 	for (i = 0; i < tr->nr_topts; i++) {
7155 		kfree(tr->topts[i].topts);
7156 	}
7157 	kfree(tr->topts);
7158 
7159 	kfree(tr->name);
7160 	kfree(tr);
7161 
7162 	ret = 0;
7163 
7164  out_unlock:
7165 	mutex_unlock(&trace_types_lock);
7166 
7167 	return ret;
7168 }
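
/*
 * instance_mkdir()/instance_rmdir() above are wired up to mkdir/rmdir on
 * the "instances" directory (see create_trace_instances() below), giving
 * each instance its own ring buffers and tracefs files, e.g.:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   rmdir /sys/kernel/tracing/instances/foo   # fails with -EBUSY while in use
 */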
7169 
7170 static __init void create_trace_instances(struct dentry *d_tracer)
7171 {
7172 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7173 							 instance_mkdir,
7174 							 instance_rmdir);
7175 	if (WARN_ON(!trace_instance_dir))
7176 		return;
7177 }
7178 
7179 static void
7180 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7181 {
7182 	int cpu;
7183 
7184 	trace_create_file("available_tracers", 0444, d_tracer,
7185 			tr, &show_traces_fops);
7186 
7187 	trace_create_file("current_tracer", 0644, d_tracer,
7188 			tr, &set_tracer_fops);
7189 
7190 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7191 			  tr, &tracing_cpumask_fops);
7192 
7193 	trace_create_file("trace_options", 0644, d_tracer,
7194 			  tr, &tracing_iter_fops);
7195 
7196 	trace_create_file("trace", 0644, d_tracer,
7197 			  tr, &tracing_fops);
7198 
7199 	trace_create_file("trace_pipe", 0444, d_tracer,
7200 			  tr, &tracing_pipe_fops);
7201 
7202 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7203 			  tr, &tracing_entries_fops);
7204 
7205 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7206 			  tr, &tracing_total_entries_fops);
7207 
7208 	trace_create_file("free_buffer", 0200, d_tracer,
7209 			  tr, &tracing_free_buffer_fops);
7210 
7211 	trace_create_file("trace_marker", 0220, d_tracer,
7212 			  tr, &tracing_mark_fops);
7213 
7214 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7215 			  &trace_clock_fops);
7216 
7217 	trace_create_file("tracing_on", 0644, d_tracer,
7218 			  tr, &rb_simple_fops);
7219 
7220 	create_trace_options_dir(tr);
7221 
7222 #ifdef CONFIG_TRACER_MAX_TRACE
7223 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7224 			&tr->max_latency, &tracing_max_lat_fops);
7225 #endif
7226 
7227 	if (ftrace_create_function_files(tr, d_tracer))
7228 		WARN(1, "Could not allocate function filter files");
7229 
7230 #ifdef CONFIG_TRACER_SNAPSHOT
7231 	trace_create_file("snapshot", 0644, d_tracer,
7232 			  tr, &snapshot_fops);
7233 #endif
7234 
7235 	for_each_tracing_cpu(cpu)
7236 		tracing_init_tracefs_percpu(tr, cpu);
7237 
7238 	ftrace_init_tracefs(tr, d_tracer);
7239 }
7240 
7241 static struct vfsmount *trace_automount(void *ignore)
7242 {
7243 	struct vfsmount *mnt;
7244 	struct file_system_type *type;
7245 
7246 	/*
7247 	 * To maintain backward compatibility for tools that mount
7248 	 * debugfs to get to the tracing facility, tracefs is automatically
7249 	 * mounted to the debugfs/tracing directory.
7250 	 */
7251 	type = get_fs_type("tracefs");
7252 	if (!type)
7253 		return NULL;
7254 	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7255 	put_filesystem(type);
7256 	if (IS_ERR(mnt))
7257 		return NULL;
7258 	mntget(mnt);
7259 
7260 	return mnt;
7261 }
7262 
7263 /**
7264  * tracing_init_dentry - initialize top level trace array
7265  *
7266  * This is called when creating files or directories in the tracing
7267  * directory. It is called via fs_initcall() by any of the boot up code
7268  * and expects to return the dentry of the top level tracing directory.
7269  */
7270 struct dentry *tracing_init_dentry(void)
7271 {
7272 	struct trace_array *tr = &global_trace;
7273 
7274 	/* The top level trace array uses NULL as parent */
7275 	if (tr->dir)
7276 		return NULL;
7277 
7278 	if (WARN_ON(!tracefs_initialized()) ||
7279 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7280 		 WARN_ON(!debugfs_initialized())))
7281 		return ERR_PTR(-ENODEV);
7282 
7283 	/*
7284 	 * As there may still be users that expect the tracing
7285 	 * files to exist in debugfs/tracing, we must automount
7286 	 * the tracefs file system there, so older tools still
7287 	 * work with the newer kernel.
7288 	 */
7289 	tr->dir = debugfs_create_automount("tracing", NULL,
7290 					   trace_automount, NULL);
7291 	if (!tr->dir) {
7292 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7293 		return ERR_PTR(-ENOMEM);
7294 	}
7295 
7296 	return NULL;
7297 }
7298 
7299 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7300 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7301 
7302 static void __init trace_enum_init(void)
7303 {
7304 	int len;
7305 
7306 	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7307 	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7308 }
7309 
7310 #ifdef CONFIG_MODULES
7311 static void trace_module_add_enums(struct module *mod)
7312 {
7313 	if (!mod->num_trace_enums)
7314 		return;
7315 
7316 	/*
7317 	 * Modules with bad taint do not have events created, do
7318 	 * not bother with enums either.
7319 	 */
7320 	if (trace_module_has_bad_taint(mod))
7321 		return;
7322 
7323 	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7324 }
7325 
7326 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7327 static void trace_module_remove_enums(struct module *mod)
7328 {
7329 	union trace_enum_map_item *map;
7330 	union trace_enum_map_item **last = &trace_enum_maps;
7331 
7332 	if (!mod->num_trace_enums)
7333 		return;
7334 
7335 	mutex_lock(&trace_enum_mutex);
7336 
7337 	map = trace_enum_maps;
7338 
7339 	while (map) {
7340 		if (map->head.mod == mod)
7341 			break;
7342 		map = trace_enum_jmp_to_tail(map);
7343 		last = &map->tail.next;
7344 		map = map->tail.next;
7345 	}
7346 	if (!map)
7347 		goto out;
7348 
7349 	*last = trace_enum_jmp_to_tail(map)->tail.next;
7350 	kfree(map);
7351  out:
7352 	mutex_unlock(&trace_enum_mutex);
7353 }
7354 #else
7355 static inline void trace_module_remove_enums(struct module *mod) { }
7356 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7357 
7358 static int trace_module_notify(struct notifier_block *self,
7359 			       unsigned long val, void *data)
7360 {
7361 	struct module *mod = data;
7362 
7363 	switch (val) {
7364 	case MODULE_STATE_COMING:
7365 		trace_module_add_enums(mod);
7366 		break;
7367 	case MODULE_STATE_GOING:
7368 		trace_module_remove_enums(mod);
7369 		break;
7370 	}
7371 
7372 	return 0;
7373 }
7374 
7375 static struct notifier_block trace_module_nb = {
7376 	.notifier_call = trace_module_notify,
7377 	.priority = 0,
7378 };
7379 #endif /* CONFIG_MODULES */
7380 
7381 static __init int tracer_init_tracefs(void)
7382 {
7383 	struct dentry *d_tracer;
7384 
7385 	trace_access_lock_init();
7386 
7387 	d_tracer = tracing_init_dentry();
7388 	if (IS_ERR(d_tracer))
7389 		return 0;
7390 
7391 	init_tracer_tracefs(&global_trace, d_tracer);
7392 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7393 
7394 	trace_create_file("tracing_thresh", 0644, d_tracer,
7395 			&global_trace, &tracing_thresh_fops);
7396 
7397 	trace_create_file("README", 0444, d_tracer,
7398 			NULL, &tracing_readme_fops);
7399 
7400 	trace_create_file("saved_cmdlines", 0444, d_tracer,
7401 			NULL, &tracing_saved_cmdlines_fops);
7402 
7403 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7404 			  NULL, &tracing_saved_cmdlines_size_fops);
7405 
7406 	trace_enum_init();
7407 
7408 	trace_create_enum_file(d_tracer);
7409 
7410 #ifdef CONFIG_MODULES
7411 	register_module_notifier(&trace_module_nb);
7412 #endif
7413 
7414 #ifdef CONFIG_DYNAMIC_FTRACE
7415 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7416 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7417 #endif
7418 
7419 	create_trace_instances(d_tracer);
7420 
7421 	update_tracer_options(&global_trace);
7422 
7423 	return 0;
7424 }
7425 
7426 static int trace_panic_handler(struct notifier_block *this,
7427 			       unsigned long event, void *unused)
7428 {
7429 	if (ftrace_dump_on_oops)
7430 		ftrace_dump(ftrace_dump_on_oops);
7431 	return NOTIFY_OK;
7432 }
7433 
7434 static struct notifier_block trace_panic_notifier = {
7435 	.notifier_call  = trace_panic_handler,
7436 	.next           = NULL,
7437 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7438 };
7439 
7440 static int trace_die_handler(struct notifier_block *self,
7441 			     unsigned long val,
7442 			     void *data)
7443 {
7444 	switch (val) {
7445 	case DIE_OOPS:
7446 		if (ftrace_dump_on_oops)
7447 			ftrace_dump(ftrace_dump_on_oops);
7448 		break;
7449 	default:
7450 		break;
7451 	}
7452 	return NOTIFY_OK;
7453 }
7454 
7455 static struct notifier_block trace_die_notifier = {
7456 	.notifier_call = trace_die_handler,
7457 	.priority = 200
7458 };
7459 
7460 /*
7461  * printk is set to max of 1024, we really don't need it that big.
7462  * printk is set to a max of 1024; we really don't need it that big.
7463  */
7464 #define TRACE_MAX_PRINT		1000
7465 
7466 /*
7467  * Define here KERN_TRACE so that we have one place to modify
7468  * it if we decide to change what log level the ftrace dump
7469  * should be at.
7470  */
7471 #define KERN_TRACE		KERN_EMERG
7472 
7473 void
7474 trace_printk_seq(struct trace_seq *s)
7475 {
7476 	/* Probably should print a warning here. */
7477 	if (s->seq.len >= TRACE_MAX_PRINT)
7478 		s->seq.len = TRACE_MAX_PRINT;
7479 
7480 	/*
7481 	 * More paranoid code. Although the buffer size is set to
7482 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7483 	 * an extra layer of protection.
7484 	 */
7485 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7486 		s->seq.len = s->seq.size - 1;
7487 
7488 	/* should be zero ended, but we are paranoid. */
7489 	s->buffer[s->seq.len] = 0;
7490 
7491 	printk(KERN_TRACE "%s", s->buffer);
7492 
7493 	trace_seq_init(s);
7494 }
7495 
7496 void trace_init_global_iter(struct trace_iterator *iter)
7497 {
7498 	iter->tr = &global_trace;
7499 	iter->trace = iter->tr->current_trace;
7500 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
7501 	iter->trace_buffer = &global_trace.trace_buffer;
7502 
7503 	if (iter->trace && iter->trace->open)
7504 		iter->trace->open(iter);
7505 
7506 	/* Annotate start of buffers if we had overruns */
7507 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
7508 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
7509 
7510 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
7511 	if (trace_clocks[iter->tr->clock_id].in_ns)
7512 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7513 }
7514 
7515 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7516 {
7517 	/* use static because iter can be a bit big for the stack */
7518 	static struct trace_iterator iter;
7519 	static atomic_t dump_running;
7520 	struct trace_array *tr = &global_trace;
7521 	unsigned int old_userobj;
7522 	unsigned long flags;
7523 	int cnt = 0, cpu;
7524 
7525 	/* Only allow one dump user at a time. */
7526 	if (atomic_inc_return(&dump_running) != 1) {
7527 		atomic_dec(&dump_running);
7528 		return;
7529 	}
7530 
7531 	/*
7532 	 * Always turn off tracing when we dump.
7533 	 * We don't need to show trace output of what happens
7534 	 * between multiple crashes.
7535 	 *
7536 	 * If the user does a sysrq-z, then they can re-enable
7537 	 * tracing with echo 1 > tracing_on.
7538 	 */
7539 	tracing_off();
7540 
7541 	local_irq_save(flags);
7542 
7543 	/* Simulate the iterator */
7544 	trace_init_global_iter(&iter);
7545 
7546 	for_each_tracing_cpu(cpu) {
7547 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7548 	}
7549 
7550 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7551 
7552 	/* don't look at user memory in panic mode */
7553 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7554 
7555 	switch (oops_dump_mode) {
7556 	case DUMP_ALL:
7557 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7558 		break;
7559 	case DUMP_ORIG:
7560 		iter.cpu_file = raw_smp_processor_id();
7561 		break;
7562 	case DUMP_NONE:
7563 		goto out_enable;
7564 	default:
7565 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7566 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7567 	}
7568 
7569 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
7570 
7571 	/* Did function tracer already get disabled? */
7572 	if (ftrace_is_dead()) {
7573 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7574 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7575 	}
7576 
7577 	/*
7578 	 * We need to stop all tracing on all CPUs to read
7579 	 * the next buffer. This is a bit expensive, but is
7580 	 * not done often. We fill in all that we can read,
7581 	 * and then release the locks again.
7582 	 */
7583 
7584 	while (!trace_empty(&iter)) {
7585 
7586 		if (!cnt)
7587 			printk(KERN_TRACE "---------------------------------\n");
7588 
7589 		cnt++;
7590 
7591 		/* reset all but tr, trace, and overruns */
7592 		memset(&iter.seq, 0,
7593 		       sizeof(struct trace_iterator) -
7594 		       offsetof(struct trace_iterator, seq));
7595 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
7596 		iter.pos = -1;
7597 
7598 		if (trace_find_next_entry_inc(&iter) != NULL) {
7599 			int ret;
7600 
7601 			ret = print_trace_line(&iter);
7602 			if (ret != TRACE_TYPE_NO_CONSUME)
7603 				trace_consume(&iter);
7604 		}
7605 		touch_nmi_watchdog();
7606 
7607 		trace_printk_seq(&iter.seq);
7608 	}
7609 
7610 	if (!cnt)
7611 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
7612 	else
7613 		printk(KERN_TRACE "---------------------------------\n");
7614 
7615  out_enable:
7616 	tr->trace_flags |= old_userobj;
7617 
7618 	for_each_tracing_cpu(cpu) {
7619 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7620 	}
7621  	atomic_dec(&dump_running);
7622 	local_irq_restore(flags);
7623 }
7624 EXPORT_SYMBOL_GPL(ftrace_dump);
7625 
7626 __init static int tracer_alloc_buffers(void)
7627 {
7628 	int ring_buf_size;
7629 	int ret = -ENOMEM;
7630 
7631 	/*
7632 	 * Make sure we don't accidentally add more trace options
7633 	 * than we have bits for.
7634 	 */
7635 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7636 
7637 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7638 		goto out;
7639 
7640 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7641 		goto out_free_buffer_mask;
7642 
7643 	/* Only allocate trace_printk buffers if a trace_printk exists */
7644 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7645 		/* Must be called before global_trace.buffer is allocated */
7646 		trace_printk_init_buffers();
7647 
7648 	/* To save memory, keep the ring buffer size to its minimum */
7649 	if (ring_buffer_expanded)
7650 		ring_buf_size = trace_buf_size;
7651 	else
7652 		ring_buf_size = 1;
7653 
7654 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7655 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7656 
7657 	raw_spin_lock_init(&global_trace.start_lock);
7658 
7659 	/* Used for event triggers */
7660 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7661 	if (!temp_buffer)
7662 		goto out_free_cpumask;
7663 
7664 	if (trace_create_savedcmd() < 0)
7665 		goto out_free_temp_buffer;
7666 
7667 	/* TODO: make the number of buffers hot pluggable with CPUS */
7668 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7669 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7670 		WARN_ON(1);
7671 		goto out_free_savedcmd;
7672 	}
7673 
7674 	if (global_trace.buffer_disabled)
7675 		tracing_off();
7676 
7677 	if (trace_boot_clock) {
7678 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
7679 		if (ret < 0)
7680 			pr_warn("Trace clock %s not defined, going back to default\n",
7681 				trace_boot_clock);
7682 	}
7683 
7684 	/*
7685 	 * register_tracer() might reference current_trace, so it
7686 	 * needs to be set before we register anything. This is
7687 	 * just a bootstrap of current_trace anyway.
7688 	 */
7689 	global_trace.current_trace = &nop_trace;
7690 
7691 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7692 
7693 	ftrace_init_global_array_ops(&global_trace);
7694 
7695 	init_trace_flags_index(&global_trace);
7696 
7697 	register_tracer(&nop_trace);
7698 
7699 	/* All seems OK, enable tracing */
7700 	tracing_disabled = 0;
7701 
7702 	atomic_notifier_chain_register(&panic_notifier_list,
7703 				       &trace_panic_notifier);
7704 
7705 	register_die_notifier(&trace_die_notifier);
7706 
7707 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7708 
7709 	INIT_LIST_HEAD(&global_trace.systems);
7710 	INIT_LIST_HEAD(&global_trace.events);
7711 	list_add(&global_trace.list, &ftrace_trace_arrays);
7712 
7713 	apply_trace_boot_options();
7714 
7715 	register_snapshot_cmd();
7716 
7717 	return 0;
7718 
7719 out_free_savedcmd:
7720 	free_saved_cmdlines_buffer(savedcmd);
7721 out_free_temp_buffer:
7722 	ring_buffer_free(temp_buffer);
7723 out_free_cpumask:
7724 	free_cpumask_var(global_trace.tracing_cpumask);
7725 out_free_buffer_mask:
7726 	free_cpumask_var(tracing_buffer_mask);
7727 out:
7728 	return ret;
7729 }
7730 
7731 void __init trace_init(void)
7732 {
7733 	if (tracepoint_printk) {
7734 		tracepoint_print_iter =
7735 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7736 		if (WARN_ON(!tracepoint_print_iter))
7737 			tracepoint_printk = 0;
7738 	}
7739 	tracer_alloc_buffers();
7740 	trace_event_init();
7741 }
7742 
7743 __init static int clear_boot_tracer(void)
7744 {
7745 	/*
7746 	 * The default bootup tracer name is stored in an init section.
7747 	 * This function is called from a late_initcall(). If we did not
7748 	 * find the boot tracer, then clear it out, to prevent
7749 	 * later registration from accessing the buffer that is
7750 	 * about to be freed.
7751 	 */
7752 	if (!default_bootup_tracer)
7753 		return 0;
7754 
7755 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7756 	       default_bootup_tracer);
7757 	default_bootup_tracer = NULL;
7758 
7759 	return 0;
7760 }
7761 
7762 fs_initcall(tracer_init_tracefs);
7763 late_initcall(clear_boot_tracer);
7764