xref: /linux-6.15/kernel/trace/trace.c (revision aad108aa)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
5  * Copyright (C) 2008 Ingo Molnar <[email protected]>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <[email protected]>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44 
45 #include "trace.h"
46 #include "trace_output.h"
47 
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53 
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring-buffer to count the
57  * entries inserted during the selftest, although some concurrent
58  * insertions into the ring-buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62 
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67 
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71 
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 	{ }
75 };
76 
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 	return 0;
81 }
82 
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89 
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97 
98 cpumask_var_t __read_mostly	tracing_buffer_mask;
99 
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops:
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115 
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117 
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120 
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124 	struct module			*mod;
125 	unsigned long			length;
126 };
127 
128 union trace_enum_map_item;
129 
130 struct trace_enum_map_tail {
131 	/*
132 	 * "end" is first and points to NULL as it must be different
133 	 * than "mod" or "enum_string"
134 	 */
135 	union trace_enum_map_item	*next;
136 	const char			*end;	/* points to NULL */
137 };
138 
139 static DEFINE_MUTEX(trace_enum_mutex);
140 
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149 	struct trace_enum_map		map;
150 	struct trace_enum_map_head	head;
151 	struct trace_enum_map_tail	tail;
152 };
153 
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
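
/*
 * Illustrative sketch (not part of the kernel source): with the layout
 * described above, one saved block of N maps looks like
 *
 *	[ head | map 0 | map 1 | ... | map N-1 | tail ]
 *
 * and, assuming tail.next points at the head element of the following
 * block, every saved map could be walked roughly like this (the helper
 * name walk_enum_maps() is made up for the example):
 *
 *	static void walk_enum_maps(union trace_enum_map_item *block)
 *	{
 *		while (block) {
 *			unsigned long i, len = block->head.length;
 *
 *			for (i = 0; i < len; i++)
 *				pr_info("%s = %lu\n",
 *					block[i + 1].map.enum_string,
 *					block[i + 1].map.enum_value);
 *
 *			block = block[len + 1].tail.next;
 *		}
 *	}
 */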
156 
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158 
159 #define MAX_TRACER_SIZE		100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162 
163 static bool allocate_snapshot;
164 
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 	default_bootup_tracer = bootup_tracer_buf;
169 	/* We are using ftrace early, expand it */
170 	ring_buffer_expanded = true;
171 	return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174 
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 	if (*str++ != '=' || !*str) {
178 		ftrace_dump_on_oops = DUMP_ALL;
179 		return 1;
180 	}
181 
182 	if (!strcmp("orig_cpu", str)) {
183 		ftrace_dump_on_oops = DUMP_ORIG;
184 		return 1;
185 	}
186 
187 	return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190 
191 static int __init stop_trace_on_warning(char *str)
192 {
193 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 		__disable_trace_on_warning = 1;
195 	return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198 
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 	allocate_snapshot = true;
202 	/* We also need the main ring buffer expanded */
203 	ring_buffer_expanded = true;
204 	return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207 
208 
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210 
211 static int __init set_trace_boot_options(char *str)
212 {
213 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 	return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217 
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220 
221 static int __init set_trace_boot_clock(char *str)
222 {
223 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 	trace_boot_clock = trace_boot_clock_buf;
225 	return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228 
229 static int __init set_tracepoint_printk(char *str)
230 {
231 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 		tracepoint_printk = 1;
233 	return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236 
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239 	nsec += 500;
240 	do_div(nsec, 1000);
241 	return nsec;
242 }
243 
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS						\
246 	(FUNCTION_DEFAULT_FLAGS |					\
247 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
248 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
249 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
250 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251 
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
254 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255 
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 	TRACE_ITER_EVENT_FORK
259 
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a linked list of pages that will store trace entries. The
264  * page descriptors of those pages are used to hold the
265  * linked list, by linking the lru item in each page descriptor
266  * to the other pages of the per-CPU buffer.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273 	.trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275 
276 LIST_HEAD(ftrace_trace_arrays);
277 
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 	struct trace_array *tr;
281 	int ret = -ENODEV;
282 
283 	mutex_lock(&trace_types_lock);
284 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 		if (tr == this_tr) {
286 			tr->ref++;
287 			ret = 0;
288 			break;
289 		}
290 	}
291 	mutex_unlock(&trace_types_lock);
292 
293 	return ret;
294 }
295 
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 	WARN_ON(!this_tr->ref);
299 	this_tr->ref--;
300 }
301 
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 	mutex_lock(&trace_types_lock);
305 	__trace_array_put(this_tr);
306 	mutex_unlock(&trace_types_lock);
307 }
308 
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 			      struct ring_buffer *buffer,
311 			      struct ring_buffer_event *event)
312 {
313 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 	    !filter_match_preds(call->filter, rec)) {
315 		__trace_event_discard_commit(buffer, event);
316 		return 1;
317 	}
318 
319 	return 0;
320 }
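
/*
 * Usage sketch (simplified, not copied from any one caller): a commit
 * path that honors per-event filters reserves the event, fills it in,
 * and then lets call_filter_check_discard() decide whether to drop it
 * before committing:
 *
 *	entry = ring_buffer_event_data(event);
 *	entry->ip = ip;
 *
 *	if (!call_filter_check_discard(call, entry, buffer, event))
 *		__buffer_unlock_commit(buffer, event);
 */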
321 
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 	vfree(pid_list->pids);
325 	kfree(pid_list);
326 }
327 
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 	/*
339 	 * If pid_max changed after filtered_pids was created, we
340 	 * by default ignore all pids greater than the previous pid_max.
341 	 */
342 	if (search_pid >= filtered_pids->pid_max)
343 		return false;
344 
345 	return test_bit(search_pid, filtered_pids->pids);
346 }
347 
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 	/*
361 	 * Return false, because if filtered_pids does not exist,
362 	 * all pids are good to trace.
363 	 */
364 	if (!filtered_pids)
365 		return false;
366 
367 	return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369 
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 				  struct task_struct *self,
384 				  struct task_struct *task)
385 {
386 	if (!pid_list)
387 		return;
388 
389 	/* For forks, we only add if the forking task is listed */
390 	if (self) {
391 		if (!trace_find_filtered_pid(pid_list, self->pid))
392 			return;
393 	}
394 
395 	/* Sorry, but we don't support pid_max changing after setting */
396 	if (task->pid >= pid_list->pid_max)
397 		return;
398 
399 	/* "self" is set for forks, and NULL for exits */
400 	if (self)
401 		set_bit(task->pid, pid_list->pids);
402 	else
403 		clear_bit(task->pid, pid_list->pids);
404 }
405 
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 	unsigned long pid = (unsigned long)v;
421 
422 	(*pos)++;
423 
424 	/* pid already is +1 of the actual previous bit */
425 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426 
427 	/* Return pid + 1 to allow zero to be represented */
428 	if (pid < pid_list->pid_max)
429 		return (void *)(pid + 1);
430 
431 	return NULL;
432 }
433 
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 	unsigned long pid;
448 	loff_t l = 0;
449 
450 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 	if (pid >= pid_list->pid_max)
452 		return NULL;
453 
454 	/* Return pid + 1 so that zero can be the exit value */
455 	for (pid++; pid && l < *pos;
456 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 		;
458 	return (void *)pid;
459 }
460 
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471 	unsigned long pid = (unsigned long)v - 1;
472 
473 	seq_printf(m, "%lu\n", pid);
474 	return 0;
475 }
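
/*
 * The three helpers above are meant to back a seq_file. A minimal
 * sketch of how they could be wired together (the p_start()/p_next()/
 * p_stop() wrappers and the use of m->private to find the pid list are
 * assumptions for this example; real users look their list up from the
 * file's private data under the proper locking):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */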
476 
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE		127
479 
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 		    struct trace_pid_list **new_pid_list,
482 		    const char __user *ubuf, size_t cnt)
483 {
484 	struct trace_pid_list *pid_list;
485 	struct trace_parser parser;
486 	unsigned long val;
487 	int nr_pids = 0;
488 	ssize_t read = 0;
489 	ssize_t ret = 0;
490 	loff_t pos;
491 	pid_t pid;
492 
493 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 		return -ENOMEM;
495 
496 	/*
497 	 * Always recreate a new array. The write is an all or nothing
498 	 * operation. Always create a new array when adding new pids by
499 	 * the user. If the operation fails, then the current list is
500 	 * not modified.
501 	 */
502 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 	if (!pid_list)
504 		return -ENOMEM;
505 
506 	pid_list->pid_max = READ_ONCE(pid_max);
507 
508 	/* Only truncating will shrink pid_max */
509 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 		pid_list->pid_max = filtered_pids->pid_max;
511 
512 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 	if (!pid_list->pids) {
514 		kfree(pid_list);
515 		return -ENOMEM;
516 	}
517 
518 	if (filtered_pids) {
519 		/* copy the current bits to the new max */
520 		pid = find_first_bit(filtered_pids->pids,
521 				     filtered_pids->pid_max);
522 		while (pid < filtered_pids->pid_max) {
523 			set_bit(pid, pid_list->pids);
524 			pid = find_next_bit(filtered_pids->pids,
525 					    filtered_pids->pid_max,
526 					    pid + 1);
527 			nr_pids++;
528 		}
529 	}
530 
531 	while (cnt > 0) {
532 
533 		pos = 0;
534 
535 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
536 		if (ret < 0 || !trace_parser_loaded(&parser))
537 			break;
538 
539 		read += ret;
540 		ubuf += ret;
541 		cnt -= ret;
542 
543 		parser.buffer[parser.idx] = 0;
544 
545 		ret = -EINVAL;
546 		if (kstrtoul(parser.buffer, 0, &val))
547 			break;
548 		if (val >= pid_list->pid_max)
549 			break;
550 
551 		pid = (pid_t)val;
552 
553 		set_bit(pid, pid_list->pids);
554 		nr_pids++;
555 
556 		trace_parser_clear(&parser);
557 		ret = 0;
558 	}
559 	trace_parser_put(&parser);
560 
561 	if (ret < 0) {
562 		trace_free_pid_list(pid_list);
563 		return ret;
564 	}
565 
566 	if (!nr_pids) {
567 		/* Cleared the list of pids */
568 		trace_free_pid_list(pid_list);
569 		read = ret;
570 		pid_list = NULL;
571 	}
572 
573 	*new_pid_list = pid_list;
574 
575 	return read;
576 }
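
/*
 * Sketch of a write() handler built on top of trace_pid_write() (the
 * lock name and the publication details are simplified assumptions;
 * real callers also update per-task flags and hold the appropriate
 * mutex around the swap):
 *
 *	filtered_pids = rcu_dereference_protected(tr->filtered_pids,
 *					lockdep_is_held(&some_mutex));
 *
 *	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr->filtered_pids, pid_list);
 *	synchronize_sched();
 *	if (filtered_pids)
 *		trace_free_pid_list(filtered_pids);
 */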
577 
578 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
579 {
580 	u64 ts;
581 
582 	/* Early boot up does not have a buffer yet */
583 	if (!buf->buffer)
584 		return trace_clock_local();
585 
586 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
587 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
588 
589 	return ts;
590 }
591 
592 cycle_t ftrace_now(int cpu)
593 {
594 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
595 }
596 
597 /**
598  * tracing_is_enabled - Show if global_trace has been disabled
599  *
600  * Shows if the global trace has been enabled or not. It uses the
601  * mirror flag "buffer_disabled" so it can be used in fast paths such as by
602  * the irqsoff tracer. But it may be inaccurate due to races. If you
603  * need to know the accurate state, use tracing_is_on() which is a little
604  * slower, but accurate.
605  */
606 int tracing_is_enabled(void)
607 {
608 	/*
609 	 * For quick access (irqsoff uses this in fast path), just
610 	 * return the mirror variable of the state of the ring buffer.
611 	 * It's a little racy, but we don't really care.
612 	 */
613 	smp_rmb();
614 	return !global_trace.buffer_disabled;
615 }
616 
617 /*
618  * trace_buf_size is the size in bytes that is allocated
619  * for a buffer. Note, the number of bytes is always rounded
620  * to page size.
621  *
622  * This number is purposely set to a low number of 16384.
623  * If the dump on oops happens, it will be much appreciated
624  * to not have to wait for all that output. Anyway, this is
625  * configurable at both boot time and run time.
626  */
627 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
628 
629 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
630 
631 /* trace_types holds a linked list of available tracers. */
632 static struct tracer		*trace_types __read_mostly;
633 
634 /*
635  * trace_types_lock is used to protect the trace_types list.
636  */
637 DEFINE_MUTEX(trace_types_lock);
638 
639 /*
640  * serialize the access of the ring buffer
641  *
642  * The ring buffer serializes readers, but that is only low level protection.
643  * The validity of the events (returned by ring_buffer_peek() etc.)
644  * is not protected by the ring buffer.
645  *
646  * The content of events may become garbage if we allow another process to
647  * consume these events concurrently:
648  *   A) the page of the consumed events may become a normal page
649  *      (not a reader page) in the ring buffer, and this page will be
650  *      rewritten by the event producer.
651  *   B) the page of the consumed events may become a page for splice_read,
652  *      and this page will be returned to the system.
653  *
654  *   These primitives allow multiple processes to access different CPU
655  *   ring buffers concurrently.
656  *
657  *   These primitives don't distinguish read-only and read-consume access.
658  *   Multiple read-only accesses are also serialized.
659  */
660 
661 #ifdef CONFIG_SMP
662 static DECLARE_RWSEM(all_cpu_access_lock);
663 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
664 
665 static inline void trace_access_lock(int cpu)
666 {
667 	if (cpu == RING_BUFFER_ALL_CPUS) {
668 		/* gain it for accessing the whole ring buffer. */
669 		down_write(&all_cpu_access_lock);
670 	} else {
671 		/* gain it for accessing a cpu ring buffer. */
672 
673 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
674 		down_read(&all_cpu_access_lock);
675 
676 		/* Secondly block other access to this @cpu ring buffer. */
677 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
678 	}
679 }
680 
681 static inline void trace_access_unlock(int cpu)
682 {
683 	if (cpu == RING_BUFFER_ALL_CPUS) {
684 		up_write(&all_cpu_access_lock);
685 	} else {
686 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
687 		up_read(&all_cpu_access_lock);
688 	}
689 }
690 
691 static inline void trace_access_lock_init(void)
692 {
693 	int cpu;
694 
695 	for_each_possible_cpu(cpu)
696 		mutex_init(&per_cpu(cpu_access_lock, cpu));
697 }
698 
699 #else
700 
701 static DEFINE_MUTEX(access_lock);
702 
703 static inline void trace_access_lock(int cpu)
704 {
705 	(void)cpu;
706 	mutex_lock(&access_lock);
707 }
708 
709 static inline void trace_access_unlock(int cpu)
710 {
711 	(void)cpu;
712 	mutex_unlock(&access_lock);
713 }
714 
715 static inline void trace_access_lock_init(void)
716 {
717 }
718 
719 #endif
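
/*
 * Usage sketch (illustrative, not a real caller): a consuming reader of
 * a single cpu buffer takes the per-cpu lock, while an operation that
 * touches every cpu buffer passes RING_BUFFER_ALL_CPUS:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, NULL);
 *	...
 *	trace_access_unlock(cpu);
 */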
720 
721 #ifdef CONFIG_STACKTRACE
722 static void __ftrace_trace_stack(struct ring_buffer *buffer,
723 				 unsigned long flags,
724 				 int skip, int pc, struct pt_regs *regs);
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726 				      struct ring_buffer *buffer,
727 				      unsigned long flags,
728 				      int skip, int pc, struct pt_regs *regs);
729 
730 #else
731 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
732 					unsigned long flags,
733 					int skip, int pc, struct pt_regs *regs)
734 {
735 }
736 static inline void ftrace_trace_stack(struct trace_array *tr,
737 				      struct ring_buffer *buffer,
738 				      unsigned long flags,
739 				      int skip, int pc, struct pt_regs *regs)
740 {
741 }
742 
743 #endif
744 
745 static void tracer_tracing_on(struct trace_array *tr)
746 {
747 	if (tr->trace_buffer.buffer)
748 		ring_buffer_record_on(tr->trace_buffer.buffer);
749 	/*
750 	 * This flag is looked at when buffers haven't been allocated
751 	 * yet, or by some tracers (like irqsoff), that just want to
752 	 * know if the ring buffer has been disabled, but it can handle
753 	 * races where it gets disabled but we still do a record.
754 	 * As the check is in the fast path of the tracers, it is more
755 	 * important to be fast than accurate.
756 	 */
757 	tr->buffer_disabled = 0;
758 	/* Make the flag seen by readers */
759 	smp_wmb();
760 }
761 
762 /**
763  * tracing_on - enable tracing buffers
764  *
765  * This function enables tracing buffers that may have been
766  * disabled with tracing_off.
767  */
768 void tracing_on(void)
769 {
770 	tracer_tracing_on(&global_trace);
771 }
772 EXPORT_SYMBOL_GPL(tracing_on);
773 
774 /**
775  * __trace_puts - write a constant string into the trace buffer.
776  * @ip:	   The address of the caller
777  * @str:   The constant string to write
778  * @size:  The size of the string.
779  */
780 int __trace_puts(unsigned long ip, const char *str, int size)
781 {
782 	struct ring_buffer_event *event;
783 	struct ring_buffer *buffer;
784 	struct print_entry *entry;
785 	unsigned long irq_flags;
786 	int alloc;
787 	int pc;
788 
789 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
790 		return 0;
791 
792 	pc = preempt_count();
793 
794 	if (unlikely(tracing_selftest_running || tracing_disabled))
795 		return 0;
796 
797 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
798 
799 	local_save_flags(irq_flags);
800 	buffer = global_trace.trace_buffer.buffer;
801 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
802 					  irq_flags, pc);
803 	if (!event)
804 		return 0;
805 
806 	entry = ring_buffer_event_data(event);
807 	entry->ip = ip;
808 
809 	memcpy(&entry->buf, str, size);
810 
811 	/* Add a newline if necessary */
812 	if (entry->buf[size - 1] != '\n') {
813 		entry->buf[size] = '\n';
814 		entry->buf[size + 1] = '\0';
815 	} else
816 		entry->buf[size] = '\0';
817 
818 	__buffer_unlock_commit(buffer, event);
819 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
820 
821 	return size;
822 }
823 EXPORT_SYMBOL_GPL(__trace_puts);
824 
825 /**
826  * __trace_bputs - write the pointer to a constant string into trace buffer
827  * @ip:	   The address of the caller
828  * @str:   The constant string to write to the buffer to
829  */
830 int __trace_bputs(unsigned long ip, const char *str)
831 {
832 	struct ring_buffer_event *event;
833 	struct ring_buffer *buffer;
834 	struct bputs_entry *entry;
835 	unsigned long irq_flags;
836 	int size = sizeof(struct bputs_entry);
837 	int pc;
838 
839 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
840 		return 0;
841 
842 	pc = preempt_count();
843 
844 	if (unlikely(tracing_selftest_running || tracing_disabled))
845 		return 0;
846 
847 	local_save_flags(irq_flags);
848 	buffer = global_trace.trace_buffer.buffer;
849 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
850 					  irq_flags, pc);
851 	if (!event)
852 		return 0;
853 
854 	entry = ring_buffer_event_data(event);
855 	entry->ip			= ip;
856 	entry->str			= str;
857 
858 	__buffer_unlock_commit(buffer, event);
859 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
860 
861 	return 1;
862 }
863 EXPORT_SYMBOL_GPL(__trace_bputs);
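
/*
 * Note: __trace_puts() and __trace_bputs() are normally reached through
 * the trace_puts() macro, which picks __trace_bputs() when the string is
 * a builtin constant and falls back to __trace_puts() otherwise. A
 * typical (illustrative) debugging use:
 *
 *	trace_puts("reached the slow path\n");
 */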
864 
865 #ifdef CONFIG_TRACER_SNAPSHOT
866 /**
867  * tracing_snapshot - take a snapshot of the current buffer.
868  *
869  * This causes a swap between the snapshot buffer and the current live
870  * tracing buffer. You can use this to take snapshots of the live
871  * trace when some condition is triggered, but continue to trace.
872  *
873  * Note, make sure to allocate the snapshot with either
874  * a tracing_snapshot_alloc(), or by doing it manually
875  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
876  *
877  * If the snapshot buffer is not allocated, it will stop tracing,
878  * basically making a permanent snapshot.
879  */
880 void tracing_snapshot(void)
881 {
882 	struct trace_array *tr = &global_trace;
883 	struct tracer *tracer = tr->current_trace;
884 	unsigned long flags;
885 
886 	if (in_nmi()) {
887 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
888 		internal_trace_puts("*** snapshot is being ignored        ***\n");
889 		return;
890 	}
891 
892 	if (!tr->allocated_snapshot) {
893 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
894 		internal_trace_puts("*** stopping trace here!   ***\n");
895 		tracing_off();
896 		return;
897 	}
898 
899 	/* Note, snapshot can not be used when the tracer uses it */
900 	if (tracer->use_max_tr) {
901 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
902 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
903 		return;
904 	}
905 
906 	local_irq_save(flags);
907 	update_max_tr(tr, current, smp_processor_id());
908 	local_irq_restore(flags);
909 }
910 EXPORT_SYMBOL_GPL(tracing_snapshot);
911 
912 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
913 					struct trace_buffer *size_buf, int cpu_id);
914 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
915 
916 static int alloc_snapshot(struct trace_array *tr)
917 {
918 	int ret;
919 
920 	if (!tr->allocated_snapshot) {
921 
922 		/* allocate spare buffer */
923 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
924 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
925 		if (ret < 0)
926 			return ret;
927 
928 		tr->allocated_snapshot = true;
929 	}
930 
931 	return 0;
932 }
933 
934 static void free_snapshot(struct trace_array *tr)
935 {
936 	/*
937 	 * We don't free the ring buffer; instead, we resize it because
938 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
939 	 * we want to preserve it.
940 	 */
941 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
942 	set_buffer_entries(&tr->max_buffer, 1);
943 	tracing_reset_online_cpus(&tr->max_buffer);
944 	tr->allocated_snapshot = false;
945 }
946 
947 /**
948  * tracing_alloc_snapshot - allocate snapshot buffer.
949  *
950  * This only allocates the snapshot buffer if it isn't already
951  * allocated - it doesn't also take a snapshot.
952  *
953  * This is meant to be used in cases where the snapshot buffer needs
954  * to be set up for events that can't sleep but need to be able to
955  * trigger a snapshot.
956  */
957 int tracing_alloc_snapshot(void)
958 {
959 	struct trace_array *tr = &global_trace;
960 	int ret;
961 
962 	ret = alloc_snapshot(tr);
963 	WARN_ON(ret < 0);
964 
965 	return ret;
966 }
967 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
968 
969 /**
970  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
971  *
972  * This is similar to tracing_snapshot(), but it will allocate the
973  * snapshot buffer if it isn't already allocated. Use this only
974  * where it is safe to sleep, as the allocation may sleep.
975  *
976  * This causes a swap between the snapshot buffer and the current live
977  * tracing buffer. You can use this to take snapshots of the live
978  * trace when some condition is triggered, but continue to trace.
979  */
980 void tracing_snapshot_alloc(void)
981 {
982 	int ret;
983 
984 	ret = tracing_alloc_snapshot();
985 	if (ret < 0)
986 		return;
987 
988 	tracing_snapshot();
989 }
990 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
991 #else
992 void tracing_snapshot(void)
993 {
994 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
995 }
996 EXPORT_SYMBOL_GPL(tracing_snapshot);
997 int tracing_alloc_snapshot(void)
998 {
999 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1000 	return -ENODEV;
1001 }
1002 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1003 void tracing_snapshot_alloc(void)
1004 {
1005 	/* Give warning */
1006 	tracing_snapshot();
1007 }
1008 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1009 #endif /* CONFIG_TRACER_SNAPSHOT */
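
/*
 * Illustrative use of the snapshot API from kernel code (the condition
 * and variable names are made up for the example): allocate the spare
 * buffer once from a context that may sleep, then take the snapshot
 * wherever the condition of interest fires; tracing_snapshot() itself
 * may be called from atomic context, just not from NMI, as checked
 * above:
 *
 *	tracing_snapshot_alloc();
 *	...
 *	if (suspicious_condition)
 *		tracing_snapshot();
 */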
1010 
1011 static void tracer_tracing_off(struct trace_array *tr)
1012 {
1013 	if (tr->trace_buffer.buffer)
1014 		ring_buffer_record_off(tr->trace_buffer.buffer);
1015 	/*
1016 	 * This flag is looked at when buffers haven't been allocated
1017 	 * yet, or by some tracers (like irqsoff), that just want to
1018 	 * know if the ring buffer has been disabled, but it can handle
1019 	 * races where it gets disabled but we still do a record.
1020 	 * As the check is in the fast path of the tracers, it is more
1021 	 * important to be fast than accurate.
1022 	 */
1023 	tr->buffer_disabled = 1;
1024 	/* Make the flag seen by readers */
1025 	smp_wmb();
1026 }
1027 
1028 /**
1029  * tracing_off - turn off tracing buffers
1030  *
1031  * This function stops the tracing buffers from recording data.
1032  * It does not disable any overhead the tracers themselves may
1033  * be causing. This function simply causes all recording to
1034  * the ring buffers to fail.
1035  */
1036 void tracing_off(void)
1037 {
1038 	tracer_tracing_off(&global_trace);
1039 }
1040 EXPORT_SYMBOL_GPL(tracing_off);
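
/*
 * tracing_on()/tracing_off() are often used as a poor man's breakpoint
 * around a suspected code path (illustrative example, the names below
 * are made up):
 *
 *	trace_printk("entering suspect path: x=%d\n", x);
 *	do_suspect_work();
 *	if (something_went_wrong)
 *		tracing_off();
 */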
1041 
1042 void disable_trace_on_warning(void)
1043 {
1044 	if (__disable_trace_on_warning)
1045 		tracing_off();
1046 }
1047 
1048 /**
1049  * tracer_tracing_is_on - show real state of ring buffer enabled
1050  * @tr: the trace array to know if the ring buffer is enabled
1051  *
1052  * Shows real state of the ring buffer if it is enabled or not.
1053  */
1054 static int tracer_tracing_is_on(struct trace_array *tr)
1055 {
1056 	if (tr->trace_buffer.buffer)
1057 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1058 	return !tr->buffer_disabled;
1059 }
1060 
1061 /**
1062  * tracing_is_on - show state of ring buffers enabled
1063  */
1064 int tracing_is_on(void)
1065 {
1066 	return tracer_tracing_is_on(&global_trace);
1067 }
1068 EXPORT_SYMBOL_GPL(tracing_is_on);
1069 
1070 static int __init set_buf_size(char *str)
1071 {
1072 	unsigned long buf_size;
1073 
1074 	if (!str)
1075 		return 0;
1076 	buf_size = memparse(str, &str);
1077 	/* nr_entries can not be zero */
1078 	if (buf_size == 0)
1079 		return 0;
1080 	trace_buf_size = buf_size;
1081 	return 1;
1082 }
1083 __setup("trace_buf_size=", set_buf_size);
1084 
1085 static int __init set_tracing_thresh(char *str)
1086 {
1087 	unsigned long threshold;
1088 	int ret;
1089 
1090 	if (!str)
1091 		return 0;
1092 	ret = kstrtoul(str, 0, &threshold);
1093 	if (ret < 0)
1094 		return 0;
1095 	tracing_thresh = threshold * 1000;
1096 	return 1;
1097 }
1098 __setup("tracing_thresh=", set_tracing_thresh);
1099 
1100 unsigned long nsecs_to_usecs(unsigned long nsecs)
1101 {
1102 	return nsecs / 1000;
1103 }
1104 
1105 /*
1106  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1107  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1108  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1109  * of strings in the order that the enums were defined.
1110  */
1111 #undef C
1112 #define C(a, b) b
1113 
1114 /* These must match the bit positions in trace_iterator_flags */
1115 static const char *trace_options[] = {
1116 	TRACE_FLAGS
1117 	NULL
1118 };
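
/*
 * Illustration of the C() trick above (a reduced example, not the real
 * flag list): if TRACE_FLAGS were defined as
 *
 *	#define TRACE_FLAGS				\
 *		C(PRINT_PARENT,	"print-parent"),	\
 *		C(SYM_OFFSET,	"sym-offset"),
 *
 * then "#define C(a, b) b" expands it to
 *
 *	"print-parent", "sym-offset",
 *
 * which is exactly the string array built here, while trace.h expands
 * the same list with different C() definitions to build the matching
 * TRACE_ITER_* bits, keeping strings and bits in sync by construction.
 */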
1119 
1120 static struct {
1121 	u64 (*func)(void);
1122 	const char *name;
1123 	int in_ns;		/* is this clock in nanoseconds? */
1124 } trace_clocks[] = {
1125 	{ trace_clock_local,		"local",	1 },
1126 	{ trace_clock_global,		"global",	1 },
1127 	{ trace_clock_counter,		"counter",	0 },
1128 	{ trace_clock_jiffies,		"uptime",	0 },
1129 	{ trace_clock,			"perf",		1 },
1130 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1131 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1132 	ARCH_TRACE_CLOCKS
1133 };
1134 
1135 /*
1136  * trace_parser_get_init - gets the buffer for trace parser
1137  */
1138 int trace_parser_get_init(struct trace_parser *parser, int size)
1139 {
1140 	memset(parser, 0, sizeof(*parser));
1141 
1142 	parser->buffer = kmalloc(size, GFP_KERNEL);
1143 	if (!parser->buffer)
1144 		return 1;
1145 
1146 	parser->size = size;
1147 	return 0;
1148 }
1149 
1150 /*
1151  * trace_parser_put - frees the buffer for trace parser
1152  */
1153 void trace_parser_put(struct trace_parser *parser)
1154 {
1155 	kfree(parser->buffer);
1156 }
1157 
1158 /*
1159  * trace_get_user - reads the user input string separated by space
1160  * (matched by isspace(ch))
1161  *
1162  * For each string found the 'struct trace_parser' is updated,
1163  * and the function returns.
1164  *
1165  * Returns number of bytes read.
1166  *
1167  * See kernel/trace/trace.h for 'struct trace_parser' details.
1168  */
1169 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1170 	size_t cnt, loff_t *ppos)
1171 {
1172 	char ch;
1173 	size_t read = 0;
1174 	ssize_t ret;
1175 
1176 	if (!*ppos)
1177 		trace_parser_clear(parser);
1178 
1179 	ret = get_user(ch, ubuf++);
1180 	if (ret)
1181 		goto out;
1182 
1183 	read++;
1184 	cnt--;
1185 
1186 	/*
1187 	 * If the parser is not finished with the last write,
1188 	 * continue reading the user input without skipping spaces.
1189 	 */
1190 	if (!parser->cont) {
1191 		/* skip white space */
1192 		while (cnt && isspace(ch)) {
1193 			ret = get_user(ch, ubuf++);
1194 			if (ret)
1195 				goto out;
1196 			read++;
1197 			cnt--;
1198 		}
1199 
1200 		/* only spaces were written */
1201 		if (isspace(ch)) {
1202 			*ppos += read;
1203 			ret = read;
1204 			goto out;
1205 		}
1206 
1207 		parser->idx = 0;
1208 	}
1209 
1210 	/* read the non-space input */
1211 	while (cnt && !isspace(ch)) {
1212 		if (parser->idx < parser->size - 1)
1213 			parser->buffer[parser->idx++] = ch;
1214 		else {
1215 			ret = -EINVAL;
1216 			goto out;
1217 		}
1218 		ret = get_user(ch, ubuf++);
1219 		if (ret)
1220 			goto out;
1221 		read++;
1222 		cnt--;
1223 	}
1224 
1225 	/* We either got finished input or we have to wait for another call. */
1226 	if (isspace(ch)) {
1227 		parser->buffer[parser->idx] = 0;
1228 		parser->cont = false;
1229 	} else if (parser->idx < parser->size - 1) {
1230 		parser->cont = true;
1231 		parser->buffer[parser->idx++] = ch;
1232 	} else {
1233 		ret = -EINVAL;
1234 		goto out;
1235 	}
1236 
1237 	*ppos += read;
1238 	ret = read;
1239 
1240 out:
1241 	return ret;
1242 }
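
/*
 * Typical calling pattern, condensed from what trace_pid_write() above
 * already does (SOME_BUF_SIZE is a placeholder): each successful call
 * leaves one whitespace-separated token in parser.buffer:
 *
 *	if (trace_parser_get_init(&parser, SOME_BUF_SIZE))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *	}
 *	trace_parser_put(&parser);
 */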
1243 
1244 /* TODO add a seq_buf_to_buffer() */
1245 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1246 {
1247 	int len;
1248 
1249 	if (trace_seq_used(s) <= s->seq.readpos)
1250 		return -EBUSY;
1251 
1252 	len = trace_seq_used(s) - s->seq.readpos;
1253 	if (cnt > len)
1254 		cnt = len;
1255 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1256 
1257 	s->seq.readpos += cnt;
1258 	return cnt;
1259 }
1260 
1261 unsigned long __read_mostly	tracing_thresh;
1262 
1263 #ifdef CONFIG_TRACER_MAX_TRACE
1264 /*
1265  * Copy the new maximum trace into the separate maximum-trace
1266  * structure. (this way the maximum trace is permanently saved,
1267  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1268  */
1269 static void
1270 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1271 {
1272 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1273 	struct trace_buffer *max_buf = &tr->max_buffer;
1274 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1275 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1276 
1277 	max_buf->cpu = cpu;
1278 	max_buf->time_start = data->preempt_timestamp;
1279 
1280 	max_data->saved_latency = tr->max_latency;
1281 	max_data->critical_start = data->critical_start;
1282 	max_data->critical_end = data->critical_end;
1283 
1284 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1285 	max_data->pid = tsk->pid;
1286 	/*
1287 	 * If tsk == current, then use current_uid(), as that does not use
1288 	 * RCU. The irq tracer can be called out of RCU scope.
1289 	 */
1290 	if (tsk == current)
1291 		max_data->uid = current_uid();
1292 	else
1293 		max_data->uid = task_uid(tsk);
1294 
1295 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1296 	max_data->policy = tsk->policy;
1297 	max_data->rt_priority = tsk->rt_priority;
1298 
1299 	/* record this tasks comm */
1300 	tracing_record_cmdline(tsk);
1301 }
1302 
1303 /**
1304  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1305  * @tr: tracer
1306  * @tsk: the task with the latency
1307  * @cpu: The cpu that initiated the trace.
1308  *
1309  * Flip the buffers between the @tr and the max_tr and record information
1310  * about which task was the cause of this latency.
1311  */
1312 void
1313 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1314 {
1315 	struct ring_buffer *buf;
1316 
1317 	if (tr->stop_count)
1318 		return;
1319 
1320 	WARN_ON_ONCE(!irqs_disabled());
1321 
1322 	if (!tr->allocated_snapshot) {
1323 		/* Only the nop tracer should hit this when disabling */
1324 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1325 		return;
1326 	}
1327 
1328 	arch_spin_lock(&tr->max_lock);
1329 
1330 	buf = tr->trace_buffer.buffer;
1331 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1332 	tr->max_buffer.buffer = buf;
1333 
1334 	__update_max_tr(tr, tsk, cpu);
1335 	arch_spin_unlock(&tr->max_lock);
1336 }
1337 
1338 /**
1339  * update_max_tr_single - only copy one trace over, and reset the rest
1340  * @tr: tracer
1341  * @tsk: task with the latency
1342  * @cpu: the cpu of the buffer to copy.
1343  *
1344  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1345  */
1346 void
1347 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1348 {
1349 	int ret;
1350 
1351 	if (tr->stop_count)
1352 		return;
1353 
1354 	WARN_ON_ONCE(!irqs_disabled());
1355 	if (!tr->allocated_snapshot) {
1356 		/* Only the nop tracer should hit this when disabling */
1357 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1358 		return;
1359 	}
1360 
1361 	arch_spin_lock(&tr->max_lock);
1362 
1363 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1364 
1365 	if (ret == -EBUSY) {
1366 		/*
1367 		 * We failed to swap the buffer due to a commit taking
1368 		 * place on this CPU. We fail to record, but we reset
1369 		 * the max trace buffer (no one writes directly to it)
1370 		 * and flag that it failed.
1371 		 */
1372 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1373 			"Failed to swap buffers due to commit in progress\n");
1374 	}
1375 
1376 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1377 
1378 	__update_max_tr(tr, tsk, cpu);
1379 	arch_spin_unlock(&tr->max_lock);
1380 }
1381 #endif /* CONFIG_TRACER_MAX_TRACE */
1382 
1383 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1384 {
1385 	/* Iterators are static, they should be filled or empty */
1386 	if (trace_buffer_iter(iter, iter->cpu_file))
1387 		return 0;
1388 
1389 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1390 				full);
1391 }
1392 
1393 #ifdef CONFIG_FTRACE_STARTUP_TEST
1394 static int run_tracer_selftest(struct tracer *type)
1395 {
1396 	struct trace_array *tr = &global_trace;
1397 	struct tracer *saved_tracer = tr->current_trace;
1398 	int ret;
1399 
1400 	if (!type->selftest || tracing_selftest_disabled)
1401 		return 0;
1402 
1403 	/*
1404 	 * Run a selftest on this tracer.
1405 	 * Here we reset the trace buffer, and set the current
1406 	 * tracer to be this tracer. The tracer can then run some
1407 	 * internal tracing to verify that everything is in order.
1408 	 * If we fail, we do not register this tracer.
1409 	 */
1410 	tracing_reset_online_cpus(&tr->trace_buffer);
1411 
1412 	tr->current_trace = type;
1413 
1414 #ifdef CONFIG_TRACER_MAX_TRACE
1415 	if (type->use_max_tr) {
1416 		/* If we expanded the buffers, make sure the max is expanded too */
1417 		if (ring_buffer_expanded)
1418 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1419 					   RING_BUFFER_ALL_CPUS);
1420 		tr->allocated_snapshot = true;
1421 	}
1422 #endif
1423 
1424 	/* the test is responsible for initializing and enabling */
1425 	pr_info("Testing tracer %s: ", type->name);
1426 	ret = type->selftest(type, tr);
1427 	/* the test is responsible for resetting too */
1428 	tr->current_trace = saved_tracer;
1429 	if (ret) {
1430 		printk(KERN_CONT "FAILED!\n");
1431 		/* Add the warning after printing 'FAILED' */
1432 		WARN_ON(1);
1433 		return -1;
1434 	}
1435 	/* Only reset on passing, to avoid touching corrupted buffers */
1436 	tracing_reset_online_cpus(&tr->trace_buffer);
1437 
1438 #ifdef CONFIG_TRACER_MAX_TRACE
1439 	if (type->use_max_tr) {
1440 		tr->allocated_snapshot = false;
1441 
1442 		/* Shrink the max buffer again */
1443 		if (ring_buffer_expanded)
1444 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1445 					   RING_BUFFER_ALL_CPUS);
1446 	}
1447 #endif
1448 
1449 	printk(KERN_CONT "PASSED\n");
1450 	return 0;
1451 }
1452 #else
1453 static inline int run_tracer_selftest(struct tracer *type)
1454 {
1455 	return 0;
1456 }
1457 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1458 
1459 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1460 
1461 static void __init apply_trace_boot_options(void);
1462 
1463 /**
1464  * register_tracer - register a tracer with the ftrace system.
1465  * @type - the plugin for the tracer
1466  *
1467  * Register a new plugin tracer.
1468  */
1469 int __init register_tracer(struct tracer *type)
1470 {
1471 	struct tracer *t;
1472 	int ret = 0;
1473 
1474 	if (!type->name) {
1475 		pr_info("Tracer must have a name\n");
1476 		return -1;
1477 	}
1478 
1479 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1480 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1481 		return -1;
1482 	}
1483 
1484 	mutex_lock(&trace_types_lock);
1485 
1486 	tracing_selftest_running = true;
1487 
1488 	for (t = trace_types; t; t = t->next) {
1489 		if (strcmp(type->name, t->name) == 0) {
1490 			/* already found */
1491 			pr_info("Tracer %s already registered\n",
1492 				type->name);
1493 			ret = -1;
1494 			goto out;
1495 		}
1496 	}
1497 
1498 	if (!type->set_flag)
1499 		type->set_flag = &dummy_set_flag;
1500 	if (!type->flags) {
1501 		/* allocate a dummy tracer_flags */
1502 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1503 		if (!type->flags) {
1504 			ret = -ENOMEM;
1505 			goto out;
1506 		}
1507 		type->flags->val = 0;
1508 		type->flags->opts = dummy_tracer_opt;
1509 	} else
1510 		if (!type->flags->opts)
1511 			type->flags->opts = dummy_tracer_opt;
1512 
1513 	/* store the tracer for __set_tracer_option */
1514 	type->flags->trace = type;
1515 
1516 	ret = run_tracer_selftest(type);
1517 	if (ret < 0)
1518 		goto out;
1519 
1520 	type->next = trace_types;
1521 	trace_types = type;
1522 	add_tracer_options(&global_trace, type);
1523 
1524  out:
1525 	tracing_selftest_running = false;
1526 	mutex_unlock(&trace_types_lock);
1527 
1528 	if (ret || !default_bootup_tracer)
1529 		goto out_unlock;
1530 
1531 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1532 		goto out_unlock;
1533 
1534 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1535 	/* Do we want this tracer to start on bootup? */
1536 	tracing_set_tracer(&global_trace, type->name);
1537 	default_bootup_tracer = NULL;
1538 
1539 	apply_trace_boot_options();
1540 
1541 	/* disable other selftests, since this will break it. */
1542 	tracing_selftest_disabled = true;
1543 #ifdef CONFIG_FTRACE_STARTUP_TEST
1544 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1545 	       type->name);
1546 #endif
1547 
1548  out_unlock:
1549 	return ret;
1550 }
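
/*
 * Minimal sketch of a tracer registration (the "noplike" tracer and its
 * callbacks are invented for the example; real tracers also provide
 * start/stop and usually a selftest):
 *
 *	static struct tracer noplike_tracer __read_mostly = {
 *		.name	= "noplike",
 *		.init	= noplike_tracer_init,
 *		.reset	= noplike_tracer_reset,
 *	};
 *
 *	static __init int init_noplike_tracer(void)
 *	{
 *		return register_tracer(&noplike_tracer);
 *	}
 *	core_initcall(init_noplike_tracer);
 */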
1551 
1552 void tracing_reset(struct trace_buffer *buf, int cpu)
1553 {
1554 	struct ring_buffer *buffer = buf->buffer;
1555 
1556 	if (!buffer)
1557 		return;
1558 
1559 	ring_buffer_record_disable(buffer);
1560 
1561 	/* Make sure all commits have finished */
1562 	synchronize_sched();
1563 	ring_buffer_reset_cpu(buffer, cpu);
1564 
1565 	ring_buffer_record_enable(buffer);
1566 }
1567 
1568 void tracing_reset_online_cpus(struct trace_buffer *buf)
1569 {
1570 	struct ring_buffer *buffer = buf->buffer;
1571 	int cpu;
1572 
1573 	if (!buffer)
1574 		return;
1575 
1576 	ring_buffer_record_disable(buffer);
1577 
1578 	/* Make sure all commits have finished */
1579 	synchronize_sched();
1580 
1581 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1582 
1583 	for_each_online_cpu(cpu)
1584 		ring_buffer_reset_cpu(buffer, cpu);
1585 
1586 	ring_buffer_record_enable(buffer);
1587 }
1588 
1589 /* Must have trace_types_lock held */
1590 void tracing_reset_all_online_cpus(void)
1591 {
1592 	struct trace_array *tr;
1593 
1594 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1595 		tracing_reset_online_cpus(&tr->trace_buffer);
1596 #ifdef CONFIG_TRACER_MAX_TRACE
1597 		tracing_reset_online_cpus(&tr->max_buffer);
1598 #endif
1599 	}
1600 }
1601 
1602 #define SAVED_CMDLINES_DEFAULT 128
1603 #define NO_CMDLINE_MAP UINT_MAX
1604 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1605 struct saved_cmdlines_buffer {
1606 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1607 	unsigned *map_cmdline_to_pid;
1608 	unsigned cmdline_num;
1609 	int cmdline_idx;
1610 	char *saved_cmdlines;
1611 };
1612 static struct saved_cmdlines_buffer *savedcmd;
1613 
1614 /* temporarily disable recording */
1615 static atomic_t trace_record_cmdline_disabled __read_mostly;
1616 
1617 static inline char *get_saved_cmdlines(int idx)
1618 {
1619 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1620 }
1621 
1622 static inline void set_cmdline(int idx, const char *cmdline)
1623 {
1624 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1625 }
1626 
1627 static int allocate_cmdlines_buffer(unsigned int val,
1628 				    struct saved_cmdlines_buffer *s)
1629 {
1630 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1631 					GFP_KERNEL);
1632 	if (!s->map_cmdline_to_pid)
1633 		return -ENOMEM;
1634 
1635 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1636 	if (!s->saved_cmdlines) {
1637 		kfree(s->map_cmdline_to_pid);
1638 		return -ENOMEM;
1639 	}
1640 
1641 	s->cmdline_idx = 0;
1642 	s->cmdline_num = val;
1643 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1644 	       sizeof(s->map_pid_to_cmdline));
1645 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1646 	       val * sizeof(*s->map_cmdline_to_pid));
1647 
1648 	return 0;
1649 }
1650 
1651 static int trace_create_savedcmd(void)
1652 {
1653 	int ret;
1654 
1655 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1656 	if (!savedcmd)
1657 		return -ENOMEM;
1658 
1659 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1660 	if (ret < 0) {
1661 		kfree(savedcmd);
1662 		savedcmd = NULL;
1663 		return -ENOMEM;
1664 	}
1665 
1666 	return 0;
1667 }
1668 
1669 int is_tracing_stopped(void)
1670 {
1671 	return global_trace.stop_count;
1672 }
1673 
1674 /**
1675  * tracing_start - quick start of the tracer
1676  *
1677  * If tracing is enabled but was stopped by tracing_stop,
1678  * this will start the tracer back up.
1679  */
1680 void tracing_start(void)
1681 {
1682 	struct ring_buffer *buffer;
1683 	unsigned long flags;
1684 
1685 	if (tracing_disabled)
1686 		return;
1687 
1688 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1689 	if (--global_trace.stop_count) {
1690 		if (global_trace.stop_count < 0) {
1691 			/* Someone screwed up their debugging */
1692 			WARN_ON_ONCE(1);
1693 			global_trace.stop_count = 0;
1694 		}
1695 		goto out;
1696 	}
1697 
1698 	/* Prevent the buffers from switching */
1699 	arch_spin_lock(&global_trace.max_lock);
1700 
1701 	buffer = global_trace.trace_buffer.buffer;
1702 	if (buffer)
1703 		ring_buffer_record_enable(buffer);
1704 
1705 #ifdef CONFIG_TRACER_MAX_TRACE
1706 	buffer = global_trace.max_buffer.buffer;
1707 	if (buffer)
1708 		ring_buffer_record_enable(buffer);
1709 #endif
1710 
1711 	arch_spin_unlock(&global_trace.max_lock);
1712 
1713  out:
1714 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1715 }
1716 
1717 static void tracing_start_tr(struct trace_array *tr)
1718 {
1719 	struct ring_buffer *buffer;
1720 	unsigned long flags;
1721 
1722 	if (tracing_disabled)
1723 		return;
1724 
1725 	/* If global, we need to also start the max tracer */
1726 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1727 		return tracing_start();
1728 
1729 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1730 
1731 	if (--tr->stop_count) {
1732 		if (tr->stop_count < 0) {
1733 			/* Someone screwed up their debugging */
1734 			WARN_ON_ONCE(1);
1735 			tr->stop_count = 0;
1736 		}
1737 		goto out;
1738 	}
1739 
1740 	buffer = tr->trace_buffer.buffer;
1741 	if (buffer)
1742 		ring_buffer_record_enable(buffer);
1743 
1744  out:
1745 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1746 }
1747 
1748 /**
1749  * tracing_stop - quick stop of the tracer
1750  *
1751  * Light weight way to stop tracing. Use in conjunction with
1752  * tracing_start.
1753  */
1754 void tracing_stop(void)
1755 {
1756 	struct ring_buffer *buffer;
1757 	unsigned long flags;
1758 
1759 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1760 	if (global_trace.stop_count++)
1761 		goto out;
1762 
1763 	/* Prevent the buffers from switching */
1764 	arch_spin_lock(&global_trace.max_lock);
1765 
1766 	buffer = global_trace.trace_buffer.buffer;
1767 	if (buffer)
1768 		ring_buffer_record_disable(buffer);
1769 
1770 #ifdef CONFIG_TRACER_MAX_TRACE
1771 	buffer = global_trace.max_buffer.buffer;
1772 	if (buffer)
1773 		ring_buffer_record_disable(buffer);
1774 #endif
1775 
1776 	arch_spin_unlock(&global_trace.max_lock);
1777 
1778  out:
1779 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1780 }
1781 
1782 static void tracing_stop_tr(struct trace_array *tr)
1783 {
1784 	struct ring_buffer *buffer;
1785 	unsigned long flags;
1786 
1787 	/* If global, we need to also stop the max tracer */
1788 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1789 		return tracing_stop();
1790 
1791 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1792 	if (tr->stop_count++)
1793 		goto out;
1794 
1795 	buffer = tr->trace_buffer.buffer;
1796 	if (buffer)
1797 		ring_buffer_record_disable(buffer);
1798 
1799  out:
1800 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1801 }
1802 
1803 void trace_stop_cmdline_recording(void);
1804 
1805 static int trace_save_cmdline(struct task_struct *tsk)
1806 {
1807 	unsigned pid, idx;
1808 
1809 	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1810 		return 0;
1811 
1812 	/*
1813 	 * It's not the end of the world if we don't get
1814 	 * the lock, but we also don't want to spin
1815 	 * nor do we want to disable interrupts,
1816 	 * so if we miss here, then better luck next time.
1817 	 */
1818 	if (!arch_spin_trylock(&trace_cmdline_lock))
1819 		return 0;
1820 
1821 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1822 	if (idx == NO_CMDLINE_MAP) {
1823 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1824 
1825 		/*
1826 		 * Check whether the cmdline buffer at idx has a pid
1827 		 * mapped. We are going to overwrite that entry so we
1828 		 * need to clear the map_pid_to_cmdline. Otherwise we
1829 		 * would read the new comm for the old pid.
1830 		 */
1831 		pid = savedcmd->map_cmdline_to_pid[idx];
1832 		if (pid != NO_CMDLINE_MAP)
1833 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1834 
1835 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1836 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1837 
1838 		savedcmd->cmdline_idx = idx;
1839 	}
1840 
1841 	set_cmdline(idx, tsk->comm);
1842 
1843 	arch_spin_unlock(&trace_cmdline_lock);
1844 
1845 	return 1;
1846 }
1847 
1848 static void __trace_find_cmdline(int pid, char comm[])
1849 {
1850 	unsigned map;
1851 
1852 	if (!pid) {
1853 		strcpy(comm, "<idle>");
1854 		return;
1855 	}
1856 
1857 	if (WARN_ON_ONCE(pid < 0)) {
1858 		strcpy(comm, "<XXX>");
1859 		return;
1860 	}
1861 
1862 	if (pid > PID_MAX_DEFAULT) {
1863 		strcpy(comm, "<...>");
1864 		return;
1865 	}
1866 
1867 	map = savedcmd->map_pid_to_cmdline[pid];
1868 	if (map != NO_CMDLINE_MAP)
1869 		strcpy(comm, get_saved_cmdlines(map));
1870 	else
1871 		strcpy(comm, "<...>");
1872 }
1873 
1874 void trace_find_cmdline(int pid, char comm[])
1875 {
1876 	preempt_disable();
1877 	arch_spin_lock(&trace_cmdline_lock);
1878 
1879 	__trace_find_cmdline(pid, comm);
1880 
1881 	arch_spin_unlock(&trace_cmdline_lock);
1882 	preempt_enable();
1883 }
1884 
1885 void tracing_record_cmdline(struct task_struct *tsk)
1886 {
1887 	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1888 		return;
1889 
1890 	if (!__this_cpu_read(trace_cmdline_save))
1891 		return;
1892 
1893 	if (trace_save_cmdline(tsk))
1894 		__this_cpu_write(trace_cmdline_save, false);
1895 }
1896 
1897 void
1898 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1899 			     int pc)
1900 {
1901 	struct task_struct *tsk = current;
1902 
1903 	entry->preempt_count		= pc & 0xff;
1904 	entry->pid			= (tsk) ? tsk->pid : 0;
1905 	entry->flags =
1906 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1907 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1908 #else
1909 		TRACE_FLAG_IRQS_NOSUPPORT |
1910 #endif
1911 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1912 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1913 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1914 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1915 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1916 }
1917 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1918 
1919 static __always_inline void
1920 trace_event_setup(struct ring_buffer_event *event,
1921 		  int type, unsigned long flags, int pc)
1922 {
1923 	struct trace_entry *ent = ring_buffer_event_data(event);
1924 
1925 	tracing_generic_entry_update(ent, flags, pc);
1926 	ent->type = type;
1927 }
1928 
1929 struct ring_buffer_event *
1930 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1931 			  int type,
1932 			  unsigned long len,
1933 			  unsigned long flags, int pc)
1934 {
1935 	struct ring_buffer_event *event;
1936 
1937 	event = ring_buffer_lock_reserve(buffer, len);
1938 	if (event != NULL)
1939 		trace_event_setup(event, type, flags, pc);
1940 
1941 	return event;
1942 }
1943 
1944 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1945 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1946 static int trace_buffered_event_ref;
1947 
1948 /**
1949  * trace_buffered_event_enable - enable buffering events
1950  *
1951  * When events are being filtered, it is quicker to use a temporary
1952  * buffer to write the event data into if there's a likely chance
1953  * that it will not be committed. The discard of the ring buffer
1954  * is not as fast as committing, and is much slower than copying
1955  * a commit.
1956  *
1957  * When an event is to be filtered, allocate per cpu buffers to
1958  * write the event data into, and if the event is filtered and discarded
1959  * it is simply dropped, otherwise, the entire data is to be committed
1960  * in one shot.
1961  */
1962 void trace_buffered_event_enable(void)
1963 {
1964 	struct ring_buffer_event *event;
1965 	struct page *page;
1966 	int cpu;
1967 
1968 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1969 
1970 	if (trace_buffered_event_ref++)
1971 		return;
1972 
1973 	for_each_tracing_cpu(cpu) {
1974 		page = alloc_pages_node(cpu_to_node(cpu),
1975 					GFP_KERNEL | __GFP_NORETRY, 0);
1976 		if (!page)
1977 			goto failed;
1978 
1979 		event = page_address(page);
1980 		memset(event, 0, sizeof(*event));
1981 
1982 		per_cpu(trace_buffered_event, cpu) = event;
1983 
1984 		preempt_disable();
1985 		if (cpu == smp_processor_id() &&
1986 		    this_cpu_read(trace_buffered_event) !=
1987 		    per_cpu(trace_buffered_event, cpu))
1988 			WARN_ON_ONCE(1);
1989 		preempt_enable();
1990 	}
1991 
1992 	return;
1993  failed:
1994 	trace_buffered_event_disable();
1995 }
1996 
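/*
 * IPI callbacks used by trace_buffered_event_disable(). Raising
 * trace_buffered_event_cnt on a CPU keeps
 * trace_event_buffer_lock_reserve() from handing out that CPU's temp
 * buffer while it is being freed.
 */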
1997 static void enable_trace_buffered_event(void *data)
1998 {
1999 	/* Probably not needed, but do it anyway */
2000 	smp_rmb();
2001 	this_cpu_dec(trace_buffered_event_cnt);
2002 }
2003 
2004 static void disable_trace_buffered_event(void *data)
2005 {
2006 	this_cpu_inc(trace_buffered_event_cnt);
2007 }
2008 
2009 /**
2010  * trace_buffered_event_disable - disable buffering events
2011  *
2012  * When a filter is removed, it is faster to not use the buffered
2013  * events, and to commit directly into the ring buffer. Free up
2014  * the temp buffers when there are no more users. This requires
2015  * special synchronization with current events.
2016  */
2017 void trace_buffered_event_disable(void)
2018 {
2019 	int cpu;
2020 
2021 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2022 
2023 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2024 		return;
2025 
2026 	if (--trace_buffered_event_ref)
2027 		return;
2028 
2029 	preempt_disable();
2030 	/* For each CPU, set the buffer as used. */
2031 	smp_call_function_many(tracing_buffer_mask,
2032 			       disable_trace_buffered_event, NULL, 1);
2033 	preempt_enable();
2034 
2035 	/* Wait for all current users to finish */
2036 	synchronize_sched();
2037 
2038 	for_each_tracing_cpu(cpu) {
2039 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2040 		per_cpu(trace_buffered_event, cpu) = NULL;
2041 	}
2042 	/*
2043 	 * Make sure trace_buffered_event is NULL before clearing
2044 	 * trace_buffered_event_cnt.
2045 	 */
2046 	smp_wmb();
2047 
2048 	preempt_disable();
2049 	/* Do the work on each cpu */
2050 	smp_call_function_many(tracing_buffer_mask,
2051 			       enable_trace_buffered_event, NULL, 1);
2052 	preempt_enable();
2053 }
2054 
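/*
 * Commit @event to @buffer. If @event was handed out from the per-cpu
 * temp buffer (see trace_event_buffer_lock_reserve() below), its data
 * is copied into the ring buffer with ring_buffer_write() and the temp
 * buffer is released; otherwise the reserved event is committed in place.
 */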
2055 void
2056 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2057 {
2058 	__this_cpu_write(trace_cmdline_save, true);
2059 
2060 	/* If this is the temp buffer, we need to commit fully */
2061 	if (this_cpu_read(trace_buffered_event) == event) {
2062 		/* Length is in event->array[0] */
2063 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
2064 		/* Release the temp buffer */
2065 		this_cpu_dec(trace_buffered_event_cnt);
2066 	} else
2067 		ring_buffer_unlock_commit(buffer, event);
2068 }
2069 
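/* Scratch buffer used when tracing is off but event triggers still need the data */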
2070 static struct ring_buffer *temp_buffer;
2071 
2072 struct ring_buffer_event *
2073 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2074 			  struct trace_event_file *trace_file,
2075 			  int type, unsigned long len,
2076 			  unsigned long flags, int pc)
2077 {
2078 	struct ring_buffer_event *entry;
2079 	int val;
2080 
2081 	*current_rb = trace_file->tr->trace_buffer.buffer;
2082 
2083 	if ((trace_file->flags &
2084 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2085 	    (entry = this_cpu_read(trace_buffered_event))) {
2086 		/* Try to use the per cpu buffer first */
2087 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2088 		if (val == 1) {
2089 			trace_event_setup(entry, type, flags, pc);
2090 			entry->array[0] = len;
2091 			return entry;
2092 		}
2093 		this_cpu_dec(trace_buffered_event_cnt);
2094 	}
2095 
2096 	entry = trace_buffer_lock_reserve(*current_rb,
2097 					 type, len, flags, pc);
2098 	/*
2099 	 * If tracing is off, but we have triggers enabled,
2100 	 * we still need to look at the event data. Use the temp_buffer
2101 	 * to store the trace event for the trigger to use. It's recursion
2102 	 * safe and will not be recorded anywhere.
2103 	 */
2104 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2105 		*current_rb = temp_buffer;
2106 		entry = trace_buffer_lock_reserve(*current_rb,
2107 						  type, len, flags, pc);
2108 	}
2109 	return entry;
2110 }
2111 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2112 
2113 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2114 				     struct ring_buffer *buffer,
2115 				     struct ring_buffer_event *event,
2116 				     unsigned long flags, int pc,
2117 				     struct pt_regs *regs)
2118 {
2119 	__buffer_unlock_commit(buffer, event);
2120 
2121 	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
2122 	ftrace_trace_userstack(buffer, flags, pc);
2123 }
2124 
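/*
 * Record a single function entry event (TRACE_FN) carrying the address
 * of the traced function and of its caller.
 */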
2125 void
2126 trace_function(struct trace_array *tr,
2127 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2128 	       int pc)
2129 {
2130 	struct trace_event_call *call = &event_function;
2131 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2132 	struct ring_buffer_event *event;
2133 	struct ftrace_entry *entry;
2134 
2135 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2136 					  flags, pc);
2137 	if (!event)
2138 		return;
2139 	entry	= ring_buffer_event_data(event);
2140 	entry->ip			= ip;
2141 	entry->parent_ip		= parent_ip;
2142 
2143 	if (!call_filter_check_discard(call, entry, buffer, event))
2144 		__buffer_unlock_commit(buffer, event);
2145 }
2146 
2147 #ifdef CONFIG_STACKTRACE
2148 
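/*
 * Per-cpu scratch area for kernel stack traces. __ftrace_trace_stack()
 * "reserves" it via ftrace_stack_reserve; nested users (interrupts,
 * NMIs) instead save at most FTRACE_STACK_ENTRIES entries directly
 * into the ring buffer event.
 */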
2149 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2150 struct ftrace_stack {
2151 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2152 };
2153 
2154 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2155 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2156 
2157 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2158 				 unsigned long flags,
2159 				 int skip, int pc, struct pt_regs *regs)
2160 {
2161 	struct trace_event_call *call = &event_kernel_stack;
2162 	struct ring_buffer_event *event;
2163 	struct stack_entry *entry;
2164 	struct stack_trace trace;
2165 	int use_stack;
2166 	int size = FTRACE_STACK_ENTRIES;
2167 
2168 	trace.nr_entries	= 0;
2169 	trace.skip		= skip;
2170 
2171 	/*
2172 	 * Since events can happen in NMIs, there's no safe way to
2173 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2174 	 * or NMI comes in, it will just have to use the default
2175 	 * FTRACE_STACK_ENTRIES.
2176 	 */
2177 	preempt_disable_notrace();
2178 
2179 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2180 	/*
2181 	 * We don't need any atomic variables, just a barrier.
2182 	 * If an interrupt comes in, we don't care, because it would
2183 	 * have exited and put the counter back to what we want.
2184 	 * We just need a barrier to keep gcc from moving things
2185 	 * around.
2186 	 */
2187 	barrier();
2188 	if (use_stack == 1) {
2189 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2190 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2191 
2192 		if (regs)
2193 			save_stack_trace_regs(regs, &trace);
2194 		else
2195 			save_stack_trace(&trace);
2196 
2197 		if (trace.nr_entries > size)
2198 			size = trace.nr_entries;
2199 	} else
2200 		/* From now on, use_stack is a boolean */
2201 		use_stack = 0;
2202 
2203 	size *= sizeof(unsigned long);
2204 
2205 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2206 					  sizeof(*entry) + size, flags, pc);
2207 	if (!event)
2208 		goto out;
2209 	entry = ring_buffer_event_data(event);
2210 
2211 	memset(&entry->caller, 0, size);
2212 
2213 	if (use_stack)
2214 		memcpy(&entry->caller, trace.entries,
2215 		       trace.nr_entries * sizeof(unsigned long));
2216 	else {
2217 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2218 		trace.entries		= entry->caller;
2219 		if (regs)
2220 			save_stack_trace_regs(regs, &trace);
2221 		else
2222 			save_stack_trace(&trace);
2223 	}
2224 
2225 	entry->size = trace.nr_entries;
2226 
2227 	if (!call_filter_check_discard(call, entry, buffer, event))
2228 		__buffer_unlock_commit(buffer, event);
2229 
2230  out:
2231 	/* Again, don't let gcc optimize things here */
2232 	barrier();
2233 	__this_cpu_dec(ftrace_stack_reserve);
2234 	preempt_enable_notrace();
2235 
2236 }
2237 
2238 static inline void ftrace_trace_stack(struct trace_array *tr,
2239 				      struct ring_buffer *buffer,
2240 				      unsigned long flags,
2241 				      int skip, int pc, struct pt_regs *regs)
2242 {
2243 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2244 		return;
2245 
2246 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2247 }
2248 
2249 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2250 		   int pc)
2251 {
2252 	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2253 }
2254 
2255 /**
2256  * trace_dump_stack - record a stack back trace in the trace buffer
2257  * @skip: Number of functions to skip (helper handlers)
2258  */
2259 void trace_dump_stack(int skip)
2260 {
2261 	unsigned long flags;
2262 
2263 	if (tracing_disabled || tracing_selftest_running)
2264 		return;
2265 
2266 	local_save_flags(flags);
2267 
2268 	/*
2269 	 * Skip 3 more; that seems to get us at the caller of
2270 	 * this function.
2271 	 */
2272 	skip += 3;
2273 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2274 			     flags, skip, preempt_count(), NULL);
2275 }
2276 
2277 static DEFINE_PER_CPU(int, user_stack_count);
2278 
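/*
 * Record the current user-space stack trace into @buffer. Recursion is
 * avoided with the per-cpu user_stack_count, and nothing is recorded
 * from NMI context since saving a user stack may fault.
 */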
2279 void
2280 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2281 {
2282 	struct trace_event_call *call = &event_user_stack;
2283 	struct ring_buffer_event *event;
2284 	struct userstack_entry *entry;
2285 	struct stack_trace trace;
2286 
2287 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2288 		return;
2289 
2290 	/*
2291 	 * NMIs cannot handle page faults, even with fixups.
2292 	 * Saving the user stack can (and often does) fault.
2293 	 */
2294 	if (unlikely(in_nmi()))
2295 		return;
2296 
2297 	/*
2298 	 * Prevent recursion, since the user stack tracing may
2299 	 * trigger other kernel events.
2300 	 */
2301 	preempt_disable();
2302 	if (__this_cpu_read(user_stack_count))
2303 		goto out;
2304 
2305 	__this_cpu_inc(user_stack_count);
2306 
2307 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2308 					  sizeof(*entry), flags, pc);
2309 	if (!event)
2310 		goto out_drop_count;
2311 	entry	= ring_buffer_event_data(event);
2312 
2313 	entry->tgid		= current->tgid;
2314 	memset(&entry->caller, 0, sizeof(entry->caller));
2315 
2316 	trace.nr_entries	= 0;
2317 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2318 	trace.skip		= 0;
2319 	trace.entries		= entry->caller;
2320 
2321 	save_stack_trace_user(&trace);
2322 	if (!call_filter_check_discard(call, entry, buffer, event))
2323 		__buffer_unlock_commit(buffer, event);
2324 
2325  out_drop_count:
2326 	__this_cpu_dec(user_stack_count);
2327  out:
2328 	preempt_enable();
2329 }
2330 
2331 #ifdef UNUSED
2332 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2333 {
2334 	ftrace_trace_userstack(tr, flags, preempt_count());
2335 }
2336 #endif /* UNUSED */
2337 
2338 #endif /* CONFIG_STACKTRACE */
2339 
2340 /* created for use with alloc_percpu */
2341 struct trace_buffer_struct {
2342 	int nesting;
2343 	char buffer[4][TRACE_BUF_SIZE];
2344 };
2345 
2346 static struct trace_buffer_struct *trace_percpu_buffer;
2347 
2348 /*
2349  * This allows for lockless recording.  If we're nested too deeply, then
2350  * this returns NULL.
2351  */
2352 static char *get_trace_buf(void)
2353 {
2354 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2355 
2356 	if (!buffer || buffer->nesting >= 4)
2357 		return NULL;
2358 
2359 	return &buffer->buffer[buffer->nesting++][0];
2360 }
2361 
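/*
 * Release the buffer handed out by get_trace_buf(). Callers keep
 * preemption disabled across the get/put pair so that the same per-cpu
 * nesting counter is decremented (see trace_vbprintk() below).
 */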
2362 static void put_trace_buf(void)
2363 {
2364 	this_cpu_dec(trace_percpu_buffer->nesting);
2365 }
2366 
2367 static int alloc_percpu_trace_buffer(void)
2368 {
2369 	struct trace_buffer_struct *buffers;
2370 
2371 	buffers = alloc_percpu(struct trace_buffer_struct);
2372 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2373 		return -ENOMEM;
2374 
2375 	trace_percpu_buffer = buffers;
2376 	return 0;
2377 }
2378 
2379 static int buffers_allocated;
2380 
2381 void trace_printk_init_buffers(void)
2382 {
2383 	if (buffers_allocated)
2384 		return;
2385 
2386 	if (alloc_percpu_trace_buffer())
2387 		return;
2388 
2389 	/* trace_printk() is for debug use only. Don't use it in production. */
2390 
2391 	pr_warn("\n");
2392 	pr_warn("**********************************************************\n");
2393 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2394 	pr_warn("**                                                      **\n");
2395 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2396 	pr_warn("**                                                      **\n");
2397 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2398 	pr_warn("** unsafe for production use.                           **\n");
2399 	pr_warn("**                                                      **\n");
2400 	pr_warn("** If you see this message and you are not debugging    **\n");
2401 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2402 	pr_warn("**                                                      **\n");
2403 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2404 	pr_warn("**********************************************************\n");
2405 
2406 	/* Expand the buffers to their configured size */
2407 	tracing_update_buffers();
2408 
2409 	buffers_allocated = 1;
2410 
2411 	/*
2412 	 * trace_printk_init_buffers() can be called by modules.
2413 	 * If that happens, then we need to start cmdline recording
2414 	 * directly here. If the global_trace.buffer is already
2415 	 * allocated here, then this was called by module code.
2416 	 */
2417 	if (global_trace.trace_buffer.buffer)
2418 		tracing_start_cmdline_record();
2419 }
2420 
2421 void trace_printk_start_comm(void)
2422 {
2423 	/* Start tracing comms if trace printk is set */
2424 	if (!buffers_allocated)
2425 		return;
2426 	tracing_start_cmdline_record();
2427 }
2428 
2429 static void trace_printk_start_stop_comm(int enabled)
2430 {
2431 	if (!buffers_allocated)
2432 		return;
2433 
2434 	if (enabled)
2435 		tracing_start_cmdline_record();
2436 	else
2437 		tracing_stop_cmdline_record();
2438 }
2439 
2440 /**
2441  * trace_vbprintk - write binary msg to tracing buffer
2442  *
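 * @ip: The address of the caller
 * @fmt: The printf format string
 * @args: The va_list of arguments for @fmt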
2443  */
2444 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2445 {
2446 	struct trace_event_call *call = &event_bprint;
2447 	struct ring_buffer_event *event;
2448 	struct ring_buffer *buffer;
2449 	struct trace_array *tr = &global_trace;
2450 	struct bprint_entry *entry;
2451 	unsigned long flags;
2452 	char *tbuffer;
2453 	int len = 0, size, pc;
2454 
2455 	if (unlikely(tracing_selftest_running || tracing_disabled))
2456 		return 0;
2457 
2458 	/* Don't pollute graph traces with trace_vprintk internals */
2459 	pause_graph_tracing();
2460 
2461 	pc = preempt_count();
2462 	preempt_disable_notrace();
2463 
2464 	tbuffer = get_trace_buf();
2465 	if (!tbuffer) {
2466 		len = 0;
2467 		goto out_nobuffer;
2468 	}
2469 
2470 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2471 
2472 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2473 		goto out;
2474 
2475 	local_save_flags(flags);
2476 	size = sizeof(*entry) + sizeof(u32) * len;
2477 	buffer = tr->trace_buffer.buffer;
2478 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2479 					  flags, pc);
2480 	if (!event)
2481 		goto out;
2482 	entry = ring_buffer_event_data(event);
2483 	entry->ip			= ip;
2484 	entry->fmt			= fmt;
2485 
2486 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2487 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2488 		__buffer_unlock_commit(buffer, event);
2489 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2490 	}
2491 
2492 out:
2493 	put_trace_buf();
2494 
2495 out_nobuffer:
2496 	preempt_enable_notrace();
2497 	unpause_graph_tracing();
2498 
2499 	return len;
2500 }
2501 EXPORT_SYMBOL_GPL(trace_vbprintk);
2502 
2503 static int
2504 __trace_array_vprintk(struct ring_buffer *buffer,
2505 		      unsigned long ip, const char *fmt, va_list args)
2506 {
2507 	struct trace_event_call *call = &event_print;
2508 	struct ring_buffer_event *event;
2509 	int len = 0, size, pc;
2510 	struct print_entry *entry;
2511 	unsigned long flags;
2512 	char *tbuffer;
2513 
2514 	if (tracing_disabled || tracing_selftest_running)
2515 		return 0;
2516 
2517 	/* Don't pollute graph traces with trace_vprintk internals */
2518 	pause_graph_tracing();
2519 
2520 	pc = preempt_count();
2521 	preempt_disable_notrace();
2522 
2523 
2524 	tbuffer = get_trace_buf();
2525 	if (!tbuffer) {
2526 		len = 0;
2527 		goto out_nobuffer;
2528 	}
2529 
2530 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2531 
2532 	local_save_flags(flags);
2533 	size = sizeof(*entry) + len + 1;
2534 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2535 					  flags, pc);
2536 	if (!event)
2537 		goto out;
2538 	entry = ring_buffer_event_data(event);
2539 	entry->ip = ip;
2540 
2541 	memcpy(&entry->buf, tbuffer, len + 1);
2542 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2543 		__buffer_unlock_commit(buffer, event);
2544 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2545 	}
2546 
2547 out:
2548 	put_trace_buf();
2549 
2550 out_nobuffer:
2551 	preempt_enable_notrace();
2552 	unpause_graph_tracing();
2553 
2554 	return len;
2555 }
2556 
2557 int trace_array_vprintk(struct trace_array *tr,
2558 			unsigned long ip, const char *fmt, va_list args)
2559 {
2560 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2561 }
2562 
2563 int trace_array_printk(struct trace_array *tr,
2564 		       unsigned long ip, const char *fmt, ...)
2565 {
2566 	int ret;
2567 	va_list ap;
2568 
2569 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2570 		return 0;
2571 
2572 	va_start(ap, fmt);
2573 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2574 	va_end(ap);
2575 	return ret;
2576 }
2577 
2578 int trace_array_printk_buf(struct ring_buffer *buffer,
2579 			   unsigned long ip, const char *fmt, ...)
2580 {
2581 	int ret;
2582 	va_list ap;
2583 
2584 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2585 		return 0;
2586 
2587 	va_start(ap, fmt);
2588 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2589 	va_end(ap);
2590 	return ret;
2591 }
2592 
2593 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2594 {
2595 	return trace_array_vprintk(&global_trace, ip, fmt, args);
2596 }
2597 EXPORT_SYMBOL_GPL(trace_vprintk);
2598 
2599 static void trace_iterator_increment(struct trace_iterator *iter)
2600 {
2601 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2602 
2603 	iter->idx++;
2604 	if (buf_iter)
2605 		ring_buffer_read(buf_iter, NULL);
2606 }
2607 
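/*
 * Peek at the next entry on @cpu without consuming it, using the
 * iterator's ring_buffer_iter when one exists and the live buffer
 * (reporting @lost_events) otherwise. Also updates iter->ent_size for
 * the returned entry.
 */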
2608 static struct trace_entry *
2609 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2610 		unsigned long *lost_events)
2611 {
2612 	struct ring_buffer_event *event;
2613 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2614 
2615 	if (buf_iter)
2616 		event = ring_buffer_iter_peek(buf_iter, ts);
2617 	else
2618 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2619 					 lost_events);
2620 
2621 	if (event) {
2622 		iter->ent_size = ring_buffer_event_length(event);
2623 		return ring_buffer_event_data(event);
2624 	}
2625 	iter->ent_size = 0;
2626 	return NULL;
2627 }
2628 
2629 static struct trace_entry *
2630 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2631 		  unsigned long *missing_events, u64 *ent_ts)
2632 {
2633 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2634 	struct trace_entry *ent, *next = NULL;
2635 	unsigned long lost_events = 0, next_lost = 0;
2636 	int cpu_file = iter->cpu_file;
2637 	u64 next_ts = 0, ts;
2638 	int next_cpu = -1;
2639 	int next_size = 0;
2640 	int cpu;
2641 
2642 	/*
2643 	 * If we are in a per_cpu trace file, don't bother iterating over
2644 	 * all cpus; just peek at that cpu directly.
2645 	 */
2646 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2647 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2648 			return NULL;
2649 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2650 		if (ent_cpu)
2651 			*ent_cpu = cpu_file;
2652 
2653 		return ent;
2654 	}
2655 
2656 	for_each_tracing_cpu(cpu) {
2657 
2658 		if (ring_buffer_empty_cpu(buffer, cpu))
2659 			continue;
2660 
2661 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2662 
2663 		/*
2664 		 * Pick the entry with the smallest timestamp:
2665 		 */
2666 		if (ent && (!next || ts < next_ts)) {
2667 			next = ent;
2668 			next_cpu = cpu;
2669 			next_ts = ts;
2670 			next_lost = lost_events;
2671 			next_size = iter->ent_size;
2672 		}
2673 	}
2674 
2675 	iter->ent_size = next_size;
2676 
2677 	if (ent_cpu)
2678 		*ent_cpu = next_cpu;
2679 
2680 	if (ent_ts)
2681 		*ent_ts = next_ts;
2682 
2683 	if (missing_events)
2684 		*missing_events = next_lost;
2685 
2686 	return next;
2687 }
2688 
2689 /* Find the next real entry, without updating the iterator itself */
2690 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2691 					  int *ent_cpu, u64 *ent_ts)
2692 {
2693 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2694 }
2695 
2696 /* Find the next real entry, and increment the iterator to the next entry */
2697 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2698 {
2699 	iter->ent = __find_next_entry(iter, &iter->cpu,
2700 				      &iter->lost_events, &iter->ts);
2701 
2702 	if (iter->ent)
2703 		trace_iterator_increment(iter);
2704 
2705 	return iter->ent ? iter : NULL;
2706 }
2707 
2708 static void trace_consume(struct trace_iterator *iter)
2709 {
2710 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2711 			    &iter->lost_events);
2712 }
2713 
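/*
 * seq_file ->next() callback: step the trace iterator forward until it
 * reaches position *pos. The iterator can never go backwards, hence the
 * iter->idx > i check.
 */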
2714 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2715 {
2716 	struct trace_iterator *iter = m->private;
2717 	int i = (int)*pos;
2718 	void *ent;
2719 
2720 	WARN_ON_ONCE(iter->leftover);
2721 
2722 	(*pos)++;
2723 
2724 	/* can't go backwards */
2725 	if (iter->idx > i)
2726 		return NULL;
2727 
2728 	if (iter->idx < 0)
2729 		ent = trace_find_next_entry_inc(iter);
2730 	else
2731 		ent = iter;
2732 
2733 	while (ent && iter->idx < i)
2734 		ent = trace_find_next_entry_inc(iter);
2735 
2736 	iter->pos = *pos;
2737 
2738 	return ent;
2739 }
2740 
2741 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2742 {
2743 	struct ring_buffer_event *event;
2744 	struct ring_buffer_iter *buf_iter;
2745 	unsigned long entries = 0;
2746 	u64 ts;
2747 
2748 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2749 
2750 	buf_iter = trace_buffer_iter(iter, cpu);
2751 	if (!buf_iter)
2752 		return;
2753 
2754 	ring_buffer_iter_reset(buf_iter);
2755 
2756 	/*
2757 	 * With the max latency tracers it is possible that a reset
2758 	 * never took place on a cpu. This is evident from the
2759 	 * timestamp being before the start of the buffer.
2760 	 */
2761 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2762 		if (ts >= iter->trace_buffer->time_start)
2763 			break;
2764 		entries++;
2765 		ring_buffer_read(buf_iter, NULL);
2766 	}
2767 
2768 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2769 }
2770 
2771 /*
2772  * The current tracer is copied to avoid taking a global lock
2773  * all around.
2774  */
2775 static void *s_start(struct seq_file *m, loff_t *pos)
2776 {
2777 	struct trace_iterator *iter = m->private;
2778 	struct trace_array *tr = iter->tr;
2779 	int cpu_file = iter->cpu_file;
2780 	void *p = NULL;
2781 	loff_t l = 0;
2782 	int cpu;
2783 
2784 	/*
2785 	 * Copy the tracer to avoid using a global lock all around.
2786 	 * iter->trace is a copy of current_trace; the pointer to the
2787 	 * name may be used instead of a strcmp(), as iter->trace->name
2788 	 * will point to the same string as current_trace->name.
2789 	 */
2790 	mutex_lock(&trace_types_lock);
2791 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2792 		*iter->trace = *tr->current_trace;
2793 	mutex_unlock(&trace_types_lock);
2794 
2795 #ifdef CONFIG_TRACER_MAX_TRACE
2796 	if (iter->snapshot && iter->trace->use_max_tr)
2797 		return ERR_PTR(-EBUSY);
2798 #endif
2799 
2800 	if (!iter->snapshot)
2801 		atomic_inc(&trace_record_cmdline_disabled);
2802 
2803 	if (*pos != iter->pos) {
2804 		iter->ent = NULL;
2805 		iter->cpu = 0;
2806 		iter->idx = -1;
2807 
2808 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2809 			for_each_tracing_cpu(cpu)
2810 				tracing_iter_reset(iter, cpu);
2811 		} else
2812 			tracing_iter_reset(iter, cpu_file);
2813 
2814 		iter->leftover = 0;
2815 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2816 			;
2817 
2818 	} else {
2819 		/*
2820 		 * If we overflowed the seq_file before, then we want
2821 		 * to just reuse the trace_seq buffer again.
2822 		 */
2823 		if (iter->leftover)
2824 			p = iter;
2825 		else {
2826 			l = *pos - 1;
2827 			p = s_next(m, p, &l);
2828 		}
2829 	}
2830 
2831 	trace_event_read_lock();
2832 	trace_access_lock(cpu_file);
2833 	return p;
2834 }
2835 
2836 static void s_stop(struct seq_file *m, void *p)
2837 {
2838 	struct trace_iterator *iter = m->private;
2839 
2840 #ifdef CONFIG_TRACER_MAX_TRACE
2841 	if (iter->snapshot && iter->trace->use_max_tr)
2842 		return;
2843 #endif
2844 
2845 	if (!iter->snapshot)
2846 		atomic_dec(&trace_record_cmdline_disabled);
2847 
2848 	trace_access_unlock(iter->cpu_file);
2849 	trace_event_read_unlock();
2850 }
2851 
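/*
 * Count the events in @buf across all tracing cpus. @entries is the
 * number of events still present in the buffer (minus any that the
 * latency tracers marked as skipped); @total additionally includes
 * events lost to ring buffer overruns.
 */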
2852 static void
2853 get_total_entries(struct trace_buffer *buf,
2854 		  unsigned long *total, unsigned long *entries)
2855 {
2856 	unsigned long count;
2857 	int cpu;
2858 
2859 	*total = 0;
2860 	*entries = 0;
2861 
2862 	for_each_tracing_cpu(cpu) {
2863 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2864 		/*
2865 		 * If this buffer has skipped entries, then we hold all
2866 		 * entries for the trace, and we need to ignore the
2867 		 * ones before the timestamp.
2868 		 */
2869 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2870 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2871 			/* total is the same as the entries */
2872 			*total += count;
2873 		} else
2874 			*total += count +
2875 				ring_buffer_overrun_cpu(buf->buffer, cpu);
2876 		*entries += count;
2877 	}
2878 }
2879 
2880 static void print_lat_help_header(struct seq_file *m)
2881 {
2882 	seq_puts(m, "#                  _------=> CPU#            \n"
2883 		    "#                 / _-----=> irqs-off        \n"
2884 		    "#                | / _----=> need-resched    \n"
2885 		    "#                || / _---=> hardirq/softirq \n"
2886 		    "#                ||| / _--=> preempt-depth   \n"
2887 		    "#                |||| /     delay            \n"
2888 		    "#  cmd     pid   ||||| time  |   caller      \n"
2889 		    "#     \\   /      |||||  \\    |   /         \n");
2890 }
2891 
2892 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2893 {
2894 	unsigned long total;
2895 	unsigned long entries;
2896 
2897 	get_total_entries(buf, &total, &entries);
2898 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2899 		   entries, total, num_online_cpus());
2900 	seq_puts(m, "#\n");
2901 }
2902 
2903 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2904 {
2905 	print_event_info(buf, m);
2906 	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2907 		    "#              | |       |          |         |\n");
2908 }
2909 
2910 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2911 {
2912 	print_event_info(buf, m);
2913 	seq_puts(m, "#                              _-----=> irqs-off\n"
2914 		    "#                             / _----=> need-resched\n"
2915 		    "#                            | / _---=> hardirq/softirq\n"
2916 		    "#                            || / _--=> preempt-depth\n"
2917 		    "#                            ||| /     delay\n"
2918 		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2919 		    "#              | |       |   ||||       |         |\n");
2920 }
2921 
2922 void
2923 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2924 {
2925 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2926 	struct trace_buffer *buf = iter->trace_buffer;
2927 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2928 	struct tracer *type = iter->trace;
2929 	unsigned long entries;
2930 	unsigned long total;
2931 	const char *name = type->name;
2934 
2935 	get_total_entries(buf, &total, &entries);
2936 
2937 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2938 		   name, UTS_RELEASE);
2939 	seq_puts(m, "# -----------------------------------"
2940 		 "---------------------------------\n");
2941 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2942 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2943 		   nsecs_to_usecs(data->saved_latency),
2944 		   entries,
2945 		   total,
2946 		   buf->cpu,
2947 #if defined(CONFIG_PREEMPT_NONE)
2948 		   "server",
2949 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2950 		   "desktop",
2951 #elif defined(CONFIG_PREEMPT)
2952 		   "preempt",
2953 #else
2954 		   "unknown",
2955 #endif
2956 		   /* These are reserved for later use */
2957 		   0, 0, 0, 0);
2958 #ifdef CONFIG_SMP
2959 	seq_printf(m, " #P:%d)\n", num_online_cpus());
2960 #else
2961 	seq_puts(m, ")\n");
2962 #endif
2963 	seq_puts(m, "#    -----------------\n");
2964 	seq_printf(m, "#    | task: %.16s-%d "
2965 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2966 		   data->comm, data->pid,
2967 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2968 		   data->policy, data->rt_priority);
2969 	seq_puts(m, "#    -----------------\n");
2970 
2971 	if (data->critical_start) {
2972 		seq_puts(m, "#  => started at: ");
2973 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2974 		trace_print_seq(m, &iter->seq);
2975 		seq_puts(m, "\n#  => ended at:   ");
2976 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2977 		trace_print_seq(m, &iter->seq);
2978 		seq_puts(m, "\n#\n");
2979 	}
2980 
2981 	seq_puts(m, "#\n");
2982 }
2983 
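/*
 * Print a "CPU n buffer started" annotation the first time output from
 * a given cpu appears, which only happens when the iterator saw ring
 * buffer overruns (TRACE_FILE_ANNOTATE) and the annotate option is set.
 */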
2984 static void test_cpu_buff_start(struct trace_iterator *iter)
2985 {
2986 	struct trace_seq *s = &iter->seq;
2987 	struct trace_array *tr = iter->tr;
2988 
2989 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2990 		return;
2991 
2992 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2993 		return;
2994 
2995 	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2996 		return;
2997 
2998 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2999 		return;
3000 
3001 	if (iter->started)
3002 		cpumask_set_cpu(iter->cpu, iter->started);
3003 
3004 	/* Don't print started cpu buffer for the first entry of the trace */
3005 	if (iter->idx > 1)
3006 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3007 				iter->cpu);
3008 }
3009 
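/*
 * Default human-readable formatter for a single trace entry. The raw,
 * hex and binary variants below are selected by the corresponding
 * trace option flags in print_trace_line().
 */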
3010 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3011 {
3012 	struct trace_array *tr = iter->tr;
3013 	struct trace_seq *s = &iter->seq;
3014 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3015 	struct trace_entry *entry;
3016 	struct trace_event *event;
3017 
3018 	entry = iter->ent;
3019 
3020 	test_cpu_buff_start(iter);
3021 
3022 	event = ftrace_find_event(entry->type);
3023 
3024 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3025 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3026 			trace_print_lat_context(iter);
3027 		else
3028 			trace_print_context(iter);
3029 	}
3030 
3031 	if (trace_seq_has_overflowed(s))
3032 		return TRACE_TYPE_PARTIAL_LINE;
3033 
3034 	if (event)
3035 		return event->funcs->trace(iter, sym_flags, event);
3036 
3037 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3038 
3039 	return trace_handle_return(s);
3040 }
3041 
3042 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3043 {
3044 	struct trace_array *tr = iter->tr;
3045 	struct trace_seq *s = &iter->seq;
3046 	struct trace_entry *entry;
3047 	struct trace_event *event;
3048 
3049 	entry = iter->ent;
3050 
3051 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3052 		trace_seq_printf(s, "%d %d %llu ",
3053 				 entry->pid, iter->cpu, iter->ts);
3054 
3055 	if (trace_seq_has_overflowed(s))
3056 		return TRACE_TYPE_PARTIAL_LINE;
3057 
3058 	event = ftrace_find_event(entry->type);
3059 	if (event)
3060 		return event->funcs->raw(iter, 0, event);
3061 
3062 	trace_seq_printf(s, "%d ?\n", entry->type);
3063 
3064 	return trace_handle_return(s);
3065 }
3066 
3067 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3068 {
3069 	struct trace_array *tr = iter->tr;
3070 	struct trace_seq *s = &iter->seq;
3071 	unsigned char newline = '\n';
3072 	struct trace_entry *entry;
3073 	struct trace_event *event;
3074 
3075 	entry = iter->ent;
3076 
3077 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3078 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3079 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3080 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3081 		if (trace_seq_has_overflowed(s))
3082 			return TRACE_TYPE_PARTIAL_LINE;
3083 	}
3084 
3085 	event = ftrace_find_event(entry->type);
3086 	if (event) {
3087 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3088 		if (ret != TRACE_TYPE_HANDLED)
3089 			return ret;
3090 	}
3091 
3092 	SEQ_PUT_FIELD(s, newline);
3093 
3094 	return trace_handle_return(s);
3095 }
3096 
3097 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3098 {
3099 	struct trace_array *tr = iter->tr;
3100 	struct trace_seq *s = &iter->seq;
3101 	struct trace_entry *entry;
3102 	struct trace_event *event;
3103 
3104 	entry = iter->ent;
3105 
3106 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3107 		SEQ_PUT_FIELD(s, entry->pid);
3108 		SEQ_PUT_FIELD(s, iter->cpu);
3109 		SEQ_PUT_FIELD(s, iter->ts);
3110 		if (trace_seq_has_overflowed(s))
3111 			return TRACE_TYPE_PARTIAL_LINE;
3112 	}
3113 
3114 	event = ftrace_find_event(entry->type);
3115 	return event ? event->funcs->binary(iter, 0, event) :
3116 		TRACE_TYPE_HANDLED;
3117 }
3118 
3119 int trace_empty(struct trace_iterator *iter)
3120 {
3121 	struct ring_buffer_iter *buf_iter;
3122 	int cpu;
3123 
3124 	/* If we are looking at one CPU buffer, only check that one */
3125 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3126 		cpu = iter->cpu_file;
3127 		buf_iter = trace_buffer_iter(iter, cpu);
3128 		if (buf_iter) {
3129 			if (!ring_buffer_iter_empty(buf_iter))
3130 				return 0;
3131 		} else {
3132 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3133 				return 0;
3134 		}
3135 		return 1;
3136 	}
3137 
3138 	for_each_tracing_cpu(cpu) {
3139 		buf_iter = trace_buffer_iter(iter, cpu);
3140 		if (buf_iter) {
3141 			if (!ring_buffer_iter_empty(buf_iter))
3142 				return 0;
3143 		} else {
3144 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3145 				return 0;
3146 		}
3147 	}
3148 
3149 	return 1;
3150 }
3151 
3152 /*  Called with trace_event_read_lock() held. */
3153 enum print_line_t print_trace_line(struct trace_iterator *iter)
3154 {
3155 	struct trace_array *tr = iter->tr;
3156 	unsigned long trace_flags = tr->trace_flags;
3157 	enum print_line_t ret;
3158 
3159 	if (iter->lost_events) {
3160 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3161 				 iter->cpu, iter->lost_events);
3162 		if (trace_seq_has_overflowed(&iter->seq))
3163 			return TRACE_TYPE_PARTIAL_LINE;
3164 	}
3165 
3166 	if (iter->trace && iter->trace->print_line) {
3167 		ret = iter->trace->print_line(iter);
3168 		if (ret != TRACE_TYPE_UNHANDLED)
3169 			return ret;
3170 	}
3171 
3172 	if (iter->ent->type == TRACE_BPUTS &&
3173 			trace_flags & TRACE_ITER_PRINTK &&
3174 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3175 		return trace_print_bputs_msg_only(iter);
3176 
3177 	if (iter->ent->type == TRACE_BPRINT &&
3178 			trace_flags & TRACE_ITER_PRINTK &&
3179 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3180 		return trace_print_bprintk_msg_only(iter);
3181 
3182 	if (iter->ent->type == TRACE_PRINT &&
3183 			trace_flags & TRACE_ITER_PRINTK &&
3184 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3185 		return trace_print_printk_msg_only(iter);
3186 
3187 	if (trace_flags & TRACE_ITER_BIN)
3188 		return print_bin_fmt(iter);
3189 
3190 	if (trace_flags & TRACE_ITER_HEX)
3191 		return print_hex_fmt(iter);
3192 
3193 	if (trace_flags & TRACE_ITER_RAW)
3194 		return print_raw_fmt(iter);
3195 
3196 	return print_trace_fmt(iter);
3197 }
3198 
3199 void trace_latency_header(struct seq_file *m)
3200 {
3201 	struct trace_iterator *iter = m->private;
3202 	struct trace_array *tr = iter->tr;
3203 
3204 	/* print nothing if the buffers are empty */
3205 	if (trace_empty(iter))
3206 		return;
3207 
3208 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3209 		print_trace_header(m, iter);
3210 
3211 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3212 		print_lat_help_header(m);
3213 }
3214 
3215 void trace_default_header(struct seq_file *m)
3216 {
3217 	struct trace_iterator *iter = m->private;
3218 	struct trace_array *tr = iter->tr;
3219 	unsigned long trace_flags = tr->trace_flags;
3220 
3221 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3222 		return;
3223 
3224 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3225 		/* print nothing if the buffers are empty */
3226 		if (trace_empty(iter))
3227 			return;
3228 		print_trace_header(m, iter);
3229 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3230 			print_lat_help_header(m);
3231 	} else {
3232 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3233 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3234 				print_func_help_header_irq(iter->trace_buffer, m);
3235 			else
3236 				print_func_help_header(iter->trace_buffer, m);
3237 		}
3238 	}
3239 }
3240 
3241 static void test_ftrace_alive(struct seq_file *m)
3242 {
3243 	if (!ftrace_is_dead())
3244 		return;
3245 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3246 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3247 }
3248 
3249 #ifdef CONFIG_TRACER_MAX_TRACE
3250 static void show_snapshot_main_help(struct seq_file *m)
3251 {
3252 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3253 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3254 		    "#                      Takes a snapshot of the main buffer.\n"
3255 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3256 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3257 		    "#                       is not a '0' or '1')\n");
3258 }
3259 
3260 static void show_snapshot_percpu_help(struct seq_file *m)
3261 {
3262 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3263 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3264 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3265 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3266 #else
3267 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3268 		    "#                     Must use main snapshot file to allocate.\n");
3269 #endif
3270 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3271 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3272 		    "#                       is not a '0' or '1')\n");
3273 }
3274 
3275 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3276 {
3277 	if (iter->tr->allocated_snapshot)
3278 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3279 	else
3280 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3281 
3282 	seq_puts(m, "# Snapshot commands:\n");
3283 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3284 		show_snapshot_main_help(m);
3285 	else
3286 		show_snapshot_percpu_help(m);
3287 }
3288 #else
3289 /* Should never be called */
3290 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3291 #endif
3292 
3293 static int s_show(struct seq_file *m, void *v)
3294 {
3295 	struct trace_iterator *iter = v;
3296 	int ret;
3297 
3298 	if (iter->ent == NULL) {
3299 		if (iter->tr) {
3300 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3301 			seq_puts(m, "#\n");
3302 			test_ftrace_alive(m);
3303 		}
3304 		if (iter->snapshot && trace_empty(iter))
3305 			print_snapshot_help(m, iter);
3306 		else if (iter->trace && iter->trace->print_header)
3307 			iter->trace->print_header(m);
3308 		else
3309 			trace_default_header(m);
3310 
3311 	} else if (iter->leftover) {
3312 		/*
3313 		 * If we filled the seq_file buffer earlier, we
3314 		 * want to just show it now.
3315 		 */
3316 		ret = trace_print_seq(m, &iter->seq);
3317 
3318 		/* ret should this time be zero, but you never know */
3319 		iter->leftover = ret;
3320 
3321 	} else {
3322 		print_trace_line(iter);
3323 		ret = trace_print_seq(m, &iter->seq);
3324 		/*
3325 		 * If we overflow the seq_file buffer, then it will
3326 		 * ask us for this data again at start up.
3327 		 * Use that instead.
3328 		 *  ret is 0 if seq_file write succeeded.
3329 		 *        -1 otherwise.
3330 		 */
3331 		iter->leftover = ret;
3332 	}
3333 
3334 	return 0;
3335 }
3336 
3337 /*
3338  * Should be used after trace_array_get(); trace_types_lock
3339  * ensures that i_cdev was already initialized.
3340  */
3341 static inline int tracing_get_cpu(struct inode *inode)
3342 {
3343 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3344 		return (long)inode->i_cdev - 1;
3345 	return RING_BUFFER_ALL_CPUS;
3346 }
3347 
3348 static const struct seq_operations tracer_seq_ops = {
3349 	.start		= s_start,
3350 	.next		= s_next,
3351 	.stop		= s_stop,
3352 	.show		= s_show,
3353 };
3354 
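/*
 * Set up a trace_iterator for reading the buffer selected by @inode
 * (one cpu or all of them). Unless @snapshot is set, tracing is stopped
 * while the iterator is open and restarted by tracing_release().
 */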
3355 static struct trace_iterator *
3356 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3357 {
3358 	struct trace_array *tr = inode->i_private;
3359 	struct trace_iterator *iter;
3360 	int cpu;
3361 
3362 	if (tracing_disabled)
3363 		return ERR_PTR(-ENODEV);
3364 
3365 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3366 	if (!iter)
3367 		return ERR_PTR(-ENOMEM);
3368 
3369 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3370 				    GFP_KERNEL);
3371 	if (!iter->buffer_iter)
3372 		goto release;
3373 
3374 	/*
3375 	 * We make a copy of the current tracer to avoid concurrent
3376 	 * changes to it while we are reading.
3377 	 */
3378 	mutex_lock(&trace_types_lock);
3379 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3380 	if (!iter->trace)
3381 		goto fail;
3382 
3383 	*iter->trace = *tr->current_trace;
3384 
3385 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3386 		goto fail;
3387 
3388 	iter->tr = tr;
3389 
3390 #ifdef CONFIG_TRACER_MAX_TRACE
3391 	/* Currently only the top directory has a snapshot */
3392 	if (tr->current_trace->print_max || snapshot)
3393 		iter->trace_buffer = &tr->max_buffer;
3394 	else
3395 #endif
3396 		iter->trace_buffer = &tr->trace_buffer;
3397 	iter->snapshot = snapshot;
3398 	iter->pos = -1;
3399 	iter->cpu_file = tracing_get_cpu(inode);
3400 	mutex_init(&iter->mutex);
3401 
3402 	/* Notify the tracer early; before we stop tracing. */
3403 	if (iter->trace && iter->trace->open)
3404 		iter->trace->open(iter);
3405 
3406 	/* Annotate start of buffers if we had overruns */
3407 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3408 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3409 
3410 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3411 	if (trace_clocks[tr->clock_id].in_ns)
3412 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3413 
3414 	/* stop the trace while dumping if we are not opening "snapshot" */
3415 	if (!iter->snapshot)
3416 		tracing_stop_tr(tr);
3417 
3418 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3419 		for_each_tracing_cpu(cpu) {
3420 			iter->buffer_iter[cpu] =
3421 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3422 		}
3423 		ring_buffer_read_prepare_sync();
3424 		for_each_tracing_cpu(cpu) {
3425 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3426 			tracing_iter_reset(iter, cpu);
3427 		}
3428 	} else {
3429 		cpu = iter->cpu_file;
3430 		iter->buffer_iter[cpu] =
3431 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3432 		ring_buffer_read_prepare_sync();
3433 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3434 		tracing_iter_reset(iter, cpu);
3435 	}
3436 
3437 	mutex_unlock(&trace_types_lock);
3438 
3439 	return iter;
3440 
3441  fail:
3442 	mutex_unlock(&trace_types_lock);
3443 	kfree(iter->trace);
3444 	kfree(iter->buffer_iter);
3445 release:
3446 	seq_release_private(inode, file);
3447 	return ERR_PTR(-ENOMEM);
3448 }
3449 
3450 int tracing_open_generic(struct inode *inode, struct file *filp)
3451 {
3452 	if (tracing_disabled)
3453 		return -ENODEV;
3454 
3455 	filp->private_data = inode->i_private;
3456 	return 0;
3457 }
3458 
3459 bool tracing_is_disabled(void)
3460 {
3461 	return (tracing_disabled) ? true : false;
3462 }
3463 
3464 /*
3465  * Open and update trace_array ref count.
3466  * Must have the current trace_array passed to it.
3467  */
3468 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3469 {
3470 	struct trace_array *tr = inode->i_private;
3471 
3472 	if (tracing_disabled)
3473 		return -ENODEV;
3474 
3475 	if (trace_array_get(tr) < 0)
3476 		return -ENODEV;
3477 
3478 	filp->private_data = inode->i_private;
3479 
3480 	return 0;
3481 }
3482 
3483 static int tracing_release(struct inode *inode, struct file *file)
3484 {
3485 	struct trace_array *tr = inode->i_private;
3486 	struct seq_file *m = file->private_data;
3487 	struct trace_iterator *iter;
3488 	int cpu;
3489 
3490 	if (!(file->f_mode & FMODE_READ)) {
3491 		trace_array_put(tr);
3492 		return 0;
3493 	}
3494 
3495 	/* Writes do not use seq_file */
3496 	iter = m->private;
3497 	mutex_lock(&trace_types_lock);
3498 
3499 	for_each_tracing_cpu(cpu) {
3500 		if (iter->buffer_iter[cpu])
3501 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3502 	}
3503 
3504 	if (iter->trace && iter->trace->close)
3505 		iter->trace->close(iter);
3506 
3507 	if (!iter->snapshot)
3508 		/* reenable tracing if it was previously enabled */
3509 		tracing_start_tr(tr);
3510 
3511 	__trace_array_put(tr);
3512 
3513 	mutex_unlock(&trace_types_lock);
3514 
3515 	mutex_destroy(&iter->mutex);
3516 	free_cpumask_var(iter->started);
3517 	kfree(iter->trace);
3518 	kfree(iter->buffer_iter);
3519 	seq_release_private(inode, file);
3520 
3521 	return 0;
3522 }
3523 
3524 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3525 {
3526 	struct trace_array *tr = inode->i_private;
3527 
3528 	trace_array_put(tr);
3529 	return 0;
3530 }
3531 
3532 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3533 {
3534 	struct trace_array *tr = inode->i_private;
3535 
3536 	trace_array_put(tr);
3537 
3538 	return single_release(inode, file);
3539 }
3540 
3541 static int tracing_open(struct inode *inode, struct file *file)
3542 {
3543 	struct trace_array *tr = inode->i_private;
3544 	struct trace_iterator *iter;
3545 	int ret = 0;
3546 
3547 	if (trace_array_get(tr) < 0)
3548 		return -ENODEV;
3549 
3550 	/* If this file was open for write, then erase contents */
3551 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3552 		int cpu = tracing_get_cpu(inode);
3553 
3554 		if (cpu == RING_BUFFER_ALL_CPUS)
3555 			tracing_reset_online_cpus(&tr->trace_buffer);
3556 		else
3557 			tracing_reset(&tr->trace_buffer, cpu);
3558 	}
3559 
3560 	if (file->f_mode & FMODE_READ) {
3561 		iter = __tracing_open(inode, file, false);
3562 		if (IS_ERR(iter))
3563 			ret = PTR_ERR(iter);
3564 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3565 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3566 	}
3567 
3568 	if (ret < 0)
3569 		trace_array_put(tr);
3570 
3571 	return ret;
3572 }
3573 
3574 /*
3575  * Some tracers are not suitable for instance buffers.
3576  * A tracer is always available for the global array (toplevel)
3577  * or if it explicitly states that it is.
3578  */
3579 static bool
3580 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3581 {
3582 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3583 }
3584 
3585 /* Find the next tracer that this trace array may use */
3586 static struct tracer *
3587 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3588 {
3589 	while (t && !trace_ok_for_array(t, tr))
3590 		t = t->next;
3591 
3592 	return t;
3593 }
3594 
3595 static void *
3596 t_next(struct seq_file *m, void *v, loff_t *pos)
3597 {
3598 	struct trace_array *tr = m->private;
3599 	struct tracer *t = v;
3600 
3601 	(*pos)++;
3602 
3603 	if (t)
3604 		t = get_tracer_for_array(tr, t->next);
3605 
3606 	return t;
3607 }
3608 
3609 static void *t_start(struct seq_file *m, loff_t *pos)
3610 {
3611 	struct trace_array *tr = m->private;
3612 	struct tracer *t;
3613 	loff_t l = 0;
3614 
3615 	mutex_lock(&trace_types_lock);
3616 
3617 	t = get_tracer_for_array(tr, trace_types);
3618 	for (; t && l < *pos; t = t_next(m, t, &l))
3619 			;
3620 
3621 	return t;
3622 }
3623 
3624 static void t_stop(struct seq_file *m, void *p)
3625 {
3626 	mutex_unlock(&trace_types_lock);
3627 }
3628 
3629 static int t_show(struct seq_file *m, void *v)
3630 {
3631 	struct tracer *t = v;
3632 
3633 	if (!t)
3634 		return 0;
3635 
3636 	seq_puts(m, t->name);
3637 	if (t->next)
3638 		seq_putc(m, ' ');
3639 	else
3640 		seq_putc(m, '\n');
3641 
3642 	return 0;
3643 }
3644 
3645 static const struct seq_operations show_traces_seq_ops = {
3646 	.start		= t_start,
3647 	.next		= t_next,
3648 	.stop		= t_stop,
3649 	.show		= t_show,
3650 };
3651 
3652 static int show_traces_open(struct inode *inode, struct file *file)
3653 {
3654 	struct trace_array *tr = inode->i_private;
3655 	struct seq_file *m;
3656 	int ret;
3657 
3658 	if (tracing_disabled)
3659 		return -ENODEV;
3660 
3661 	ret = seq_open(file, &show_traces_seq_ops);
3662 	if (ret)
3663 		return ret;
3664 
3665 	m = file->private_data;
3666 	m->private = tr;
3667 
3668 	return 0;
3669 }
3670 
3671 static ssize_t
3672 tracing_write_stub(struct file *filp, const char __user *ubuf,
3673 		   size_t count, loff_t *ppos)
3674 {
3675 	return count;
3676 }
3677 
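/*
 * lseek for tracing files: readers go through seq_lseek(); write-only
 * opens do not use seq_file, so simply reset the file position.
 */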
3678 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3679 {
3680 	int ret;
3681 
3682 	if (file->f_mode & FMODE_READ)
3683 		ret = seq_lseek(file, offset, whence);
3684 	else
3685 		file->f_pos = ret = 0;
3686 
3687 	return ret;
3688 }
3689 
3690 static const struct file_operations tracing_fops = {
3691 	.open		= tracing_open,
3692 	.read		= seq_read,
3693 	.write		= tracing_write_stub,
3694 	.llseek		= tracing_lseek,
3695 	.release	= tracing_release,
3696 };
3697 
3698 static const struct file_operations show_traces_fops = {
3699 	.open		= show_traces_open,
3700 	.read		= seq_read,
3701 	.release	= seq_release,
3702 	.llseek		= seq_lseek,
3703 };
3704 
3705 /*
3706  * The tracer itself will not take this lock, but still we want
3707  * to provide a consistent cpumask to user-space:
3708  */
3709 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3710 
3711 /*
3712  * Temporary storage for the character representation of the
3713  * CPU bitmask (and one more byte for the newline):
3714  */
3715 static char mask_str[NR_CPUS + 1];
3716 
3717 static ssize_t
3718 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3719 		     size_t count, loff_t *ppos)
3720 {
3721 	struct trace_array *tr = file_inode(filp)->i_private;
3722 	int len;
3723 
3724 	mutex_lock(&tracing_cpumask_update_lock);
3725 
3726 	len = snprintf(mask_str, count, "%*pb\n",
3727 		       cpumask_pr_args(tr->tracing_cpumask));
3728 	if (len >= count) {
3729 		count = -EINVAL;
3730 		goto out_err;
3731 	}
3732 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3733 
3734 out_err:
3735 	mutex_unlock(&tracing_cpumask_update_lock);
3736 
3737 	return count;
3738 }
3739 
3740 static ssize_t
3741 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3742 		      size_t count, loff_t *ppos)
3743 {
3744 	struct trace_array *tr = file_inode(filp)->i_private;
3745 	cpumask_var_t tracing_cpumask_new;
3746 	int err, cpu;
3747 
3748 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3749 		return -ENOMEM;
3750 
3751 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3752 	if (err)
3753 		goto err_unlock;
3754 
3755 	mutex_lock(&tracing_cpumask_update_lock);
3756 
3757 	local_irq_disable();
3758 	arch_spin_lock(&tr->max_lock);
3759 	for_each_tracing_cpu(cpu) {
3760 		/*
3761 		 * Increase/decrease the disabled counter if we are
3762 		 * about to flip a bit in the cpumask:
3763 		 */
3764 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3765 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3766 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3767 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3768 		}
3769 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3770 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3771 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3772 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3773 		}
3774 	}
3775 	arch_spin_unlock(&tr->max_lock);
3776 	local_irq_enable();
3777 
3778 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3779 
3780 	mutex_unlock(&tracing_cpumask_update_lock);
3781 	free_cpumask_var(tracing_cpumask_new);
3782 
3783 	return count;
3784 
3785 err_unlock:
3786 	free_cpumask_var(tracing_cpumask_new);
3787 
3788 	return err;
3789 }
3790 
3791 static const struct file_operations tracing_cpumask_fops = {
3792 	.open		= tracing_open_generic_tr,
3793 	.read		= tracing_cpumask_read,
3794 	.write		= tracing_cpumask_write,
3795 	.release	= tracing_release_generic_tr,
3796 	.llseek		= generic_file_llseek,
3797 };
3798 
3799 static int tracing_trace_options_show(struct seq_file *m, void *v)
3800 {
3801 	struct tracer_opt *trace_opts;
3802 	struct trace_array *tr = m->private;
3803 	u32 tracer_flags;
3804 	int i;
3805 
3806 	mutex_lock(&trace_types_lock);
3807 	tracer_flags = tr->current_trace->flags->val;
3808 	trace_opts = tr->current_trace->flags->opts;
3809 
3810 	for (i = 0; trace_options[i]; i++) {
3811 		if (tr->trace_flags & (1 << i))
3812 			seq_printf(m, "%s\n", trace_options[i]);
3813 		else
3814 			seq_printf(m, "no%s\n", trace_options[i]);
3815 	}
3816 
3817 	for (i = 0; trace_opts[i].name; i++) {
3818 		if (tracer_flags & trace_opts[i].bit)
3819 			seq_printf(m, "%s\n", trace_opts[i].name);
3820 		else
3821 			seq_printf(m, "no%s\n", trace_opts[i].name);
3822 	}
3823 	mutex_unlock(&trace_types_lock);
3824 
3825 	return 0;
3826 }
3827 
3828 static int __set_tracer_option(struct trace_array *tr,
3829 			       struct tracer_flags *tracer_flags,
3830 			       struct tracer_opt *opts, int neg)
3831 {
3832 	struct tracer *trace = tracer_flags->trace;
3833 	int ret;
3834 
3835 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3836 	if (ret)
3837 		return ret;
3838 
3839 	if (neg)
3840 		tracer_flags->val &= ~opts->bit;
3841 	else
3842 		tracer_flags->val |= opts->bit;
3843 	return 0;
3844 }
3845 
3846 /* Try to assign a tracer-specific option */
3847 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3848 {
3849 	struct tracer *trace = tr->current_trace;
3850 	struct tracer_flags *tracer_flags = trace->flags;
3851 	struct tracer_opt *opts = NULL;
3852 	int i;
3853 
3854 	for (i = 0; tracer_flags->opts[i].name; i++) {
3855 		opts = &tracer_flags->opts[i];
3856 
3857 		if (strcmp(cmp, opts->name) == 0)
3858 			return __set_tracer_option(tr, trace->flags, opts, neg);
3859 	}
3860 
3861 	return -EINVAL;
3862 }
3863 
3864 /* Some tracers require overwrite to stay enabled */
3865 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3866 {
3867 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3868 		return -1;
3869 
3870 	return 0;
3871 }
3872 
3873 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3874 {
3875 	/* do nothing if flag is already set */
3876 	if (!!(tr->trace_flags & mask) == !!enabled)
3877 		return 0;
3878 
3879 	/* Give the tracer a chance to approve the change */
3880 	if (tr->current_trace->flag_changed)
3881 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3882 			return -EINVAL;
3883 
3884 	if (enabled)
3885 		tr->trace_flags |= mask;
3886 	else
3887 		tr->trace_flags &= ~mask;
3888 
3889 	if (mask == TRACE_ITER_RECORD_CMD)
3890 		trace_event_enable_cmd_record(enabled);
3891 
3892 	if (mask == TRACE_ITER_EVENT_FORK)
3893 		trace_event_follow_fork(tr, enabled);
3894 
3895 	if (mask == TRACE_ITER_OVERWRITE) {
3896 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3897 #ifdef CONFIG_TRACER_MAX_TRACE
3898 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3899 #endif
3900 	}
3901 
3902 	if (mask == TRACE_ITER_PRINTK) {
3903 		trace_printk_start_stop_comm(enabled);
3904 		trace_printk_control(enabled);
3905 	}
3906 
3907 	return 0;
3908 }
3909 
3910 static int trace_set_options(struct trace_array *tr, char *option)
3911 {
3912 	char *cmp;
3913 	int neg = 0;
3914 	int ret = -ENODEV;
3915 	int i;
3916 	size_t orig_len = strlen(option);
3917 
3918 	cmp = strstrip(option);
3919 
3920 	if (strncmp(cmp, "no", 2) == 0) {
3921 		neg = 1;
3922 		cmp += 2;
3923 	}
3924 
3925 	mutex_lock(&trace_types_lock);
3926 
3927 	for (i = 0; trace_options[i]; i++) {
3928 		if (strcmp(cmp, trace_options[i]) == 0) {
3929 			ret = set_tracer_flag(tr, 1 << i, !neg);
3930 			break;
3931 		}
3932 	}
3933 
3934 	/* If no option could be set, test the specific tracer options */
3935 	if (!trace_options[i])
3936 		ret = set_tracer_option(tr, cmp, neg);
3937 
3938 	mutex_unlock(&trace_types_lock);
3939 
3940 	/*
3941 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
3942 	 * turn it back into a space.
3943 	 */
3944 	if (orig_len > strlen(option))
3945 		option[strlen(option)] = ' ';
3946 
3947 	return ret;
3948 }
3949 
3950 static void __init apply_trace_boot_options(void)
3951 {
3952 	char *buf = trace_boot_options_buf;
3953 	char *option;
3954 
3955 	while (true) {
3956 		option = strsep(&buf, ",");
3957 
3958 		if (!option)
3959 			break;
3960 
3961 		if (*option)
3962 			trace_set_options(&global_trace, option);
3963 
3964 		/* Put back the comma to allow this to be called again */
3965 		if (buf)
3966 			*(buf - 1) = ',';
3967 	}
3968 }
3969 
3970 static ssize_t
3971 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3972 			size_t cnt, loff_t *ppos)
3973 {
3974 	struct seq_file *m = filp->private_data;
3975 	struct trace_array *tr = m->private;
3976 	char buf[64];
3977 	int ret;
3978 
3979 	if (cnt >= sizeof(buf))
3980 		return -EINVAL;
3981 
3982 	if (copy_from_user(buf, ubuf, cnt))
3983 		return -EFAULT;
3984 
3985 	buf[cnt] = 0;
3986 
3987 	ret = trace_set_options(tr, buf);
3988 	if (ret < 0)
3989 		return ret;
3990 
3991 	*ppos += cnt;
3992 
3993 	return cnt;
3994 }
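
/*
 * Illustrative userspace sketch (not kernel code): toggling a trace flag
 * through the trace_options file, whose writes land in
 * tracing_trace_options_write() -> trace_set_options() above.  A leading
 * "no" clears the flag.  The "overwrite" option name and the tracefs
 * mount point are assumptions made for the example.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_option(const char *opt)
{
	int fd = open("/sys/kernel/tracing/trace_options", O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, opt, strlen(opt)) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* stop overwriting old events when the ring buffer is full ... */
	if (write_option("nooverwrite\n"))
		perror("nooverwrite");
	/* ... and switch back to the default overwrite behaviour */
	if (write_option("overwrite\n"))
		perror("overwrite");
	return 0;
}
#endif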
3995 
3996 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3997 {
3998 	struct trace_array *tr = inode->i_private;
3999 	int ret;
4000 
4001 	if (tracing_disabled)
4002 		return -ENODEV;
4003 
4004 	if (trace_array_get(tr) < 0)
4005 		return -ENODEV;
4006 
4007 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4008 	if (ret < 0)
4009 		trace_array_put(tr);
4010 
4011 	return ret;
4012 }
4013 
4014 static const struct file_operations tracing_iter_fops = {
4015 	.open		= tracing_trace_options_open,
4016 	.read		= seq_read,
4017 	.llseek		= seq_lseek,
4018 	.release	= tracing_single_release_tr,
4019 	.write		= tracing_trace_options_write,
4020 };
4021 
4022 static const char readme_msg[] =
4023 	"tracing mini-HOWTO:\n\n"
4024 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4025 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4026 	" Important files:\n"
4027 	"  trace\t\t\t- The static contents of the buffer\n"
4028 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4029 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4030 	"  current_tracer\t- function and latency tracers\n"
4031 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4032 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4033 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4034 	"  trace_clock\t\t- change the clock used to order events\n"
4035 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4036 	"      global:   Synced across CPUs but slows tracing down.\n"
4037 	"     counter:   Not a clock, but just an increment\n"
4038 	"      uptime:   Jiffy counter from time of boot\n"
4039 	"        perf:   Same clock that perf events use\n"
4040 #ifdef CONFIG_X86_64
4041 	"     x86-tsc:   TSC cycle counter\n"
4042 #endif
4043 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4044 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4045 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4046 	"\t\t\t  Remove sub-buffer with rmdir\n"
4047 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4048 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4049 	"\t\t\t  option name\n"
4050 	"  saved_cmdlines_size\t- echo the number of comm-pid pairs to keep in here\n"
4051 #ifdef CONFIG_DYNAMIC_FTRACE
4052 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4053 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4054 	"\t\t\t  functions\n"
4055 	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4056 	"\t     modules: Can select a group via module\n"
4057 	"\t      Format: :mod:<module-name>\n"
4058 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4059 	"\t    triggers: a command to perform when function is hit\n"
4060 	"\t      Format: <function>:<trigger>[:count]\n"
4061 	"\t     trigger: traceon, traceoff\n"
4062 	"\t\t      enable_event:<system>:<event>\n"
4063 	"\t\t      disable_event:<system>:<event>\n"
4064 #ifdef CONFIG_STACKTRACE
4065 	"\t\t      stacktrace\n"
4066 #endif
4067 #ifdef CONFIG_TRACER_SNAPSHOT
4068 	"\t\t      snapshot\n"
4069 #endif
4070 	"\t\t      dump\n"
4071 	"\t\t      cpudump\n"
4072 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4073 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4074 	"\t     The first one will disable tracing every time do_fault is hit\n"
4075 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4076 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4077 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4078 	"\t       the counter will not decrement. It only decrements when the\n"
4079 	"\t       trigger did work\n"
4080 	"\t     To remove trigger without count:\n"
4081 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4082 	"\t     To remove trigger with a count:\n"
4083 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4084 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4085 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4086 	"\t    modules: Can select a group via module command :mod:\n"
4087 	"\t    Does not accept triggers\n"
4088 #endif /* CONFIG_DYNAMIC_FTRACE */
4089 #ifdef CONFIG_FUNCTION_TRACER
4090 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4091 	"\t\t    (function)\n"
4092 #endif
4093 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4094 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4095 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4096 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4097 #endif
4098 #ifdef CONFIG_TRACER_SNAPSHOT
4099 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4100 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4101 	"\t\t\t  information\n"
4102 #endif
4103 #ifdef CONFIG_STACK_TRACER
4104 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4105 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4106 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4107 	"\t\t\t  new trace)\n"
4108 #ifdef CONFIG_DYNAMIC_FTRACE
4109 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4110 	"\t\t\t  traces\n"
4111 #endif
4112 #endif /* CONFIG_STACK_TRACER */
4113 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4114 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4115 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4116 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4117 	"\t\t\t  events\n"
4118 	"      filter\t\t- If set, only events passing filter are traced\n"
4119 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4120 	"\t\t\t  <event>:\n"
4121 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4122 	"      filter\t\t- If set, only events passing filter are traced\n"
4123 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4124 	"\t    Format: <trigger>[:count][if <filter>]\n"
4125 	"\t   trigger: traceon, traceoff\n"
4126 	"\t            enable_event:<system>:<event>\n"
4127 	"\t            disable_event:<system>:<event>\n"
4128 #ifdef CONFIG_HIST_TRIGGERS
4129 	"\t            enable_hist:<system>:<event>\n"
4130 	"\t            disable_hist:<system>:<event>\n"
4131 #endif
4132 #ifdef CONFIG_STACKTRACE
4133 	"\t\t    stacktrace\n"
4134 #endif
4135 #ifdef CONFIG_TRACER_SNAPSHOT
4136 	"\t\t    snapshot\n"
4137 #endif
4138 #ifdef CONFIG_HIST_TRIGGERS
4139 	"\t\t    hist (see below)\n"
4140 #endif
4141 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4142 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4143 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4144 	"\t                  events/block/block_unplug/trigger\n"
4145 	"\t   The first disables tracing every time block_unplug is hit.\n"
4146 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4147 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4148 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4149 	"\t   Like function triggers, the counter is only decremented if it\n"
4150 	"\t    enabled or disabled tracing.\n"
4151 	"\t   To remove a trigger without a count:\n"
4152 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4153 	"\t   To remove a trigger with a count:\n"
4154 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4155 	"\t   Filters can be ignored when removing a trigger.\n"
4156 #ifdef CONFIG_HIST_TRIGGERS
4157 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4158 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4159 	"\t            [:values=<field1[,field2,...]>]\n"
4160 	"\t            [:sort=<field1[,field2,...]>]\n"
4161 	"\t            [:size=#entries]\n"
4162 	"\t            [:pause][:continue][:clear]\n"
4163 	"\t            [:name=histname1]\n"
4164 	"\t            [if <filter>]\n\n"
4165 	"\t    When a matching event is hit, an entry is added to a hash\n"
4166 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4167 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4168 	"\t    correspond to fields in the event's format description.  Keys\n"
4169 	"\t    can be any field, or the special string 'stacktrace'.\n"
4170 	"\t    Compound keys consisting of up to two fields can be specified\n"
4171 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4172 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4173 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4174 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4175 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4176 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4177 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4178 	"\t    its histogram data will be shared with other triggers of the\n"
4179 	"\t    same name, and trigger hits will update this common data.\n\n"
4180 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4181 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4182 	"\t    triggers attached to an event, there will be a table for each\n"
4183 	"\t    trigger in the output.  The table displayed for a named\n"
4184 	"\t    trigger will be the same as any other instance having the\n"
4185 	"\t    same name.  The default format used to display a given field\n"
4186 	"\t    can be modified by appending any of the following modifiers\n"
4187 	"\t    to the field name, as applicable:\n\n"
4188 	"\t            .hex        display a number as a hex value\n"
4189 	"\t            .sym        display an address as a symbol\n"
4190 	"\t            .sym-offset display an address as a symbol and offset\n"
4191 	"\t            .execname   display a common_pid as a program name\n"
4192 	"\t            .syscall    display a syscall id as a syscall name\n"
4193 	"\t            .log2       display log2 value rather than raw number\n\n"
4194 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4195 	"\t    trigger or to start a hist trigger but not log any events\n"
4196 	"\t    until told to do so.  'continue' can be used to start or\n"
4197 	"\t    restart a paused hist trigger.\n\n"
4198 	"\t    The 'clear' parameter will clear the contents of a running\n"
4199 	"\t    hist trigger and leave its current paused/active state\n"
4200 	"\t    unchanged.\n\n"
4201 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4202 	"\t    have one event conditionally start and stop another event's\n"
4203 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4204 	"\t    the enable_event and disable_event triggers.\n"
4205 #endif
4206 ;
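
/*
 * Illustrative userspace sketch (not kernel code): the same function
 * trigger shown in the mini-HOWTO above ("echo do_fault:traceoff >
 * set_ftrace_filter"), issued with a plain write().  Requires
 * CONFIG_DYNAMIC_FTRACE; the tracefs mount point is an assumption.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *cmd = "do_fault:traceoff\n";
	int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);

	if (fd < 0) {
		perror("open set_ftrace_filter");
		return 1;
	}
	/* disable tracing every time do_fault is hit */
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write set_ftrace_filter");
	close(fd);
	return 0;
}
#endif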
4207 
4208 static ssize_t
4209 tracing_readme_read(struct file *filp, char __user *ubuf,
4210 		       size_t cnt, loff_t *ppos)
4211 {
4212 	return simple_read_from_buffer(ubuf, cnt, ppos,
4213 					readme_msg, strlen(readme_msg));
4214 }
4215 
4216 static const struct file_operations tracing_readme_fops = {
4217 	.open		= tracing_open_generic,
4218 	.read		= tracing_readme_read,
4219 	.llseek		= generic_file_llseek,
4220 };
4221 
4222 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4223 {
4224 	unsigned int *ptr = v;
4225 
4226 	if (*pos || m->count)
4227 		ptr++;
4228 
4229 	(*pos)++;
4230 
4231 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4232 	     ptr++) {
4233 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4234 			continue;
4235 
4236 		return ptr;
4237 	}
4238 
4239 	return NULL;
4240 }
4241 
4242 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4243 {
4244 	void *v;
4245 	loff_t l = 0;
4246 
4247 	preempt_disable();
4248 	arch_spin_lock(&trace_cmdline_lock);
4249 
4250 	v = &savedcmd->map_cmdline_to_pid[0];
4251 	while (l <= *pos) {
4252 		v = saved_cmdlines_next(m, v, &l);
4253 		if (!v)
4254 			return NULL;
4255 	}
4256 
4257 	return v;
4258 }
4259 
4260 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4261 {
4262 	arch_spin_unlock(&trace_cmdline_lock);
4263 	preempt_enable();
4264 }
4265 
4266 static int saved_cmdlines_show(struct seq_file *m, void *v)
4267 {
4268 	char buf[TASK_COMM_LEN];
4269 	unsigned int *pid = v;
4270 
4271 	__trace_find_cmdline(*pid, buf);
4272 	seq_printf(m, "%d %s\n", *pid, buf);
4273 	return 0;
4274 }
4275 
4276 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4277 	.start		= saved_cmdlines_start,
4278 	.next		= saved_cmdlines_next,
4279 	.stop		= saved_cmdlines_stop,
4280 	.show		= saved_cmdlines_show,
4281 };
4282 
4283 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4284 {
4285 	if (tracing_disabled)
4286 		return -ENODEV;
4287 
4288 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4289 }
4290 
4291 static const struct file_operations tracing_saved_cmdlines_fops = {
4292 	.open		= tracing_saved_cmdlines_open,
4293 	.read		= seq_read,
4294 	.llseek		= seq_lseek,
4295 	.release	= seq_release,
4296 };
4297 
4298 static ssize_t
4299 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4300 				 size_t cnt, loff_t *ppos)
4301 {
4302 	char buf[64];
4303 	int r;
4304 
4305 	arch_spin_lock(&trace_cmdline_lock);
4306 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4307 	arch_spin_unlock(&trace_cmdline_lock);
4308 
4309 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4310 }
4311 
4312 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4313 {
4314 	kfree(s->saved_cmdlines);
4315 	kfree(s->map_cmdline_to_pid);
4316 	kfree(s);
4317 }
4318 
4319 static int tracing_resize_saved_cmdlines(unsigned int val)
4320 {
4321 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4322 
4323 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4324 	if (!s)
4325 		return -ENOMEM;
4326 
4327 	if (allocate_cmdlines_buffer(val, s) < 0) {
4328 		kfree(s);
4329 		return -ENOMEM;
4330 	}
4331 
4332 	arch_spin_lock(&trace_cmdline_lock);
4333 	savedcmd_temp = savedcmd;
4334 	savedcmd = s;
4335 	arch_spin_unlock(&trace_cmdline_lock);
4336 	free_saved_cmdlines_buffer(savedcmd_temp);
4337 
4338 	return 0;
4339 }
4340 
4341 static ssize_t
4342 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4343 				  size_t cnt, loff_t *ppos)
4344 {
4345 	unsigned long val;
4346 	int ret;
4347 
4348 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4349 	if (ret)
4350 		return ret;
4351 
4352 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4353 	if (!val || val > PID_MAX_DEFAULT)
4354 		return -EINVAL;
4355 
4356 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4357 	if (ret < 0)
4358 		return ret;
4359 
4360 	*ppos += cnt;
4361 
4362 	return cnt;
4363 }
4364 
4365 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4366 	.open		= tracing_open_generic,
4367 	.read		= tracing_saved_cmdlines_size_read,
4368 	.write		= tracing_saved_cmdlines_size_write,
4369 };
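
/*
 * Illustrative userspace sketch (not kernel code): resizing the saved
 * cmdlines map through the saved_cmdlines_size file handled above, then
 * dumping the "pid comm" pairs that saved_cmdlines_show() emits.  The
 * "saved_cmdlines" file name and the tracefs mount point are assumptions.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	/* keep up to 1024 comm-pid pairs (must be >= 1 and <= PID_MAX_DEFAULT) */
	fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
	if (fd >= 0) {
		if (write(fd, "1024\n", 5) < 0)
			perror("write saved_cmdlines_size");
		close(fd);
	}

	fd = open("/sys/kernel/tracing/saved_cmdlines", O_RDONLY);
	if (fd < 0) {
		perror("open saved_cmdlines");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
#endif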
4370 
4371 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4372 static union trace_enum_map_item *
4373 update_enum_map(union trace_enum_map_item *ptr)
4374 {
4375 	if (!ptr->map.enum_string) {
4376 		if (ptr->tail.next) {
4377 			ptr = ptr->tail.next;
4378 			/* Set ptr to the next real item (skip head) */
4379 			ptr++;
4380 		} else
4381 			return NULL;
4382 	}
4383 	return ptr;
4384 }
4385 
4386 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4387 {
4388 	union trace_enum_map_item *ptr = v;
4389 
4390 	/*
4391 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4392 	 * This really should never happen.
4393 	 */
4394 	ptr = update_enum_map(ptr);
4395 	if (WARN_ON_ONCE(!ptr))
4396 		return NULL;
4397 
4398 	ptr++;
4399 
4400 	(*pos)++;
4401 
4402 	ptr = update_enum_map(ptr);
4403 
4404 	return ptr;
4405 }
4406 
4407 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4408 {
4409 	union trace_enum_map_item *v;
4410 	loff_t l = 0;
4411 
4412 	mutex_lock(&trace_enum_mutex);
4413 
4414 	v = trace_enum_maps;
4415 	if (v)
4416 		v++;
4417 
4418 	while (v && l < *pos) {
4419 		v = enum_map_next(m, v, &l);
4420 	}
4421 
4422 	return v;
4423 }
4424 
4425 static void enum_map_stop(struct seq_file *m, void *v)
4426 {
4427 	mutex_unlock(&trace_enum_mutex);
4428 }
4429 
4430 static int enum_map_show(struct seq_file *m, void *v)
4431 {
4432 	union trace_enum_map_item *ptr = v;
4433 
4434 	seq_printf(m, "%s %ld (%s)\n",
4435 		   ptr->map.enum_string, ptr->map.enum_value,
4436 		   ptr->map.system);
4437 
4438 	return 0;
4439 }
4440 
4441 static const struct seq_operations tracing_enum_map_seq_ops = {
4442 	.start		= enum_map_start,
4443 	.next		= enum_map_next,
4444 	.stop		= enum_map_stop,
4445 	.show		= enum_map_show,
4446 };
4447 
4448 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4449 {
4450 	if (tracing_disabled)
4451 		return -ENODEV;
4452 
4453 	return seq_open(filp, &tracing_enum_map_seq_ops);
4454 }
4455 
4456 static const struct file_operations tracing_enum_map_fops = {
4457 	.open		= tracing_enum_map_open,
4458 	.read		= seq_read,
4459 	.llseek		= seq_lseek,
4460 	.release	= seq_release,
4461 };
4462 
4463 static inline union trace_enum_map_item *
4464 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4465 {
4466 	/* Return tail of array given the head */
4467 	return ptr + ptr->head.length + 1;
4468 }
4469 
4470 static void
4471 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4472 			   int len)
4473 {
4474 	struct trace_enum_map **stop;
4475 	struct trace_enum_map **map;
4476 	union trace_enum_map_item *map_array;
4477 	union trace_enum_map_item *ptr;
4478 
4479 	stop = start + len;
4480 
4481 	/*
4482 	 * The trace_enum_maps contains the map plus a head and tail item,
4483 	 * where the head holds the module and the length of the array, and the
4484 	 * tail holds a pointer to the next list.
4485 	 */
4486 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4487 	if (!map_array) {
4488 		pr_warn("Unable to allocate trace enum mapping\n");
4489 		return;
4490 	}
4491 
4492 	mutex_lock(&trace_enum_mutex);
4493 
4494 	if (!trace_enum_maps)
4495 		trace_enum_maps = map_array;
4496 	else {
4497 		ptr = trace_enum_maps;
4498 		for (;;) {
4499 			ptr = trace_enum_jmp_to_tail(ptr);
4500 			if (!ptr->tail.next)
4501 				break;
4502 			ptr = ptr->tail.next;
4503 
4504 		}
4505 		ptr->tail.next = map_array;
4506 	}
4507 	map_array->head.mod = mod;
4508 	map_array->head.length = len;
4509 	map_array++;
4510 
4511 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4512 		map_array->map = **map;
4513 		map_array++;
4514 	}
4515 	memset(map_array, 0, sizeof(*map_array));
4516 
4517 	mutex_unlock(&trace_enum_mutex);
4518 }
4519 
4520 static void trace_create_enum_file(struct dentry *d_tracer)
4521 {
4522 	trace_create_file("enum_map", 0444, d_tracer,
4523 			  NULL, &tracing_enum_map_fops);
4524 }
4525 
4526 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4527 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4528 static inline void trace_insert_enum_map_file(struct module *mod,
4529 			      struct trace_enum_map **start, int len) { }
4530 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4531 
4532 static void trace_insert_enum_map(struct module *mod,
4533 				  struct trace_enum_map **start, int len)
4534 {
4535 	struct trace_enum_map **map;
4536 
4537 	if (len <= 0)
4538 		return;
4539 
4540 	map = start;
4541 
4542 	trace_event_enum_update(map, len);
4543 
4544 	trace_insert_enum_map_file(mod, start, len);
4545 }
4546 
4547 static ssize_t
4548 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4549 		       size_t cnt, loff_t *ppos)
4550 {
4551 	struct trace_array *tr = filp->private_data;
4552 	char buf[MAX_TRACER_SIZE+2];
4553 	int r;
4554 
4555 	mutex_lock(&trace_types_lock);
4556 	r = sprintf(buf, "%s\n", tr->current_trace->name);
4557 	mutex_unlock(&trace_types_lock);
4558 
4559 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4560 }
4561 
4562 int tracer_init(struct tracer *t, struct trace_array *tr)
4563 {
4564 	tracing_reset_online_cpus(&tr->trace_buffer);
4565 	return t->init(tr);
4566 }
4567 
4568 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4569 {
4570 	int cpu;
4571 
4572 	for_each_tracing_cpu(cpu)
4573 		per_cpu_ptr(buf->data, cpu)->entries = val;
4574 }
4575 
4576 #ifdef CONFIG_TRACER_MAX_TRACE
4577 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
4578 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4579 					struct trace_buffer *size_buf, int cpu_id)
4580 {
4581 	int cpu, ret = 0;
4582 
4583 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4584 		for_each_tracing_cpu(cpu) {
4585 			ret = ring_buffer_resize(trace_buf->buffer,
4586 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4587 			if (ret < 0)
4588 				break;
4589 			per_cpu_ptr(trace_buf->data, cpu)->entries =
4590 				per_cpu_ptr(size_buf->data, cpu)->entries;
4591 		}
4592 	} else {
4593 		ret = ring_buffer_resize(trace_buf->buffer,
4594 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4595 		if (ret == 0)
4596 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4597 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4598 	}
4599 
4600 	return ret;
4601 }
4602 #endif /* CONFIG_TRACER_MAX_TRACE */
4603 
4604 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4605 					unsigned long size, int cpu)
4606 {
4607 	int ret;
4608 
4609 	/*
4610 	 * If the kernel or the user changes the size of the ring buffer,
4611 	 * we use the size that was given and we can forget about
4612 	 * expanding it later.
4613 	 */
4614 	ring_buffer_expanded = true;
4615 
4616 	/* May be called before buffers are initialized */
4617 	if (!tr->trace_buffer.buffer)
4618 		return 0;
4619 
4620 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4621 	if (ret < 0)
4622 		return ret;
4623 
4624 #ifdef CONFIG_TRACER_MAX_TRACE
4625 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4626 	    !tr->current_trace->use_max_tr)
4627 		goto out;
4628 
4629 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4630 	if (ret < 0) {
4631 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4632 						     &tr->trace_buffer, cpu);
4633 		if (r < 0) {
4634 			/*
4635 			 * AARGH! We are left with a max buffer of a
4636 			 * different size!!!!
4637 			 * The max buffer is our "snapshot" buffer.
4638 			 * When a tracer needs a snapshot (one of the
4639 			 * latency tracers), it swaps the max buffer
4640 			 * with the saved snapshot. We succeeded in
4641 			 * updating the size of the main buffer, but
4642 			 * failed to update the size of the max buffer.
4643 			 * Then, when we tried to reset the main buffer
4644 			 * to its original size, that failed too. This
4645 			 * is very unlikely to happen, but if it does,
4646 			 * warn and kill all tracing.
4647 			 */
4648 			WARN_ON(1);
4649 			tracing_disabled = 1;
4650 		}
4651 		return ret;
4652 	}
4653 
4654 	if (cpu == RING_BUFFER_ALL_CPUS)
4655 		set_buffer_entries(&tr->max_buffer, size);
4656 	else
4657 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4658 
4659  out:
4660 #endif /* CONFIG_TRACER_MAX_TRACE */
4661 
4662 	if (cpu == RING_BUFFER_ALL_CPUS)
4663 		set_buffer_entries(&tr->trace_buffer, size);
4664 	else
4665 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4666 
4667 	return ret;
4668 }
4669 
4670 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4671 					  unsigned long size, int cpu_id)
4672 {
4673 	int ret = size;
4674 
4675 	mutex_lock(&trace_types_lock);
4676 
4677 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4678 		/* make sure this cpu is enabled in the mask */
4679 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4680 			ret = -EINVAL;
4681 			goto out;
4682 		}
4683 	}
4684 
4685 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4686 	if (ret < 0)
4687 		ret = -ENOMEM;
4688 
4689 out:
4690 	mutex_unlock(&trace_types_lock);
4691 
4692 	return ret;
4693 }
4694 
4695 
4696 /**
4697  * tracing_update_buffers - used by tracing facility to expand ring buffers
4698  *
4699  * To save memory when tracing is configured in but never used, the
4700  * ring buffers are set to a minimum size. But once a user starts to
4701  * use the tracing facility, they need to grow to their default
4702  * size.
4703  *
4704  * This function is to be called when a tracer is about to be used.
4705  */
4706 int tracing_update_buffers(void)
4707 {
4708 	int ret = 0;
4709 
4710 	mutex_lock(&trace_types_lock);
4711 	if (!ring_buffer_expanded)
4712 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4713 						RING_BUFFER_ALL_CPUS);
4714 	mutex_unlock(&trace_types_lock);
4715 
4716 	return ret;
4717 }
4718 
4719 struct trace_option_dentry;
4720 
4721 static void
4722 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4723 
4724 /*
4725  * Used to clear out the tracer before deletion of an instance.
4726  * Must have trace_types_lock held.
4727  */
4728 static void tracing_set_nop(struct trace_array *tr)
4729 {
4730 	if (tr->current_trace == &nop_trace)
4731 		return;
4732 
4733 	tr->current_trace->enabled--;
4734 
4735 	if (tr->current_trace->reset)
4736 		tr->current_trace->reset(tr);
4737 
4738 	tr->current_trace = &nop_trace;
4739 }
4740 
4741 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4742 {
4743 	/* Only enable if the directory has been created already. */
4744 	if (!tr->dir)
4745 		return;
4746 
4747 	create_trace_option_files(tr, t);
4748 }
4749 
4750 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4751 {
4752 	struct tracer *t;
4753 #ifdef CONFIG_TRACER_MAX_TRACE
4754 	bool had_max_tr;
4755 #endif
4756 	int ret = 0;
4757 
4758 	mutex_lock(&trace_types_lock);
4759 
4760 	if (!ring_buffer_expanded) {
4761 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4762 						RING_BUFFER_ALL_CPUS);
4763 		if (ret < 0)
4764 			goto out;
4765 		ret = 0;
4766 	}
4767 
4768 	for (t = trace_types; t; t = t->next) {
4769 		if (strcmp(t->name, buf) == 0)
4770 			break;
4771 	}
4772 	if (!t) {
4773 		ret = -EINVAL;
4774 		goto out;
4775 	}
4776 	if (t == tr->current_trace)
4777 		goto out;
4778 
4779 	/* Some tracers are only allowed for the top level buffer */
4780 	if (!trace_ok_for_array(t, tr)) {
4781 		ret = -EINVAL;
4782 		goto out;
4783 	}
4784 
4785 	/* If trace pipe files are being read, we can't change the tracer */
4786 	if (tr->current_trace->ref) {
4787 		ret = -EBUSY;
4788 		goto out;
4789 	}
4790 
4791 	trace_branch_disable();
4792 
4793 	tr->current_trace->enabled--;
4794 
4795 	if (tr->current_trace->reset)
4796 		tr->current_trace->reset(tr);
4797 
4798 	/* Current trace needs to be nop_trace before synchronize_sched */
4799 	tr->current_trace = &nop_trace;
4800 
4801 #ifdef CONFIG_TRACER_MAX_TRACE
4802 	had_max_tr = tr->allocated_snapshot;
4803 
4804 	if (had_max_tr && !t->use_max_tr) {
4805 		/*
4806 		 * We need to make sure that the update_max_tr sees that
4807 		 * current_trace changed to nop_trace to keep it from
4808 		 * swapping the buffers after we resize it.
4809 		 * The update_max_tr is called with interrupts disabled,
4810 		 * so a synchronize_sched() is sufficient.
4811 		 */
4812 		synchronize_sched();
4813 		free_snapshot(tr);
4814 	}
4815 #endif
4816 
4817 #ifdef CONFIG_TRACER_MAX_TRACE
4818 	if (t->use_max_tr && !had_max_tr) {
4819 		ret = alloc_snapshot(tr);
4820 		if (ret < 0)
4821 			goto out;
4822 	}
4823 #endif
4824 
4825 	if (t->init) {
4826 		ret = tracer_init(t, tr);
4827 		if (ret)
4828 			goto out;
4829 	}
4830 
4831 	tr->current_trace = t;
4832 	tr->current_trace->enabled++;
4833 	trace_branch_enable(tr);
4834  out:
4835 	mutex_unlock(&trace_types_lock);
4836 
4837 	return ret;
4838 }
4839 
4840 static ssize_t
4841 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4842 			size_t cnt, loff_t *ppos)
4843 {
4844 	struct trace_array *tr = filp->private_data;
4845 	char buf[MAX_TRACER_SIZE+1];
4846 	int i;
4847 	size_t ret;
4848 	int err;
4849 
4850 	ret = cnt;
4851 
4852 	if (cnt > MAX_TRACER_SIZE)
4853 		cnt = MAX_TRACER_SIZE;
4854 
4855 	if (copy_from_user(buf, ubuf, cnt))
4856 		return -EFAULT;
4857 
4858 	buf[cnt] = 0;
4859 
4860 	/* strip trailing whitespace. */
4861 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4862 		buf[i] = 0;
4863 
4864 	err = tracing_set_tracer(tr, buf);
4865 	if (err)
4866 		return err;
4867 
4868 	*ppos += ret;
4869 
4870 	return ret;
4871 }
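
/*
 * Illustrative userspace sketch (not kernel code): selecting a tracer
 * through the current_tracer file (tracing_set_trace_write() above) and
 * reading the choice back (tracing_set_trace_read()).  "nop" is the
 * no-op tracer referenced above; other names come from
 * available_tracers.  The tracefs mount point is an assumption.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);

	if (fd < 0) {
		perror("open current_tracer");
		return 1;
	}
	if (write(fd, "nop\n", 4) < 0)
		perror("write current_tracer");
	close(fd);

	fd = open("/sys/kernel/tracing/current_tracer", O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("current_tracer: %s", buf);
	}
	close(fd);
	return 0;
}
#endif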
4872 
4873 static ssize_t
4874 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4875 		   size_t cnt, loff_t *ppos)
4876 {
4877 	char buf[64];
4878 	int r;
4879 
4880 	r = snprintf(buf, sizeof(buf), "%ld\n",
4881 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4882 	if (r > sizeof(buf))
4883 		r = sizeof(buf);
4884 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4885 }
4886 
4887 static ssize_t
4888 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4889 		    size_t cnt, loff_t *ppos)
4890 {
4891 	unsigned long val;
4892 	int ret;
4893 
4894 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4895 	if (ret)
4896 		return ret;
4897 
4898 	*ptr = val * 1000;
4899 
4900 	return cnt;
4901 }
4902 
4903 static ssize_t
4904 tracing_thresh_read(struct file *filp, char __user *ubuf,
4905 		    size_t cnt, loff_t *ppos)
4906 {
4907 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4908 }
4909 
4910 static ssize_t
4911 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4912 		     size_t cnt, loff_t *ppos)
4913 {
4914 	struct trace_array *tr = filp->private_data;
4915 	int ret;
4916 
4917 	mutex_lock(&trace_types_lock);
4918 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4919 	if (ret < 0)
4920 		goto out;
4921 
4922 	if (tr->current_trace->update_thresh) {
4923 		ret = tr->current_trace->update_thresh(tr);
4924 		if (ret < 0)
4925 			goto out;
4926 	}
4927 
4928 	ret = cnt;
4929 out:
4930 	mutex_unlock(&trace_types_lock);
4931 
4932 	return ret;
4933 }
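
/*
 * Illustrative userspace sketch (not kernel code): setting the latency
 * threshold via the tracing_thresh file handled above.  The value is in
 * microseconds; tracing_nsecs_write() stores it multiplied by 1000.
 * The file name and the tracefs mount point are assumptions.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *usecs = "100\n";	/* threshold of 100 microseconds */
	int fd = open("/sys/kernel/tracing/tracing_thresh", O_WRONLY);

	if (fd < 0) {
		perror("open tracing_thresh");
		return 1;
	}
	if (write(fd, usecs, strlen(usecs)) < 0)
		perror("write tracing_thresh");
	close(fd);
	return 0;
}
#endif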
4934 
4935 #ifdef CONFIG_TRACER_MAX_TRACE
4936 
4937 static ssize_t
4938 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4939 		     size_t cnt, loff_t *ppos)
4940 {
4941 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4942 }
4943 
4944 static ssize_t
4945 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4946 		      size_t cnt, loff_t *ppos)
4947 {
4948 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4949 }
4950 
4951 #endif
4952 
4953 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4954 {
4955 	struct trace_array *tr = inode->i_private;
4956 	struct trace_iterator *iter;
4957 	int ret = 0;
4958 
4959 	if (tracing_disabled)
4960 		return -ENODEV;
4961 
4962 	if (trace_array_get(tr) < 0)
4963 		return -ENODEV;
4964 
4965 	mutex_lock(&trace_types_lock);
4966 
4967 	/* create a buffer to store the information to pass to userspace */
4968 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4969 	if (!iter) {
4970 		ret = -ENOMEM;
4971 		__trace_array_put(tr);
4972 		goto out;
4973 	}
4974 
4975 	trace_seq_init(&iter->seq);
4976 	iter->trace = tr->current_trace;
4977 
4978 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4979 		ret = -ENOMEM;
4980 		goto fail;
4981 	}
4982 
4983 	/* trace_pipe does not show the start of the buffer */
4984 	cpumask_setall(iter->started);
4985 
4986 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4987 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
4988 
4989 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4990 	if (trace_clocks[tr->clock_id].in_ns)
4991 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4992 
4993 	iter->tr = tr;
4994 	iter->trace_buffer = &tr->trace_buffer;
4995 	iter->cpu_file = tracing_get_cpu(inode);
4996 	mutex_init(&iter->mutex);
4997 	filp->private_data = iter;
4998 
4999 	if (iter->trace->pipe_open)
5000 		iter->trace->pipe_open(iter);
5001 
5002 	nonseekable_open(inode, filp);
5003 
5004 	tr->current_trace->ref++;
5005 out:
5006 	mutex_unlock(&trace_types_lock);
5007 	return ret;
5008 
5009 fail:
5010 	kfree(iter->trace);
5011 	kfree(iter);
5012 	__trace_array_put(tr);
5013 	mutex_unlock(&trace_types_lock);
5014 	return ret;
5015 }
5016 
5017 static int tracing_release_pipe(struct inode *inode, struct file *file)
5018 {
5019 	struct trace_iterator *iter = file->private_data;
5020 	struct trace_array *tr = inode->i_private;
5021 
5022 	mutex_lock(&trace_types_lock);
5023 
5024 	tr->current_trace->ref--;
5025 
5026 	if (iter->trace->pipe_close)
5027 		iter->trace->pipe_close(iter);
5028 
5029 	mutex_unlock(&trace_types_lock);
5030 
5031 	free_cpumask_var(iter->started);
5032 	mutex_destroy(&iter->mutex);
5033 	kfree(iter);
5034 
5035 	trace_array_put(tr);
5036 
5037 	return 0;
5038 }
5039 
5040 static unsigned int
5041 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5042 {
5043 	struct trace_array *tr = iter->tr;
5044 
5045 	/* Iterators are static; they should be either filled or empty */
5046 	if (trace_buffer_iter(iter, iter->cpu_file))
5047 		return POLLIN | POLLRDNORM;
5048 
5049 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5050 		/*
5051 		 * Always select as readable when in blocking mode
5052 		 */
5053 		return POLLIN | POLLRDNORM;
5054 	else
5055 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5056 					     filp, poll_table);
5057 }
5058 
5059 static unsigned int
5060 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5061 {
5062 	struct trace_iterator *iter = filp->private_data;
5063 
5064 	return trace_poll(iter, filp, poll_table);
5065 }
5066 
5067 /* Must be called with iter->mutex held. */
5068 static int tracing_wait_pipe(struct file *filp)
5069 {
5070 	struct trace_iterator *iter = filp->private_data;
5071 	int ret;
5072 
5073 	while (trace_empty(iter)) {
5074 
5075 		if ((filp->f_flags & O_NONBLOCK)) {
5076 			return -EAGAIN;
5077 		}
5078 
5079 		/*
5080 		 * We block while the buffer is empty. If tracing is disabled
5081 		 * but we have never read anything, we keep blocking; this
5082 		 * allows a user to cat this file and then enable tracing.
5083 		 * But after we have read something, we give an EOF the next
5084 		 * time tracing is disabled.
5085 		 *
5086 		 * iter->pos will be 0 if we haven't read anything.
5087 		 */
5088 		if (!tracing_is_on() && iter->pos)
5089 			break;
5090 
5091 		mutex_unlock(&iter->mutex);
5092 
5093 		ret = wait_on_pipe(iter, false);
5094 
5095 		mutex_lock(&iter->mutex);
5096 
5097 		if (ret)
5098 			return ret;
5099 	}
5100 
5101 	return 1;
5102 }
5103 
5104 /*
5105  * Consumer reader.
5106  */
5107 static ssize_t
5108 tracing_read_pipe(struct file *filp, char __user *ubuf,
5109 		  size_t cnt, loff_t *ppos)
5110 {
5111 	struct trace_iterator *iter = filp->private_data;
5112 	ssize_t sret;
5113 
5114 	/* return any leftover data */
5115 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5116 	if (sret != -EBUSY)
5117 		return sret;
5118 
5119 	trace_seq_init(&iter->seq);
5120 
5121 	/*
5122 	 * Avoid more than one consumer on a single file descriptor.
5123 	 * This is just a matter of trace coherency; the ring buffer
5124 	 * itself is protected.
5125 	 */
5126 	mutex_lock(&iter->mutex);
5127 	if (iter->trace->read) {
5128 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5129 		if (sret)
5130 			goto out;
5131 	}
5132 
5133 waitagain:
5134 	sret = tracing_wait_pipe(filp);
5135 	if (sret <= 0)
5136 		goto out;
5137 
5138 	/* stop when tracing is finished */
5139 	if (trace_empty(iter)) {
5140 		sret = 0;
5141 		goto out;
5142 	}
5143 
5144 	if (cnt >= PAGE_SIZE)
5145 		cnt = PAGE_SIZE - 1;
5146 
5147 	/* reset all but tr, trace, and overruns */
5148 	memset(&iter->seq, 0,
5149 	       sizeof(struct trace_iterator) -
5150 	       offsetof(struct trace_iterator, seq));
5151 	cpumask_clear(iter->started);
5152 	iter->pos = -1;
5153 
5154 	trace_event_read_lock();
5155 	trace_access_lock(iter->cpu_file);
5156 	while (trace_find_next_entry_inc(iter) != NULL) {
5157 		enum print_line_t ret;
5158 		int save_len = iter->seq.seq.len;
5159 
5160 		ret = print_trace_line(iter);
5161 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5162 			/* don't print partial lines */
5163 			iter->seq.seq.len = save_len;
5164 			break;
5165 		}
5166 		if (ret != TRACE_TYPE_NO_CONSUME)
5167 			trace_consume(iter);
5168 
5169 		if (trace_seq_used(&iter->seq) >= cnt)
5170 			break;
5171 
5172 		/*
5173 		 * Setting the full flag means we reached the trace_seq buffer
5174 		 * size and should have left via the partial-line condition above.
5175 		 * If we get here, one of the trace_seq_* functions is misused.
5176 		 */
5177 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5178 			  iter->ent->type);
5179 	}
5180 	trace_access_unlock(iter->cpu_file);
5181 	trace_event_read_unlock();
5182 
5183 	/* Now copy what we have to the user */
5184 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5185 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5186 		trace_seq_init(&iter->seq);
5187 
5188 	/*
5189 	 * If there was nothing to send to the user, despite consuming
5190 	 * trace entries, go back and wait for more entries.
5191 	 */
5192 	if (sret == -EBUSY)
5193 		goto waitagain;
5194 
5195 out:
5196 	mutex_unlock(&iter->mutex);
5197 
5198 	return sret;
5199 }
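
/*
 * Illustrative userspace sketch (not kernel code): a consuming reader of
 * trace_pipe, the file served by tracing_read_pipe() above.  Unless
 * O_NONBLOCK is used, the read blocks in tracing_wait_pipe() until
 * entries are available.  The tracefs mount point is an assumption.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);

	if (fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	/* entries are consumed as they are read */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
#endif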
5200 
5201 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5202 				     unsigned int idx)
5203 {
5204 	__free_page(spd->pages[idx]);
5205 }
5206 
5207 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5208 	.can_merge		= 0,
5209 	.confirm		= generic_pipe_buf_confirm,
5210 	.release		= generic_pipe_buf_release,
5211 	.steal			= generic_pipe_buf_steal,
5212 	.get			= generic_pipe_buf_get,
5213 };
5214 
5215 static size_t
5216 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5217 {
5218 	size_t count;
5219 	int save_len;
5220 	int ret;
5221 
5222 	/* Seq buffer is page-sized, exactly what we need. */
5223 	for (;;) {
5224 		save_len = iter->seq.seq.len;
5225 		ret = print_trace_line(iter);
5226 
5227 		if (trace_seq_has_overflowed(&iter->seq)) {
5228 			iter->seq.seq.len = save_len;
5229 			break;
5230 		}
5231 
5232 		/*
5233 		 * This should not be hit, because it should only
5234 		 * be set if the iter->seq overflowed. But check it
5235 		 * anyway to be safe.
5236 		 */
5237 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5238 			iter->seq.seq.len = save_len;
5239 			break;
5240 		}
5241 
5242 		count = trace_seq_used(&iter->seq) - save_len;
5243 		if (rem < count) {
5244 			rem = 0;
5245 			iter->seq.seq.len = save_len;
5246 			break;
5247 		}
5248 
5249 		if (ret != TRACE_TYPE_NO_CONSUME)
5250 			trace_consume(iter);
5251 		rem -= count;
5252 		if (!trace_find_next_entry_inc(iter))	{
5253 			rem = 0;
5254 			iter->ent = NULL;
5255 			break;
5256 		}
5257 	}
5258 
5259 	return rem;
5260 }
5261 
5262 static ssize_t tracing_splice_read_pipe(struct file *filp,
5263 					loff_t *ppos,
5264 					struct pipe_inode_info *pipe,
5265 					size_t len,
5266 					unsigned int flags)
5267 {
5268 	struct page *pages_def[PIPE_DEF_BUFFERS];
5269 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5270 	struct trace_iterator *iter = filp->private_data;
5271 	struct splice_pipe_desc spd = {
5272 		.pages		= pages_def,
5273 		.partial	= partial_def,
5274 		.nr_pages	= 0, /* This gets updated below. */
5275 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5276 		.flags		= flags,
5277 		.ops		= &tracing_pipe_buf_ops,
5278 		.spd_release	= tracing_spd_release_pipe,
5279 	};
5280 	ssize_t ret;
5281 	size_t rem;
5282 	unsigned int i;
5283 
5284 	if (splice_grow_spd(pipe, &spd))
5285 		return -ENOMEM;
5286 
5287 	mutex_lock(&iter->mutex);
5288 
5289 	if (iter->trace->splice_read) {
5290 		ret = iter->trace->splice_read(iter, filp,
5291 					       ppos, pipe, len, flags);
5292 		if (ret)
5293 			goto out_err;
5294 	}
5295 
5296 	ret = tracing_wait_pipe(filp);
5297 	if (ret <= 0)
5298 		goto out_err;
5299 
5300 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5301 		ret = -EFAULT;
5302 		goto out_err;
5303 	}
5304 
5305 	trace_event_read_lock();
5306 	trace_access_lock(iter->cpu_file);
5307 
5308 	/* Fill as many pages as possible. */
5309 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5310 		spd.pages[i] = alloc_page(GFP_KERNEL);
5311 		if (!spd.pages[i])
5312 			break;
5313 
5314 		rem = tracing_fill_pipe_page(rem, iter);
5315 
5316 		/* Copy the data into the page, so we can start over. */
5317 		ret = trace_seq_to_buffer(&iter->seq,
5318 					  page_address(spd.pages[i]),
5319 					  trace_seq_used(&iter->seq));
5320 		if (ret < 0) {
5321 			__free_page(spd.pages[i]);
5322 			break;
5323 		}
5324 		spd.partial[i].offset = 0;
5325 		spd.partial[i].len = trace_seq_used(&iter->seq);
5326 
5327 		trace_seq_init(&iter->seq);
5328 	}
5329 
5330 	trace_access_unlock(iter->cpu_file);
5331 	trace_event_read_unlock();
5332 	mutex_unlock(&iter->mutex);
5333 
5334 	spd.nr_pages = i;
5335 
5336 	if (i)
5337 		ret = splice_to_pipe(pipe, &spd);
5338 	else
5339 		ret = 0;
5340 out:
5341 	splice_shrink_spd(&spd);
5342 	return ret;
5343 
5344 out_err:
5345 	mutex_unlock(&iter->mutex);
5346 	goto out;
5347 }
5348 
5349 static ssize_t
5350 tracing_entries_read(struct file *filp, char __user *ubuf,
5351 		     size_t cnt, loff_t *ppos)
5352 {
5353 	struct inode *inode = file_inode(filp);
5354 	struct trace_array *tr = inode->i_private;
5355 	int cpu = tracing_get_cpu(inode);
5356 	char buf[64];
5357 	int r = 0;
5358 	ssize_t ret;
5359 
5360 	mutex_lock(&trace_types_lock);
5361 
5362 	if (cpu == RING_BUFFER_ALL_CPUS) {
5363 		int cpu, buf_size_same;
5364 		unsigned long size;
5365 
5366 		size = 0;
5367 		buf_size_same = 1;
5368 		/* check if all cpu sizes are same */
5369 		for_each_tracing_cpu(cpu) {
5370 			/* fill in the size from first enabled cpu */
5371 			if (size == 0)
5372 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5373 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5374 				buf_size_same = 0;
5375 				break;
5376 			}
5377 		}
5378 
5379 		if (buf_size_same) {
5380 			if (!ring_buffer_expanded)
5381 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5382 					    size >> 10,
5383 					    trace_buf_size >> 10);
5384 			else
5385 				r = sprintf(buf, "%lu\n", size >> 10);
5386 		} else
5387 			r = sprintf(buf, "X\n");
5388 	} else
5389 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5390 
5391 	mutex_unlock(&trace_types_lock);
5392 
5393 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5394 	return ret;
5395 }
5396 
5397 static ssize_t
5398 tracing_entries_write(struct file *filp, const char __user *ubuf,
5399 		      size_t cnt, loff_t *ppos)
5400 {
5401 	struct inode *inode = file_inode(filp);
5402 	struct trace_array *tr = inode->i_private;
5403 	unsigned long val;
5404 	int ret;
5405 
5406 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5407 	if (ret)
5408 		return ret;
5409 
5410 	/* must have at least 1 entry */
5411 	if (!val)
5412 		return -EINVAL;
5413 
5414 	/* value is in KB */
5415 	val <<= 10;
5416 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5417 	if (ret < 0)
5418 		return ret;
5419 
5420 	*ppos += cnt;
5421 
5422 	return cnt;
5423 }
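
/*
 * Illustrative userspace sketch (not kernel code): resizing the per-cpu
 * ring buffers through buffer_size_kb, which lands in
 * tracing_entries_write() -> tracing_resize_ring_buffer() above.  The
 * value is in KB per cpu.  The tracefs mount point is an assumption.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *kb = "1408\n";	/* 1408 KB per cpu */
	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);

	if (fd < 0) {
		perror("open buffer_size_kb");
		return 1;
	}
	if (write(fd, kb, strlen(kb)) < 0)
		perror("write buffer_size_kb");
	close(fd);
	return 0;
}
#endif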
5424 
5425 static ssize_t
5426 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5427 				size_t cnt, loff_t *ppos)
5428 {
5429 	struct trace_array *tr = filp->private_data;
5430 	char buf[64];
5431 	int r, cpu;
5432 	unsigned long size = 0, expanded_size = 0;
5433 
5434 	mutex_lock(&trace_types_lock);
5435 	for_each_tracing_cpu(cpu) {
5436 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5437 		if (!ring_buffer_expanded)
5438 			expanded_size += trace_buf_size >> 10;
5439 	}
5440 	if (ring_buffer_expanded)
5441 		r = sprintf(buf, "%lu\n", size);
5442 	else
5443 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5444 	mutex_unlock(&trace_types_lock);
5445 
5446 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5447 }
5448 
5449 static ssize_t
5450 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5451 			  size_t cnt, loff_t *ppos)
5452 {
5453 	/*
5454 	 * There is no need to read what the user has written; this function
5455 	 * just makes sure that "echo" does not return an error.
5456 	 */
5457 
5458 	*ppos += cnt;
5459 
5460 	return cnt;
5461 }
5462 
5463 static int
5464 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5465 {
5466 	struct trace_array *tr = inode->i_private;
5467 
5468 	/* disable tracing? */
5469 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5470 		tracer_tracing_off(tr);
5471 	/* resize the ring buffer to 0 */
5472 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5473 
5474 	trace_array_put(tr);
5475 
5476 	return 0;
5477 }
5478 
5479 static ssize_t
5480 tracing_mark_write(struct file *filp, const char __user *ubuf,
5481 					size_t cnt, loff_t *fpos)
5482 {
5483 	unsigned long addr = (unsigned long)ubuf;
5484 	struct trace_array *tr = filp->private_data;
5485 	struct ring_buffer_event *event;
5486 	struct ring_buffer *buffer;
5487 	struct print_entry *entry;
5488 	unsigned long irq_flags;
5489 	struct page *pages[2];
5490 	void *map_page[2];
5491 	int nr_pages = 1;
5492 	ssize_t written;
5493 	int offset;
5494 	int size;
5495 	int len;
5496 	int ret;
5497 	int i;
5498 
5499 	if (tracing_disabled)
5500 		return -EINVAL;
5501 
5502 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5503 		return -EINVAL;
5504 
5505 	if (cnt > TRACE_BUF_SIZE)
5506 		cnt = TRACE_BUF_SIZE;
5507 
5508 	/*
5509 	 * Userspace is injecting traces into the kernel trace buffer.
5510 	 * We want to be as non-intrusive as possible.
5511 	 * To do so, we do not want to allocate any special buffers
5512 	 * or take any locks, but instead write the userspace data
5513 	 * straight into the ring buffer.
5514 	 *
5515 	 * First we need to pin the userspace buffer into memory,
5516 	 * which it most likely already is, because userspace just
5517 	 * referenced it. But there's no guarantee. By using get_user_pages_fast()
5518 	 * and kmap_atomic/kunmap_atomic() we can get access to the
5519 	 * pages directly. We then write the data directly into the
5520 	 * ring buffer.
5521 	 */
5522 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5523 
5524 	/* check if we cross pages */
5525 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5526 		nr_pages = 2;
5527 
5528 	offset = addr & (PAGE_SIZE - 1);
5529 	addr &= PAGE_MASK;
5530 
5531 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5532 	if (ret < nr_pages) {
5533 		while (--ret >= 0)
5534 			put_page(pages[ret]);
5535 		written = -EFAULT;
5536 		goto out;
5537 	}
5538 
5539 	for (i = 0; i < nr_pages; i++)
5540 		map_page[i] = kmap_atomic(pages[i]);
5541 
5542 	local_save_flags(irq_flags);
5543 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5544 	buffer = tr->trace_buffer.buffer;
5545 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5546 					  irq_flags, preempt_count());
5547 	if (!event) {
5548 		/* Ring buffer disabled, return as if not open for write */
5549 		written = -EBADF;
5550 		goto out_unlock;
5551 	}
5552 
5553 	entry = ring_buffer_event_data(event);
5554 	entry->ip = _THIS_IP_;
5555 
5556 	if (nr_pages == 2) {
5557 		len = PAGE_SIZE - offset;
5558 		memcpy(&entry->buf, map_page[0] + offset, len);
5559 		memcpy(&entry->buf[len], map_page[1], cnt - len);
5560 	} else
5561 		memcpy(&entry->buf, map_page[0] + offset, cnt);
5562 
5563 	if (entry->buf[cnt - 1] != '\n') {
5564 		entry->buf[cnt] = '\n';
5565 		entry->buf[cnt + 1] = '\0';
5566 	} else
5567 		entry->buf[cnt] = '\0';
5568 
5569 	__buffer_unlock_commit(buffer, event);
5570 
5571 	written = cnt;
5572 
5573 	*fpos += written;
5574 
5575  out_unlock:
5576 	for (i = nr_pages - 1; i >= 0; i--) {
5577 		kunmap_atomic(map_page[i]);
5578 		put_page(pages[i]);
5579 	}
5580  out:
5581 	return written;
5582 }
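
/*
 * Illustrative userspace sketch (not kernel code): injecting a marker
 * into the ring buffer via trace_marker, handled by tracing_mark_write()
 * above.  Writes longer than TRACE_BUF_SIZE are truncated, and a
 * trailing newline is added if missing.  The tracefs mount point is an
 * assumption.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *msg = "hello from userspace";
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

	if (fd < 0) {
		perror("open trace_marker");
		return 1;
	}
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write trace_marker");
	close(fd);
	return 0;
}
#endif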
5583 
5584 static int tracing_clock_show(struct seq_file *m, void *v)
5585 {
5586 	struct trace_array *tr = m->private;
5587 	int i;
5588 
5589 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5590 		seq_printf(m,
5591 			"%s%s%s%s", i ? " " : "",
5592 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5593 			i == tr->clock_id ? "]" : "");
5594 	seq_putc(m, '\n');
5595 
5596 	return 0;
5597 }
5598 
5599 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5600 {
5601 	int i;
5602 
5603 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5604 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5605 			break;
5606 	}
5607 	if (i == ARRAY_SIZE(trace_clocks))
5608 		return -EINVAL;
5609 
5610 	mutex_lock(&trace_types_lock);
5611 
5612 	tr->clock_id = i;
5613 
5614 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5615 
5616 	/*
5617 	 * New clock may not be consistent with the previous clock.
5618 	 * Reset the buffer so that it doesn't have incomparable timestamps.
5619 	 */
5620 	tracing_reset_online_cpus(&tr->trace_buffer);
5621 
5622 #ifdef CONFIG_TRACER_MAX_TRACE
5623 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5624 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5625 	tracing_reset_online_cpus(&tr->max_buffer);
5626 #endif
5627 
5628 	mutex_unlock(&trace_types_lock);
5629 
5630 	return 0;
5631 }
5632 
5633 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5634 				   size_t cnt, loff_t *fpos)
5635 {
5636 	struct seq_file *m = filp->private_data;
5637 	struct trace_array *tr = m->private;
5638 	char buf[64];
5639 	const char *clockstr;
5640 	int ret;
5641 
5642 	if (cnt >= sizeof(buf))
5643 		return -EINVAL;
5644 
5645 	if (copy_from_user(buf, ubuf, cnt))
5646 		return -EFAULT;
5647 
5648 	buf[cnt] = 0;
5649 
5650 	clockstr = strstrip(buf);
5651 
5652 	ret = tracing_set_clock(tr, clockstr);
5653 	if (ret)
5654 		return ret;
5655 
5656 	*fpos += cnt;
5657 
5658 	return cnt;
5659 }
5660 
5661 static int tracing_clock_open(struct inode *inode, struct file *file)
5662 {
5663 	struct trace_array *tr = inode->i_private;
5664 	int ret;
5665 
5666 	if (tracing_disabled)
5667 		return -ENODEV;
5668 
5669 	if (trace_array_get(tr))
5670 		return -ENODEV;
5671 
5672 	ret = single_open(file, tracing_clock_show, inode->i_private);
5673 	if (ret < 0)
5674 		trace_array_put(tr);
5675 
5676 	return ret;
5677 }
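
/*
 * Illustrative userspace sketch (not kernel code): inspecting and
 * switching the trace clock via the trace_clock file.  On a read,
 * tracing_clock_show() brackets the currently selected clock; a write
 * goes through tracing_set_clock(), which also resets the buffers.
 * The tracefs mount point is an assumption.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[128];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/trace_clock", O_RDONLY);

	if (fd < 0) {
		perror("open trace_clock");
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("clocks: %s", buf);	/* current clock is bracketed */
	}
	close(fd);

	fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
	if (fd < 0)
		return 1;
	/* use a clock that is synchronized across CPUs */
	if (write(fd, "global\n", 7) < 0)
		perror("write trace_clock");
	close(fd);
	return 0;
}
#endif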
5678 
5679 struct ftrace_buffer_info {
5680 	struct trace_iterator	iter;
5681 	void			*spare;
5682 	unsigned int		read;
5683 };
5684 
5685 #ifdef CONFIG_TRACER_SNAPSHOT
5686 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5687 {
5688 	struct trace_array *tr = inode->i_private;
5689 	struct trace_iterator *iter;
5690 	struct seq_file *m;
5691 	int ret = 0;
5692 
5693 	if (trace_array_get(tr) < 0)
5694 		return -ENODEV;
5695 
5696 	if (file->f_mode & FMODE_READ) {
5697 		iter = __tracing_open(inode, file, true);
5698 		if (IS_ERR(iter))
5699 			ret = PTR_ERR(iter);
5700 	} else {
5701 		/* Writes still need the seq_file to hold the private data */
5702 		ret = -ENOMEM;
5703 		m = kzalloc(sizeof(*m), GFP_KERNEL);
5704 		if (!m)
5705 			goto out;
5706 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5707 		if (!iter) {
5708 			kfree(m);
5709 			goto out;
5710 		}
5711 		ret = 0;
5712 
5713 		iter->tr = tr;
5714 		iter->trace_buffer = &tr->max_buffer;
5715 		iter->cpu_file = tracing_get_cpu(inode);
5716 		m->private = iter;
5717 		file->private_data = m;
5718 	}
5719 out:
5720 	if (ret < 0)
5721 		trace_array_put(tr);
5722 
5723 	return ret;
5724 }
5725 
5726 static ssize_t
5727 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5728 		       loff_t *ppos)
5729 {
5730 	struct seq_file *m = filp->private_data;
5731 	struct trace_iterator *iter = m->private;
5732 	struct trace_array *tr = iter->tr;
5733 	unsigned long val;
5734 	int ret;
5735 
5736 	ret = tracing_update_buffers();
5737 	if (ret < 0)
5738 		return ret;
5739 
5740 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5741 	if (ret)
5742 		return ret;
5743 
5744 	mutex_lock(&trace_types_lock);
5745 
5746 	if (tr->current_trace->use_max_tr) {
5747 		ret = -EBUSY;
5748 		goto out;
5749 	}
5750 
5751 	switch (val) {
5752 	case 0:
5753 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5754 			ret = -EINVAL;
5755 			break;
5756 		}
5757 		if (tr->allocated_snapshot)
5758 			free_snapshot(tr);
5759 		break;
5760 	case 1:
5761 /* Only allow per-cpu swap if the ring buffer supports it */
5762 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5763 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5764 			ret = -EINVAL;
5765 			break;
5766 		}
5767 #endif
5768 		if (!tr->allocated_snapshot) {
5769 			ret = alloc_snapshot(tr);
5770 			if (ret < 0)
5771 				break;
5772 		}
5773 		local_irq_disable();
5774 		/* Now, we're going to swap */
5775 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5776 			update_max_tr(tr, current, smp_processor_id());
5777 		else
5778 			update_max_tr_single(tr, current, iter->cpu_file);
5779 		local_irq_enable();
5780 		break;
5781 	default:
5782 		if (tr->allocated_snapshot) {
5783 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5784 				tracing_reset_online_cpus(&tr->max_buffer);
5785 			else
5786 				tracing_reset(&tr->max_buffer, iter->cpu_file);
5787 		}
5788 		break;
5789 	}
5790 
5791 	if (ret >= 0) {
5792 		*ppos += cnt;
5793 		ret = cnt;
5794 	}
5795 out:
5796 	mutex_unlock(&trace_types_lock);
5797 	return ret;
5798 }
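
/*
 * The value written to a "snapshot" file maps onto the switch above;
 * illustrative echoes (run from the tracing directory):
 *
 *	# echo 1 > snapshot	allocate the snapshot buffer if needed and
 *				swap the current buffer into it
 *	# echo 0 > snapshot	free the snapshot buffer
 *	# echo 2 > snapshot	clear the snapshot contents without freeing
 *				it (any value other than 0 or 1 does this)
 */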
5799 
5800 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5801 {
5802 	struct seq_file *m = file->private_data;
5803 	int ret;
5804 
5805 	ret = tracing_release(inode, file);
5806 
5807 	if (file->f_mode & FMODE_READ)
5808 		return ret;
5809 
5810 	/* If write only, the seq_file is just a stub */
5811 	if (m)
5812 		kfree(m->private);
5813 	kfree(m);
5814 
5815 	return 0;
5816 }
5817 
5818 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5819 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5820 				    size_t count, loff_t *ppos);
5821 static int tracing_buffers_release(struct inode *inode, struct file *file);
5822 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5823 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5824 
5825 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5826 {
5827 	struct ftrace_buffer_info *info;
5828 	int ret;
5829 
5830 	ret = tracing_buffers_open(inode, filp);
5831 	if (ret < 0)
5832 		return ret;
5833 
5834 	info = filp->private_data;
5835 
5836 	if (info->iter.trace->use_max_tr) {
5837 		tracing_buffers_release(inode, filp);
5838 		return -EBUSY;
5839 	}
5840 
5841 	info->iter.snapshot = true;
5842 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5843 
5844 	return ret;
5845 }
5846 
5847 #endif /* CONFIG_TRACER_SNAPSHOT */
5848 
5849 
5850 static const struct file_operations tracing_thresh_fops = {
5851 	.open		= tracing_open_generic,
5852 	.read		= tracing_thresh_read,
5853 	.write		= tracing_thresh_write,
5854 	.llseek		= generic_file_llseek,
5855 };
5856 
5857 #ifdef CONFIG_TRACER_MAX_TRACE
5858 static const struct file_operations tracing_max_lat_fops = {
5859 	.open		= tracing_open_generic,
5860 	.read		= tracing_max_lat_read,
5861 	.write		= tracing_max_lat_write,
5862 	.llseek		= generic_file_llseek,
5863 };
5864 #endif
5865 
5866 static const struct file_operations set_tracer_fops = {
5867 	.open		= tracing_open_generic,
5868 	.read		= tracing_set_trace_read,
5869 	.write		= tracing_set_trace_write,
5870 	.llseek		= generic_file_llseek,
5871 };
5872 
5873 static const struct file_operations tracing_pipe_fops = {
5874 	.open		= tracing_open_pipe,
5875 	.poll		= tracing_poll_pipe,
5876 	.read		= tracing_read_pipe,
5877 	.splice_read	= tracing_splice_read_pipe,
5878 	.release	= tracing_release_pipe,
5879 	.llseek		= no_llseek,
5880 };
5881 
5882 static const struct file_operations tracing_entries_fops = {
5883 	.open		= tracing_open_generic_tr,
5884 	.read		= tracing_entries_read,
5885 	.write		= tracing_entries_write,
5886 	.llseek		= generic_file_llseek,
5887 	.release	= tracing_release_generic_tr,
5888 };
5889 
5890 static const struct file_operations tracing_total_entries_fops = {
5891 	.open		= tracing_open_generic_tr,
5892 	.read		= tracing_total_entries_read,
5893 	.llseek		= generic_file_llseek,
5894 	.release	= tracing_release_generic_tr,
5895 };
5896 
5897 static const struct file_operations tracing_free_buffer_fops = {
5898 	.open		= tracing_open_generic_tr,
5899 	.write		= tracing_free_buffer_write,
5900 	.release	= tracing_free_buffer_release,
5901 };
5902 
5903 static const struct file_operations tracing_mark_fops = {
5904 	.open		= tracing_open_generic_tr,
5905 	.write		= tracing_mark_write,
5906 	.llseek		= generic_file_llseek,
5907 	.release	= tracing_release_generic_tr,
5908 };
5909 
5910 static const struct file_operations trace_clock_fops = {
5911 	.open		= tracing_clock_open,
5912 	.read		= seq_read,
5913 	.llseek		= seq_lseek,
5914 	.release	= tracing_single_release_tr,
5915 	.write		= tracing_clock_write,
5916 };
5917 
5918 #ifdef CONFIG_TRACER_SNAPSHOT
5919 static const struct file_operations snapshot_fops = {
5920 	.open		= tracing_snapshot_open,
5921 	.read		= seq_read,
5922 	.write		= tracing_snapshot_write,
5923 	.llseek		= tracing_lseek,
5924 	.release	= tracing_snapshot_release,
5925 };
5926 
5927 static const struct file_operations snapshot_raw_fops = {
5928 	.open		= snapshot_raw_open,
5929 	.read		= tracing_buffers_read,
5930 	.release	= tracing_buffers_release,
5931 	.splice_read	= tracing_buffers_splice_read,
5932 	.llseek		= no_llseek,
5933 };
5934 
5935 #endif /* CONFIG_TRACER_SNAPSHOT */
5936 
5937 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5938 {
5939 	struct trace_array *tr = inode->i_private;
5940 	struct ftrace_buffer_info *info;
5941 	int ret;
5942 
5943 	if (tracing_disabled)
5944 		return -ENODEV;
5945 
5946 	if (trace_array_get(tr) < 0)
5947 		return -ENODEV;
5948 
5949 	info = kzalloc(sizeof(*info), GFP_KERNEL);
5950 	if (!info) {
5951 		trace_array_put(tr);
5952 		return -ENOMEM;
5953 	}
5954 
5955 	mutex_lock(&trace_types_lock);
5956 
5957 	info->iter.tr		= tr;
5958 	info->iter.cpu_file	= tracing_get_cpu(inode);
5959 	info->iter.trace	= tr->current_trace;
5960 	info->iter.trace_buffer = &tr->trace_buffer;
5961 	info->spare		= NULL;
5962 	/* Force reading ring buffer for first read */
5963 	info->read		= (unsigned int)-1;
5964 
5965 	filp->private_data = info;
5966 
5967 	tr->current_trace->ref++;
5968 
5969 	mutex_unlock(&trace_types_lock);
5970 
5971 	ret = nonseekable_open(inode, filp);
5972 	if (ret < 0)
5973 		trace_array_put(tr);
5974 
5975 	return ret;
5976 }
5977 
5978 static unsigned int
5979 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5980 {
5981 	struct ftrace_buffer_info *info = filp->private_data;
5982 	struct trace_iterator *iter = &info->iter;
5983 
5984 	return trace_poll(iter, filp, poll_table);
5985 }
5986 
5987 static ssize_t
5988 tracing_buffers_read(struct file *filp, char __user *ubuf,
5989 		     size_t count, loff_t *ppos)
5990 {
5991 	struct ftrace_buffer_info *info = filp->private_data;
5992 	struct trace_iterator *iter = &info->iter;
5993 	ssize_t ret;
5994 	ssize_t size;
5995 
5996 	if (!count)
5997 		return 0;
5998 
5999 #ifdef CONFIG_TRACER_MAX_TRACE
6000 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6001 		return -EBUSY;
6002 #endif
6003 
6004 	if (!info->spare)
6005 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6006 							  iter->cpu_file);
6007 	if (!info->spare)
6008 		return -ENOMEM;
6009 
6010 	/* Do we have previous read data to read? */
6011 	if (info->read < PAGE_SIZE)
6012 		goto read;
6013 
6014  again:
6015 	trace_access_lock(iter->cpu_file);
6016 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6017 				    &info->spare,
6018 				    count,
6019 				    iter->cpu_file, 0);
6020 	trace_access_unlock(iter->cpu_file);
6021 
6022 	if (ret < 0) {
6023 		if (trace_empty(iter)) {
6024 			if ((filp->f_flags & O_NONBLOCK))
6025 				return -EAGAIN;
6026 
6027 			ret = wait_on_pipe(iter, false);
6028 			if (ret)
6029 				return ret;
6030 
6031 			goto again;
6032 		}
6033 		return 0;
6034 	}
6035 
6036 	info->read = 0;
6037  read:
6038 	size = PAGE_SIZE - info->read;
6039 	if (size > count)
6040 		size = count;
6041 
6042 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6043 	if (ret == size)
6044 		return -EFAULT;
6045 
6046 	size -= ret;
6047 
6048 	*ppos += size;
6049 	info->read += size;
6050 
6051 	return size;
6052 }
6053 
6054 static int tracing_buffers_release(struct inode *inode, struct file *file)
6055 {
6056 	struct ftrace_buffer_info *info = file->private_data;
6057 	struct trace_iterator *iter = &info->iter;
6058 
6059 	mutex_lock(&trace_types_lock);
6060 
6061 	iter->tr->current_trace->ref--;
6062 
6063 	__trace_array_put(iter->tr);
6064 
6065 	if (info->spare)
6066 		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6067 	kfree(info);
6068 
6069 	mutex_unlock(&trace_types_lock);
6070 
6071 	return 0;
6072 }
6073 
6074 struct buffer_ref {
6075 	struct ring_buffer	*buffer;
6076 	void			*page;
6077 	int			ref;
6078 };
6079 
6080 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6081 				    struct pipe_buffer *buf)
6082 {
6083 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6084 
6085 	if (--ref->ref)
6086 		return;
6087 
6088 	ring_buffer_free_read_page(ref->buffer, ref->page);
6089 	kfree(ref);
6090 	buf->private = 0;
6091 }
6092 
6093 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6094 				struct pipe_buffer *buf)
6095 {
6096 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6097 
6098 	ref->ref++;
6099 }
6100 
6101 /* Pipe buffer operations for a buffer. */
6102 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6103 	.can_merge		= 0,
6104 	.confirm		= generic_pipe_buf_confirm,
6105 	.release		= buffer_pipe_buf_release,
6106 	.steal			= generic_pipe_buf_steal,
6107 	.get			= buffer_pipe_buf_get,
6108 };
6109 
6110 /*
6111  * Callback from splice_to_pipe(), used to release any pages left
6112  * at the end of the spd if we errored out while filling the pipe.
6113  */
6114 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6115 {
6116 	struct buffer_ref *ref =
6117 		(struct buffer_ref *)spd->partial[i].private;
6118 
6119 	if (--ref->ref)
6120 		return;
6121 
6122 	ring_buffer_free_read_page(ref->buffer, ref->page);
6123 	kfree(ref);
6124 	spd->partial[i].private = 0;
6125 }
6126 
6127 static ssize_t
6128 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6129 			    struct pipe_inode_info *pipe, size_t len,
6130 			    unsigned int flags)
6131 {
6132 	struct ftrace_buffer_info *info = file->private_data;
6133 	struct trace_iterator *iter = &info->iter;
6134 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6135 	struct page *pages_def[PIPE_DEF_BUFFERS];
6136 	struct splice_pipe_desc spd = {
6137 		.pages		= pages_def,
6138 		.partial	= partial_def,
6139 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6140 		.flags		= flags,
6141 		.ops		= &buffer_pipe_buf_ops,
6142 		.spd_release	= buffer_spd_release,
6143 	};
6144 	struct buffer_ref *ref;
6145 	int entries, size, i;
6146 	ssize_t ret = 0;
6147 
6148 #ifdef CONFIG_TRACER_MAX_TRACE
6149 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6150 		return -EBUSY;
6151 #endif
6152 
6153 	if (splice_grow_spd(pipe, &spd))
6154 		return -ENOMEM;
6155 
6156 	if (*ppos & (PAGE_SIZE - 1))
6157 		return -EINVAL;
6158 
6159 	if (len & (PAGE_SIZE - 1)) {
6160 		if (len < PAGE_SIZE)
6161 			return -EINVAL;
6162 		len &= PAGE_MASK;
6163 	}
6164 
6165  again:
6166 	trace_access_lock(iter->cpu_file);
6167 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6168 
6169 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6170 		struct page *page;
6171 		int r;
6172 
6173 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6174 		if (!ref) {
6175 			ret = -ENOMEM;
6176 			break;
6177 		}
6178 
6179 		ref->ref = 1;
6180 		ref->buffer = iter->trace_buffer->buffer;
6181 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6182 		if (!ref->page) {
6183 			ret = -ENOMEM;
6184 			kfree(ref);
6185 			break;
6186 		}
6187 
6188 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6189 					  len, iter->cpu_file, 1);
6190 		if (r < 0) {
6191 			ring_buffer_free_read_page(ref->buffer, ref->page);
6192 			kfree(ref);
6193 			break;
6194 		}
6195 
6196 		/*
6197 		 * Zero out any leftover data; this page is going
6198 		 * to user space.
6199 		 */
6200 		size = ring_buffer_page_len(ref->page);
6201 		if (size < PAGE_SIZE)
6202 			memset(ref->page + size, 0, PAGE_SIZE - size);
6203 
6204 		page = virt_to_page(ref->page);
6205 
6206 		spd.pages[i] = page;
6207 		spd.partial[i].len = PAGE_SIZE;
6208 		spd.partial[i].offset = 0;
6209 		spd.partial[i].private = (unsigned long)ref;
6210 		spd.nr_pages++;
6211 		*ppos += PAGE_SIZE;
6212 
6213 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6214 	}
6215 
6216 	trace_access_unlock(iter->cpu_file);
6217 	spd.nr_pages = i;
6218 
6219 	/* did we read anything? */
6220 	if (!spd.nr_pages) {
6221 		if (ret)
6222 			return ret;
6223 
6224 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6225 			return -EAGAIN;
6226 
6227 		ret = wait_on_pipe(iter, true);
6228 		if (ret)
6229 			return ret;
6230 
6231 		goto again;
6232 	}
6233 
6234 	ret = splice_to_pipe(pipe, &spd);
6235 	splice_shrink_spd(&spd);
6236 
6237 	return ret;
6238 }
6239 
6240 static const struct file_operations tracing_buffers_fops = {
6241 	.open		= tracing_buffers_open,
6242 	.read		= tracing_buffers_read,
6243 	.poll		= tracing_buffers_poll,
6244 	.release	= tracing_buffers_release,
6245 	.splice_read	= tracing_buffers_splice_read,
6246 	.llseek		= no_llseek,
6247 };
6248 
6249 static ssize_t
6250 tracing_stats_read(struct file *filp, char __user *ubuf,
6251 		   size_t count, loff_t *ppos)
6252 {
6253 	struct inode *inode = file_inode(filp);
6254 	struct trace_array *tr = inode->i_private;
6255 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6256 	int cpu = tracing_get_cpu(inode);
6257 	struct trace_seq *s;
6258 	unsigned long cnt;
6259 	unsigned long long t;
6260 	unsigned long usec_rem;
6261 
6262 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6263 	if (!s)
6264 		return -ENOMEM;
6265 
6266 	trace_seq_init(s);
6267 
6268 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6269 	trace_seq_printf(s, "entries: %ld\n", cnt);
6270 
6271 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6272 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6273 
6274 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6275 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6276 
6277 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6278 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6279 
6280 	if (trace_clocks[tr->clock_id].in_ns) {
6281 		/* local or global for trace_clock */
6282 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6283 		usec_rem = do_div(t, USEC_PER_SEC);
6284 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6285 								t, usec_rem);
6286 
6287 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6288 		usec_rem = do_div(t, USEC_PER_SEC);
6289 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6290 	} else {
6291 		/* counter or tsc mode for trace_clock */
6292 		trace_seq_printf(s, "oldest event ts: %llu\n",
6293 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6294 
6295 		trace_seq_printf(s, "now ts: %llu\n",
6296 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6297 	}
6298 
6299 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6300 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6301 
6302 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6303 	trace_seq_printf(s, "read events: %ld\n", cnt);
6304 
6305 	count = simple_read_from_buffer(ubuf, count, ppos,
6306 					s->buffer, trace_seq_used(s));
6307 
6308 	kfree(s);
6309 
6310 	return count;
6311 }
6312 
6313 static const struct file_operations tracing_stats_fops = {
6314 	.open		= tracing_open_generic_tr,
6315 	.read		= tracing_stats_read,
6316 	.llseek		= generic_file_llseek,
6317 	.release	= tracing_release_generic_tr,
6318 };
6319 
6320 #ifdef CONFIG_DYNAMIC_FTRACE
6321 
6322 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6323 {
6324 	return 0;
6325 }
6326 
6327 static ssize_t
6328 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6329 		  size_t cnt, loff_t *ppos)
6330 {
6331 	static char ftrace_dyn_info_buffer[1024];
6332 	static DEFINE_MUTEX(dyn_info_mutex);
6333 	unsigned long *p = filp->private_data;
6334 	char *buf = ftrace_dyn_info_buffer;
6335 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6336 	int r;
6337 
6338 	mutex_lock(&dyn_info_mutex);
6339 	r = sprintf(buf, "%ld ", *p);
6340 
6341 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6342 	buf[r++] = '\n';
6343 
6344 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6345 
6346 	mutex_unlock(&dyn_info_mutex);
6347 
6348 	return r;
6349 }
6350 
6351 static const struct file_operations tracing_dyn_info_fops = {
6352 	.open		= tracing_open_generic,
6353 	.read		= tracing_read_dyn_info,
6354 	.llseek		= generic_file_llseek,
6355 };
6356 #endif /* CONFIG_DYNAMIC_FTRACE */
6357 
6358 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6359 static void
6360 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6361 {
6362 	tracing_snapshot();
6363 }
6364 
6365 static void
6366 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6367 {
6368 	unsigned long *count = (long *)data;
6369 
6370 	if (!*count)
6371 		return;
6372 
6373 	if (*count != -1)
6374 		(*count)--;
6375 
6376 	tracing_snapshot();
6377 }
6378 
6379 static int
6380 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6381 		      struct ftrace_probe_ops *ops, void *data)
6382 {
6383 	long count = (long)data;
6384 
6385 	seq_printf(m, "%ps:", (void *)ip);
6386 
6387 	seq_puts(m, "snapshot");
6388 
6389 	if (count == -1)
6390 		seq_puts(m, ":unlimited\n");
6391 	else
6392 		seq_printf(m, ":count=%ld\n", count);
6393 
6394 	return 0;
6395 }
6396 
6397 static struct ftrace_probe_ops snapshot_probe_ops = {
6398 	.func			= ftrace_snapshot,
6399 	.print			= ftrace_snapshot_print,
6400 };
6401 
6402 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6403 	.func			= ftrace_count_snapshot,
6404 	.print			= ftrace_snapshot_print,
6405 };
6406 
6407 static int
6408 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6409 			       char *glob, char *cmd, char *param, int enable)
6410 {
6411 	struct ftrace_probe_ops *ops;
6412 	void *count = (void *)-1;
6413 	char *number;
6414 	int ret;
6415 
6416 	/* hash funcs only work with set_ftrace_filter */
6417 	if (!enable)
6418 		return -EINVAL;
6419 
6420 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6421 
6422 	if (glob[0] == '!') {
6423 		unregister_ftrace_function_probe_func(glob+1, ops);
6424 		return 0;
6425 	}
6426 
6427 	if (!param)
6428 		goto out_reg;
6429 
6430 	number = strsep(&param, ":");
6431 
6432 	if (!strlen(number))
6433 		goto out_reg;
6434 
6435 	/*
6436 	 * We use the callback data field (which is a pointer)
6437 	 * as our counter.
6438 	 */
6439 	ret = kstrtoul(number, 0, (unsigned long *)&count);
6440 	if (ret)
6441 		return ret;
6442 
6443  out_reg:
6444 	ret = register_ftrace_function_probe(glob, ops, count);
6445 
6446 	if (ret >= 0)
6447 		alloc_snapshot(&global_trace);
6448 
6449 	return ret < 0 ? ret : 0;
6450 }
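
/*
 * This callback implements the "snapshot" command of set_ftrace_filter;
 * illustrative usage from the tracing directory:
 *
 *	# echo 'schedule:snapshot' > set_ftrace_filter
 *	# echo 'schedule:snapshot:5' > set_ftrace_filter
 *	# echo '!schedule:snapshot' > set_ftrace_filter
 *
 * which take a snapshot every time schedule() is hit, limit that to the
 * first 5 hits, or remove the probe again, respectively.
 */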
6451 
6452 static struct ftrace_func_command ftrace_snapshot_cmd = {
6453 	.name			= "snapshot",
6454 	.func			= ftrace_trace_snapshot_callback,
6455 };
6456 
6457 static __init int register_snapshot_cmd(void)
6458 {
6459 	return register_ftrace_command(&ftrace_snapshot_cmd);
6460 }
6461 #else
6462 static inline __init int register_snapshot_cmd(void) { return 0; }
6463 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6464 
6465 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6466 {
6467 	if (WARN_ON(!tr->dir))
6468 		return ERR_PTR(-ENODEV);
6469 
6470 	/* Top directory uses NULL as the parent */
6471 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6472 		return NULL;
6473 
6474 	/* All sub buffers have a descriptor */
6475 	return tr->dir;
6476 }
6477 
6478 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6479 {
6480 	struct dentry *d_tracer;
6481 
6482 	if (tr->percpu_dir)
6483 		return tr->percpu_dir;
6484 
6485 	d_tracer = tracing_get_dentry(tr);
6486 	if (IS_ERR(d_tracer))
6487 		return NULL;
6488 
6489 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6490 
6491 	WARN_ONCE(!tr->percpu_dir,
6492 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6493 
6494 	return tr->percpu_dir;
6495 }
6496 
6497 static struct dentry *
6498 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6499 		      void *data, long cpu, const struct file_operations *fops)
6500 {
6501 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6502 
6503 	if (ret) /* See tracing_get_cpu() */
6504 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6505 	return ret;
6506 }
6507 
6508 static void
6509 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6510 {
6511 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6512 	struct dentry *d_cpu;
6513 	char cpu_dir[30]; /* 30 characters should be more than enough */
6514 
6515 	if (!d_percpu)
6516 		return;
6517 
6518 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6519 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6520 	if (!d_cpu) {
6521 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6522 		return;
6523 	}
6524 
6525 	/* per cpu trace_pipe */
6526 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6527 				tr, cpu, &tracing_pipe_fops);
6528 
6529 	/* per cpu trace */
6530 	trace_create_cpu_file("trace", 0644, d_cpu,
6531 				tr, cpu, &tracing_fops);
6532 
6533 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6534 				tr, cpu, &tracing_buffers_fops);
6535 
6536 	trace_create_cpu_file("stats", 0444, d_cpu,
6537 				tr, cpu, &tracing_stats_fops);
6538 
6539 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6540 				tr, cpu, &tracing_entries_fops);
6541 
6542 #ifdef CONFIG_TRACER_SNAPSHOT
6543 	trace_create_cpu_file("snapshot", 0644, d_cpu,
6544 				tr, cpu, &snapshot_fops);
6545 
6546 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6547 				tr, cpu, &snapshot_raw_fops);
6548 #endif
6549 }
6550 
6551 #ifdef CONFIG_FTRACE_SELFTEST
6552 /* Let selftest have access to static functions in this file */
6553 #include "trace_selftest.c"
6554 #endif
6555 
6556 static ssize_t
6557 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6558 			loff_t *ppos)
6559 {
6560 	struct trace_option_dentry *topt = filp->private_data;
6561 	char *buf;
6562 
6563 	if (topt->flags->val & topt->opt->bit)
6564 		buf = "1\n";
6565 	else
6566 		buf = "0\n";
6567 
6568 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6569 }
6570 
6571 static ssize_t
6572 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6573 			 loff_t *ppos)
6574 {
6575 	struct trace_option_dentry *topt = filp->private_data;
6576 	unsigned long val;
6577 	int ret;
6578 
6579 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6580 	if (ret)
6581 		return ret;
6582 
6583 	if (val != 0 && val != 1)
6584 		return -EINVAL;
6585 
6586 	if (!!(topt->flags->val & topt->opt->bit) != val) {
6587 		mutex_lock(&trace_types_lock);
6588 		ret = __set_tracer_option(topt->tr, topt->flags,
6589 					  topt->opt, !val);
6590 		mutex_unlock(&trace_types_lock);
6591 		if (ret)
6592 			return ret;
6593 	}
6594 
6595 	*ppos += cnt;
6596 
6597 	return cnt;
6598 }
6599 
6600 
6601 static const struct file_operations trace_options_fops = {
6602 	.open = tracing_open_generic,
6603 	.read = trace_options_read,
6604 	.write = trace_options_write,
6605 	.llseek	= generic_file_llseek,
6606 };
6607 
6608 /*
6609  * In order to pass in both the trace_array descriptor and the index
6610  * to the flag that the trace option file represents, the trace_array
6611  * has a character array of trace_flags_index[], which holds the index
6612  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6613  * The address of this character array is passed to the flag option file
6614  * read/write callbacks.
6615  *
6616  * In order to extract both the index and the trace_array descriptor,
6617  * get_tr_index() uses the following algorithm.
6618  *
6619  *   idx = *ptr;
6620  *
6621  * This works because the pointer passed in is the address of the index
6622  * entry itself (remember index[1] == 1).
6623  *
6624  * Then, to get the trace_array descriptor, we subtract that index
6625  * from the pointer, which lands us at the start of the index array:
6626  *
6627  *   ptr - idx == &index[0]
6628  *
6629  * Then a simple container_of() from that pointer gets us to the
6630  * trace_array descriptor.
6631  */
6632 static void get_tr_index(void *data, struct trace_array **ptr,
6633 			 unsigned int *pindex)
6634 {
6635 	*pindex = *(unsigned char *)data;
6636 
6637 	*ptr = container_of(data - *pindex, struct trace_array,
6638 			    trace_flags_index);
6639 }
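
/*
 * Worked example of the scheme described above (hypothetical values):
 * if data points at tr->trace_flags_index[3], then
 *
 *	*pindex          == 3
 *	data - *pindex   == tr->trace_flags_index
 *	container_of(..) == tr
 *
 * so a single byte per flag is enough to recover both the flag bit and
 * the trace_array it belongs to.
 */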
6640 
6641 static ssize_t
6642 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6643 			loff_t *ppos)
6644 {
6645 	void *tr_index = filp->private_data;
6646 	struct trace_array *tr;
6647 	unsigned int index;
6648 	char *buf;
6649 
6650 	get_tr_index(tr_index, &tr, &index);
6651 
6652 	if (tr->trace_flags & (1 << index))
6653 		buf = "1\n";
6654 	else
6655 		buf = "0\n";
6656 
6657 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6658 }
6659 
6660 static ssize_t
6661 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6662 			 loff_t *ppos)
6663 {
6664 	void *tr_index = filp->private_data;
6665 	struct trace_array *tr;
6666 	unsigned int index;
6667 	unsigned long val;
6668 	int ret;
6669 
6670 	get_tr_index(tr_index, &tr, &index);
6671 
6672 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6673 	if (ret)
6674 		return ret;
6675 
6676 	if (val != 0 && val != 1)
6677 		return -EINVAL;
6678 
6679 	mutex_lock(&trace_types_lock);
6680 	ret = set_tracer_flag(tr, 1 << index, val);
6681 	mutex_unlock(&trace_types_lock);
6682 
6683 	if (ret < 0)
6684 		return ret;
6685 
6686 	*ppos += cnt;
6687 
6688 	return cnt;
6689 }
6690 
6691 static const struct file_operations trace_options_core_fops = {
6692 	.open = tracing_open_generic,
6693 	.read = trace_options_core_read,
6694 	.write = trace_options_core_write,
6695 	.llseek = generic_file_llseek,
6696 };
6697 
6698 struct dentry *trace_create_file(const char *name,
6699 				 umode_t mode,
6700 				 struct dentry *parent,
6701 				 void *data,
6702 				 const struct file_operations *fops)
6703 {
6704 	struct dentry *ret;
6705 
6706 	ret = tracefs_create_file(name, mode, parent, data, fops);
6707 	if (!ret)
6708 		pr_warn("Could not create tracefs '%s' entry\n", name);
6709 
6710 	return ret;
6711 }
6712 
6713 
6714 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6715 {
6716 	struct dentry *d_tracer;
6717 
6718 	if (tr->options)
6719 		return tr->options;
6720 
6721 	d_tracer = tracing_get_dentry(tr);
6722 	if (IS_ERR(d_tracer))
6723 		return NULL;
6724 
6725 	tr->options = tracefs_create_dir("options", d_tracer);
6726 	if (!tr->options) {
6727 		pr_warn("Could not create tracefs directory 'options'\n");
6728 		return NULL;
6729 	}
6730 
6731 	return tr->options;
6732 }
6733 
6734 static void
6735 create_trace_option_file(struct trace_array *tr,
6736 			 struct trace_option_dentry *topt,
6737 			 struct tracer_flags *flags,
6738 			 struct tracer_opt *opt)
6739 {
6740 	struct dentry *t_options;
6741 
6742 	t_options = trace_options_init_dentry(tr);
6743 	if (!t_options)
6744 		return;
6745 
6746 	topt->flags = flags;
6747 	topt->opt = opt;
6748 	topt->tr = tr;
6749 
6750 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6751 				    &trace_options_fops);
6752 
6753 }
6754 
6755 static void
6756 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6757 {
6758 	struct trace_option_dentry *topts;
6759 	struct trace_options *tr_topts;
6760 	struct tracer_flags *flags;
6761 	struct tracer_opt *opts;
6762 	int cnt;
6763 	int i;
6764 
6765 	if (!tracer)
6766 		return;
6767 
6768 	flags = tracer->flags;
6769 
6770 	if (!flags || !flags->opts)
6771 		return;
6772 
6773 	/*
6774 	 * If this is an instance, only create flags for tracers
6775 	 * the instance may have.
6776 	 */
6777 	if (!trace_ok_for_array(tracer, tr))
6778 		return;
6779 
6780 	for (i = 0; i < tr->nr_topts; i++) {
6781 		/* Make sure there are no duplicate flags. */
6782 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6783 			return;
6784 	}
6785 
6786 	opts = flags->opts;
6787 
6788 	for (cnt = 0; opts[cnt].name; cnt++)
6789 		;
6790 
6791 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6792 	if (!topts)
6793 		return;
6794 
6795 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6796 			    GFP_KERNEL);
6797 	if (!tr_topts) {
6798 		kfree(topts);
6799 		return;
6800 	}
6801 
6802 	tr->topts = tr_topts;
6803 	tr->topts[tr->nr_topts].tracer = tracer;
6804 	tr->topts[tr->nr_topts].topts = topts;
6805 	tr->nr_topts++;
6806 
6807 	for (cnt = 0; opts[cnt].name; cnt++) {
6808 		create_trace_option_file(tr, &topts[cnt], flags,
6809 					 &opts[cnt]);
6810 		WARN_ONCE(topts[cnt].entry == NULL,
6811 			  "Failed to create trace option: %s",
6812 			  opts[cnt].name);
6813 	}
6814 }
6815 
6816 static struct dentry *
6817 create_trace_option_core_file(struct trace_array *tr,
6818 			      const char *option, long index)
6819 {
6820 	struct dentry *t_options;
6821 
6822 	t_options = trace_options_init_dentry(tr);
6823 	if (!t_options)
6824 		return NULL;
6825 
6826 	return trace_create_file(option, 0644, t_options,
6827 				 (void *)&tr->trace_flags_index[index],
6828 				 &trace_options_core_fops);
6829 }
6830 
6831 static void create_trace_options_dir(struct trace_array *tr)
6832 {
6833 	struct dentry *t_options;
6834 	bool top_level = tr == &global_trace;
6835 	int i;
6836 
6837 	t_options = trace_options_init_dentry(tr);
6838 	if (!t_options)
6839 		return;
6840 
6841 	for (i = 0; trace_options[i]; i++) {
6842 		if (top_level ||
6843 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6844 			create_trace_option_core_file(tr, trace_options[i], i);
6845 	}
6846 }
6847 
6848 static ssize_t
6849 rb_simple_read(struct file *filp, char __user *ubuf,
6850 	       size_t cnt, loff_t *ppos)
6851 {
6852 	struct trace_array *tr = filp->private_data;
6853 	char buf[64];
6854 	int r;
6855 
6856 	r = tracer_tracing_is_on(tr);
6857 	r = sprintf(buf, "%d\n", r);
6858 
6859 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6860 }
6861 
6862 static ssize_t
6863 rb_simple_write(struct file *filp, const char __user *ubuf,
6864 		size_t cnt, loff_t *ppos)
6865 {
6866 	struct trace_array *tr = filp->private_data;
6867 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6868 	unsigned long val;
6869 	int ret;
6870 
6871 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6872 	if (ret)
6873 		return ret;
6874 
6875 	if (buffer) {
6876 		mutex_lock(&trace_types_lock);
6877 		if (val) {
6878 			tracer_tracing_on(tr);
6879 			if (tr->current_trace->start)
6880 				tr->current_trace->start(tr);
6881 		} else {
6882 			tracer_tracing_off(tr);
6883 			if (tr->current_trace->stop)
6884 				tr->current_trace->stop(tr);
6885 		}
6886 		mutex_unlock(&trace_types_lock);
6887 	}
6888 
6889 	(*ppos)++;
6890 
6891 	return cnt;
6892 }
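
/*
 * rb_simple_fops below backs the "tracing_on" file, so the usual way to
 * pause and resume a trace from userspace is (illustrative):
 *
 *	# echo 0 > tracing_on
 *	# echo 1 > tracing_on
 *
 * Unlike tracer_tracing_off()/on() called from kernel code, a write here
 * also invokes the current tracer's stop()/start() callbacks.
 */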
6893 
6894 static const struct file_operations rb_simple_fops = {
6895 	.open		= tracing_open_generic_tr,
6896 	.read		= rb_simple_read,
6897 	.write		= rb_simple_write,
6898 	.release	= tracing_release_generic_tr,
6899 	.llseek		= default_llseek,
6900 };
6901 
6902 struct dentry *trace_instance_dir;
6903 
6904 static void
6905 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6906 
6907 static int
6908 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6909 {
6910 	enum ring_buffer_flags rb_flags;
6911 
6912 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6913 
6914 	buf->tr = tr;
6915 
6916 	buf->buffer = ring_buffer_alloc(size, rb_flags);
6917 	if (!buf->buffer)
6918 		return -ENOMEM;
6919 
6920 	buf->data = alloc_percpu(struct trace_array_cpu);
6921 	if (!buf->data) {
6922 		ring_buffer_free(buf->buffer);
6923 		return -ENOMEM;
6924 	}
6925 
6926 	/* Allocate the first page for all buffers */
6927 	set_buffer_entries(&tr->trace_buffer,
6928 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6929 
6930 	return 0;
6931 }
6932 
6933 static int allocate_trace_buffers(struct trace_array *tr, int size)
6934 {
6935 	int ret;
6936 
6937 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6938 	if (ret)
6939 		return ret;
6940 
6941 #ifdef CONFIG_TRACER_MAX_TRACE
6942 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6943 				    allocate_snapshot ? size : 1);
6944 	if (WARN_ON(ret)) {
6945 		ring_buffer_free(tr->trace_buffer.buffer);
6946 		free_percpu(tr->trace_buffer.data);
6947 		return -ENOMEM;
6948 	}
6949 	tr->allocated_snapshot = allocate_snapshot;
6950 
6951 	/*
6952 	 * Only the top level trace array gets its snapshot allocated
6953 	 * from the kernel command line.
6954 	 */
6955 	allocate_snapshot = false;
6956 #endif
6957 	return 0;
6958 }
6959 
6960 static void free_trace_buffer(struct trace_buffer *buf)
6961 {
6962 	if (buf->buffer) {
6963 		ring_buffer_free(buf->buffer);
6964 		buf->buffer = NULL;
6965 		free_percpu(buf->data);
6966 		buf->data = NULL;
6967 	}
6968 }
6969 
6970 static void free_trace_buffers(struct trace_array *tr)
6971 {
6972 	if (!tr)
6973 		return;
6974 
6975 	free_trace_buffer(&tr->trace_buffer);
6976 
6977 #ifdef CONFIG_TRACER_MAX_TRACE
6978 	free_trace_buffer(&tr->max_buffer);
6979 #endif
6980 }
6981 
6982 static void init_trace_flags_index(struct trace_array *tr)
6983 {
6984 	int i;
6985 
6986 	/* Used by the trace options files */
6987 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6988 		tr->trace_flags_index[i] = i;
6989 }
6990 
6991 static void __update_tracer_options(struct trace_array *tr)
6992 {
6993 	struct tracer *t;
6994 
6995 	for (t = trace_types; t; t = t->next)
6996 		add_tracer_options(tr, t);
6997 }
6998 
6999 static void update_tracer_options(struct trace_array *tr)
7000 {
7001 	mutex_lock(&trace_types_lock);
7002 	__update_tracer_options(tr);
7003 	mutex_unlock(&trace_types_lock);
7004 }
7005 
7006 static int instance_mkdir(const char *name)
7007 {
7008 	struct trace_array *tr;
7009 	int ret;
7010 
7011 	mutex_lock(&trace_types_lock);
7012 
7013 	ret = -EEXIST;
7014 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7015 		if (tr->name && strcmp(tr->name, name) == 0)
7016 			goto out_unlock;
7017 	}
7018 
7019 	ret = -ENOMEM;
7020 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7021 	if (!tr)
7022 		goto out_unlock;
7023 
7024 	tr->name = kstrdup(name, GFP_KERNEL);
7025 	if (!tr->name)
7026 		goto out_free_tr;
7027 
7028 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7029 		goto out_free_tr;
7030 
7031 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7032 
7033 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7034 
7035 	raw_spin_lock_init(&tr->start_lock);
7036 
7037 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7038 
7039 	tr->current_trace = &nop_trace;
7040 
7041 	INIT_LIST_HEAD(&tr->systems);
7042 	INIT_LIST_HEAD(&tr->events);
7043 
7044 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7045 		goto out_free_tr;
7046 
7047 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7048 	if (!tr->dir)
7049 		goto out_free_tr;
7050 
7051 	ret = event_trace_add_tracer(tr->dir, tr);
7052 	if (ret) {
7053 		tracefs_remove_recursive(tr->dir);
7054 		goto out_free_tr;
7055 	}
7056 
7057 	init_tracer_tracefs(tr, tr->dir);
7058 	init_trace_flags_index(tr);
7059 	__update_tracer_options(tr);
7060 
7061 	list_add(&tr->list, &ftrace_trace_arrays);
7062 
7063 	mutex_unlock(&trace_types_lock);
7064 
7065 	return 0;
7066 
7067  out_free_tr:
7068 	free_trace_buffers(tr);
7069 	free_cpumask_var(tr->tracing_cpumask);
7070 	kfree(tr->name);
7071 	kfree(tr);
7072 
7073  out_unlock:
7074 	mutex_unlock(&trace_types_lock);
7075 
7076 	return ret;
7077 
7078 }
7079 
7080 static int instance_rmdir(const char *name)
7081 {
7082 	struct trace_array *tr;
7083 	int found = 0;
7084 	int ret;
7085 	int i;
7086 
7087 	mutex_lock(&trace_types_lock);
7088 
7089 	ret = -ENODEV;
7090 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7091 		if (tr->name && strcmp(tr->name, name) == 0) {
7092 			found = 1;
7093 			break;
7094 		}
7095 	}
7096 	if (!found)
7097 		goto out_unlock;
7098 
7099 	ret = -EBUSY;
7100 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7101 		goto out_unlock;
7102 
7103 	list_del(&tr->list);
7104 
7105 	/* Disable all the flags that were enabled coming in */
7106 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7107 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7108 			set_tracer_flag(tr, 1 << i, 0);
7109 	}
7110 
7111 	tracing_set_nop(tr);
7112 	event_trace_del_tracer(tr);
7113 	ftrace_destroy_function_files(tr);
7114 	tracefs_remove_recursive(tr->dir);
7115 	free_trace_buffers(tr);
7116 
7117 	for (i = 0; i < tr->nr_topts; i++) {
7118 		kfree(tr->topts[i].topts);
7119 	}
7120 	kfree(tr->topts);
7121 
7122 	kfree(tr->name);
7123 	kfree(tr);
7124 
7125 	ret = 0;
7126 
7127  out_unlock:
7128 	mutex_unlock(&trace_types_lock);
7129 
7130 	return ret;
7131 }
7132 
7133 static __init void create_trace_instances(struct dentry *d_tracer)
7134 {
7135 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7136 							 instance_mkdir,
7137 							 instance_rmdir);
7138 	if (WARN_ON(!trace_instance_dir))
7139 		return;
7140 }
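
/*
 * With the "instances" directory registered, trace arrays are created and
 * removed from userspace with plain mkdir/rmdir (illustrative paths):
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# rmdir /sys/kernel/tracing/instances/foo
 *
 * which land in instance_mkdir() and instance_rmdir() above; rmdir fails
 * with -EBUSY while the instance still holds references.
 */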
7141 
7142 static void
7143 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7144 {
7145 	int cpu;
7146 
7147 	trace_create_file("available_tracers", 0444, d_tracer,
7148 			tr, &show_traces_fops);
7149 
7150 	trace_create_file("current_tracer", 0644, d_tracer,
7151 			tr, &set_tracer_fops);
7152 
7153 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7154 			  tr, &tracing_cpumask_fops);
7155 
7156 	trace_create_file("trace_options", 0644, d_tracer,
7157 			  tr, &tracing_iter_fops);
7158 
7159 	trace_create_file("trace", 0644, d_tracer,
7160 			  tr, &tracing_fops);
7161 
7162 	trace_create_file("trace_pipe", 0444, d_tracer,
7163 			  tr, &tracing_pipe_fops);
7164 
7165 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7166 			  tr, &tracing_entries_fops);
7167 
7168 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7169 			  tr, &tracing_total_entries_fops);
7170 
7171 	trace_create_file("free_buffer", 0200, d_tracer,
7172 			  tr, &tracing_free_buffer_fops);
7173 
7174 	trace_create_file("trace_marker", 0220, d_tracer,
7175 			  tr, &tracing_mark_fops);
7176 
7177 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7178 			  &trace_clock_fops);
7179 
7180 	trace_create_file("tracing_on", 0644, d_tracer,
7181 			  tr, &rb_simple_fops);
7182 
7183 	create_trace_options_dir(tr);
7184 
7185 #ifdef CONFIG_TRACER_MAX_TRACE
7186 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7187 			&tr->max_latency, &tracing_max_lat_fops);
7188 #endif
7189 
7190 	if (ftrace_create_function_files(tr, d_tracer))
7191 		WARN(1, "Could not allocate function filter files");
7192 
7193 #ifdef CONFIG_TRACER_SNAPSHOT
7194 	trace_create_file("snapshot", 0644, d_tracer,
7195 			  tr, &snapshot_fops);
7196 #endif
7197 
7198 	for_each_tracing_cpu(cpu)
7199 		tracing_init_tracefs_percpu(tr, cpu);
7200 
7201 	ftrace_init_tracefs(tr, d_tracer);
7202 }
7203 
7204 static struct vfsmount *trace_automount(void *ignore)
7205 {
7206 	struct vfsmount *mnt;
7207 	struct file_system_type *type;
7208 
7209 	/*
7210 	 * To maintain backward compatibility for tools that mount
7211 	 * debugfs to get to the tracing facility, tracefs is automatically
7212 	 * mounted to the debugfs/tracing directory.
7213 	 */
7214 	type = get_fs_type("tracefs");
7215 	if (!type)
7216 		return NULL;
7217 	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7218 	put_filesystem(type);
7219 	if (IS_ERR(mnt))
7220 		return NULL;
7221 	mntget(mnt);
7222 
7223 	return mnt;
7224 }
7225 
7226 /**
7227  * tracing_init_dentry - initialize top level trace array
7228  *
7229  * This is called when creating files or directories in the tracing
7230  * directory. It is called via fs_initcall() by any of the boot up code
7231  * and expects to return the dentry of the top level tracing directory.
7232  */
7233 struct dentry *tracing_init_dentry(void)
7234 {
7235 	struct trace_array *tr = &global_trace;
7236 
7237 	/* The top level trace array uses NULL as parent */
7238 	if (tr->dir)
7239 		return NULL;
7240 
7241 	if (WARN_ON(!tracefs_initialized()) ||
7242 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7243 		 WARN_ON(!debugfs_initialized())))
7244 		return ERR_PTR(-ENODEV);
7245 
7246 	/*
7247 	 * As there may still be users that expect the tracing
7248 	 * files to exist in debugfs/tracing, we must automount
7249 	 * the tracefs file system there, so older tools still
7250 	 * work with the newer kernel.
7251 	 */
7252 	tr->dir = debugfs_create_automount("tracing", NULL,
7253 					   trace_automount, NULL);
7254 	if (!tr->dir) {
7255 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7256 		return ERR_PTR(-ENOMEM);
7257 	}
7258 
7259 	return NULL;
7260 }
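
/*
 * Either mount therefore keeps working for userspace (illustrative):
 *
 *	# mount -t tracefs nodev /sys/kernel/tracing
 *	# mount -t debugfs nodev /sys/kernel/debug	(tracing/ automounts)
 */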
7261 
7262 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7263 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7264 
7265 static void __init trace_enum_init(void)
7266 {
7267 	int len;
7268 
7269 	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7270 	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7271 }
7272 
7273 #ifdef CONFIG_MODULES
7274 static void trace_module_add_enums(struct module *mod)
7275 {
7276 	if (!mod->num_trace_enums)
7277 		return;
7278 
7279 	/*
7280 	 * Modules with bad taint do not have events created, so do
7281 	 * not bother with their enums either.
7282 	 */
7283 	if (trace_module_has_bad_taint(mod))
7284 		return;
7285 
7286 	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7287 }
7288 
7289 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7290 static void trace_module_remove_enums(struct module *mod)
7291 {
7292 	union trace_enum_map_item *map;
7293 	union trace_enum_map_item **last = &trace_enum_maps;
7294 
7295 	if (!mod->num_trace_enums)
7296 		return;
7297 
7298 	mutex_lock(&trace_enum_mutex);
7299 
7300 	map = trace_enum_maps;
7301 
7302 	while (map) {
7303 		if (map->head.mod == mod)
7304 			break;
7305 		map = trace_enum_jmp_to_tail(map);
7306 		last = &map->tail.next;
7307 		map = map->tail.next;
7308 	}
7309 	if (!map)
7310 		goto out;
7311 
7312 	*last = trace_enum_jmp_to_tail(map)->tail.next;
7313 	kfree(map);
7314  out:
7315 	mutex_unlock(&trace_enum_mutex);
7316 }
7317 #else
7318 static inline void trace_module_remove_enums(struct module *mod) { }
7319 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7320 
7321 static int trace_module_notify(struct notifier_block *self,
7322 			       unsigned long val, void *data)
7323 {
7324 	struct module *mod = data;
7325 
7326 	switch (val) {
7327 	case MODULE_STATE_COMING:
7328 		trace_module_add_enums(mod);
7329 		break;
7330 	case MODULE_STATE_GOING:
7331 		trace_module_remove_enums(mod);
7332 		break;
7333 	}
7334 
7335 	return 0;
7336 }
7337 
7338 static struct notifier_block trace_module_nb = {
7339 	.notifier_call = trace_module_notify,
7340 	.priority = 0,
7341 };
7342 #endif /* CONFIG_MODULES */
7343 
7344 static __init int tracer_init_tracefs(void)
7345 {
7346 	struct dentry *d_tracer;
7347 
7348 	trace_access_lock_init();
7349 
7350 	d_tracer = tracing_init_dentry();
7351 	if (IS_ERR(d_tracer))
7352 		return 0;
7353 
7354 	init_tracer_tracefs(&global_trace, d_tracer);
7355 
7356 	trace_create_file("tracing_thresh", 0644, d_tracer,
7357 			&global_trace, &tracing_thresh_fops);
7358 
7359 	trace_create_file("README", 0444, d_tracer,
7360 			NULL, &tracing_readme_fops);
7361 
7362 	trace_create_file("saved_cmdlines", 0444, d_tracer,
7363 			NULL, &tracing_saved_cmdlines_fops);
7364 
7365 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7366 			  NULL, &tracing_saved_cmdlines_size_fops);
7367 
7368 	trace_enum_init();
7369 
7370 	trace_create_enum_file(d_tracer);
7371 
7372 #ifdef CONFIG_MODULES
7373 	register_module_notifier(&trace_module_nb);
7374 #endif
7375 
7376 #ifdef CONFIG_DYNAMIC_FTRACE
7377 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7378 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7379 #endif
7380 
7381 	create_trace_instances(d_tracer);
7382 
7383 	update_tracer_options(&global_trace);
7384 
7385 	return 0;
7386 }
7387 
7388 static int trace_panic_handler(struct notifier_block *this,
7389 			       unsigned long event, void *unused)
7390 {
7391 	if (ftrace_dump_on_oops)
7392 		ftrace_dump(ftrace_dump_on_oops);
7393 	return NOTIFY_OK;
7394 }
7395 
7396 static struct notifier_block trace_panic_notifier = {
7397 	.notifier_call  = trace_panic_handler,
7398 	.next           = NULL,
7399 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7400 };
7401 
7402 static int trace_die_handler(struct notifier_block *self,
7403 			     unsigned long val,
7404 			     void *data)
7405 {
7406 	switch (val) {
7407 	case DIE_OOPS:
7408 		if (ftrace_dump_on_oops)
7409 			ftrace_dump(ftrace_dump_on_oops);
7410 		break;
7411 	default:
7412 		break;
7413 	}
7414 	return NOTIFY_OK;
7415 }
7416 
7417 static struct notifier_block trace_die_notifier = {
7418 	.notifier_call = trace_die_handler,
7419 	.priority = 200
7420 };
7421 
7422 /*
7423  * printk is set to a max of 1024; we really don't need it that big.
7424  * Nothing should be printing 1000 characters anyway.
7425  */
7426 #define TRACE_MAX_PRINT		1000
7427 
7428 /*
7429  * Define here KERN_TRACE so that we have one place to modify
7430  * it if we decide to change what log level the ftrace dump
7431  * should be at.
7432  */
7433 #define KERN_TRACE		KERN_EMERG
7434 
7435 void
7436 trace_printk_seq(struct trace_seq *s)
7437 {
7438 	/* Probably should print a warning here. */
7439 	if (s->seq.len >= TRACE_MAX_PRINT)
7440 		s->seq.len = TRACE_MAX_PRINT;
7441 
7442 	/*
7443 	 * More paranoid code. Although the buffer size is set to
7444 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7445 	 * an extra layer of protection.
7446 	 */
7447 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7448 		s->seq.len = s->seq.size - 1;
7449 
7450 	/* should be nul terminated, but we are paranoid. */
7451 	s->buffer[s->seq.len] = 0;
7452 
7453 	printk(KERN_TRACE "%s", s->buffer);
7454 
7455 	trace_seq_init(s);
7456 }
7457 
7458 void trace_init_global_iter(struct trace_iterator *iter)
7459 {
7460 	iter->tr = &global_trace;
7461 	iter->trace = iter->tr->current_trace;
7462 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
7463 	iter->trace_buffer = &global_trace.trace_buffer;
7464 
7465 	if (iter->trace && iter->trace->open)
7466 		iter->trace->open(iter);
7467 
7468 	/* Annotate start of buffers if we had overruns */
7469 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
7470 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
7471 
7472 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
7473 	if (trace_clocks[iter->tr->clock_id].in_ns)
7474 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7475 }
7476 
7477 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7478 {
7479 	/* use static because iter can be a bit big for the stack */
7480 	static struct trace_iterator iter;
7481 	static atomic_t dump_running;
7482 	struct trace_array *tr = &global_trace;
7483 	unsigned int old_userobj;
7484 	unsigned long flags;
7485 	int cnt = 0, cpu;
7486 
7487 	/* Only allow one dump user at a time. */
7488 	if (atomic_inc_return(&dump_running) != 1) {
7489 		atomic_dec(&dump_running);
7490 		return;
7491 	}
7492 
7493 	/*
7494 	 * Always turn off tracing when we dump.
7495 	 * We don't need to show trace output of what happens
7496 	 * between multiple crashes.
7497 	 *
7498 	 * If the user does a sysrq-z, then they can re-enable
7499 	 * tracing with echo 1 > tracing_on.
7500 	 */
7501 	tracing_off();
7502 
7503 	local_irq_save(flags);
7504 
7505 	/* Simulate the iterator */
7506 	trace_init_global_iter(&iter);
7507 
7508 	for_each_tracing_cpu(cpu) {
7509 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7510 	}
7511 
7512 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7513 
7514 	/* don't look at user memory in panic mode */
7515 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7516 
7517 	switch (oops_dump_mode) {
7518 	case DUMP_ALL:
7519 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7520 		break;
7521 	case DUMP_ORIG:
7522 		iter.cpu_file = raw_smp_processor_id();
7523 		break;
7524 	case DUMP_NONE:
7525 		goto out_enable;
7526 	default:
7527 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7528 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7529 	}
7530 
7531 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
7532 
7533 	/* Did function tracer already get disabled? */
7534 	if (ftrace_is_dead()) {
7535 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7536 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7537 	}
7538 
7539 	/*
7540 	 * We need to stop all tracing on all CPUs to read
7541 	 * the next buffer. This is a bit expensive, but is
7542 	 * not done often. We read everything we can,
7543 	 * and then release the locks again.
7544 	 */
7545 
7546 	while (!trace_empty(&iter)) {
7547 
7548 		if (!cnt)
7549 			printk(KERN_TRACE "---------------------------------\n");
7550 
7551 		cnt++;
7552 
7553 		/* reset all but tr, trace, and overruns */
7554 		memset(&iter.seq, 0,
7555 		       sizeof(struct trace_iterator) -
7556 		       offsetof(struct trace_iterator, seq));
7557 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
7558 		iter.pos = -1;
7559 
7560 		if (trace_find_next_entry_inc(&iter) != NULL) {
7561 			int ret;
7562 
7563 			ret = print_trace_line(&iter);
7564 			if (ret != TRACE_TYPE_NO_CONSUME)
7565 				trace_consume(&iter);
7566 		}
7567 		touch_nmi_watchdog();
7568 
7569 		trace_printk_seq(&iter.seq);
7570 	}
7571 
7572 	if (!cnt)
7573 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
7574 	else
7575 		printk(KERN_TRACE "---------------------------------\n");
7576 
7577  out_enable:
7578 	tr->trace_flags |= old_userobj;
7579 
7580 	for_each_tracing_cpu(cpu) {
7581 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7582 	}
7583 	atomic_dec(&dump_running);
7584 	local_irq_restore(flags);
7585 }
7586 EXPORT_SYMBOL_GPL(ftrace_dump);
7587 
7588 __init static int tracer_alloc_buffers(void)
7589 {
7590 	int ring_buf_size;
7591 	int ret = -ENOMEM;
7592 
7593 	/*
7594 	 * Make sure we don't accidentally add more trace options
7595 	 * than we have bits for.
7596 	 */
7597 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7598 
7599 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7600 		goto out;
7601 
7602 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7603 		goto out_free_buffer_mask;
7604 
7605 	/* Only allocate trace_printk buffers if a trace_printk exists */
7606 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7607 		/* Must be called before global_trace.buffer is allocated */
7608 		trace_printk_init_buffers();
7609 
7610 	/* To save memory, keep the ring buffer size at its minimum */
7611 	if (ring_buffer_expanded)
7612 		ring_buf_size = trace_buf_size;
7613 	else
7614 		ring_buf_size = 1;
7615 
7616 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7617 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7618 
7619 	raw_spin_lock_init(&global_trace.start_lock);
7620 
7621 	/* Used for event triggers */
7622 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7623 	if (!temp_buffer)
7624 		goto out_free_cpumask;
7625 
7626 	if (trace_create_savedcmd() < 0)
7627 		goto out_free_temp_buffer;
7628 
7629 	/* TODO: make the number of buffers hot pluggable with CPUs */
7630 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7631 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7632 		WARN_ON(1);
7633 		goto out_free_savedcmd;
7634 	}
7635 
7636 	if (global_trace.buffer_disabled)
7637 		tracing_off();
7638 
7639 	if (trace_boot_clock) {
7640 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
7641 		if (ret < 0)
7642 			pr_warn("Trace clock %s not defined, going back to default\n",
7643 				trace_boot_clock);
7644 	}
7645 
7646 	/*
7647 	 * register_tracer() might reference current_trace, so it
7648 	 * needs to be set before we register anything. This is
7649 	 * just a bootstrap of current_trace anyway.
7650 	 */
7651 	global_trace.current_trace = &nop_trace;
7652 
7653 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7654 
7655 	ftrace_init_global_array_ops(&global_trace);
7656 
7657 	init_trace_flags_index(&global_trace);
7658 
7659 	register_tracer(&nop_trace);
7660 
7661 	/* All seems OK, enable tracing */
7662 	tracing_disabled = 0;
7663 
7664 	atomic_notifier_chain_register(&panic_notifier_list,
7665 				       &trace_panic_notifier);
7666 
7667 	register_die_notifier(&trace_die_notifier);
7668 
7669 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7670 
7671 	INIT_LIST_HEAD(&global_trace.systems);
7672 	INIT_LIST_HEAD(&global_trace.events);
7673 	list_add(&global_trace.list, &ftrace_trace_arrays);
7674 
7675 	apply_trace_boot_options();
7676 
7677 	register_snapshot_cmd();
7678 
7679 	return 0;
7680 
7681 out_free_savedcmd:
7682 	free_saved_cmdlines_buffer(savedcmd);
7683 out_free_temp_buffer:
7684 	ring_buffer_free(temp_buffer);
7685 out_free_cpumask:
7686 	free_cpumask_var(global_trace.tracing_cpumask);
7687 out_free_buffer_mask:
7688 	free_cpumask_var(tracing_buffer_mask);
7689 out:
7690 	return ret;
7691 }
7692 
7693 void __init trace_init(void)
7694 {
7695 	if (tracepoint_printk) {
7696 		tracepoint_print_iter =
7697 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7698 		if (WARN_ON(!tracepoint_print_iter))
7699 			tracepoint_printk = 0;
7700 	}
7701 	tracer_alloc_buffers();
7702 	trace_event_init();
7703 }
7704 
7705 __init static int clear_boot_tracer(void)
7706 {
7707 	/*
7708 	 * The default bootup tracer buffer lives in an init section.
7709 	 * This function is called at late_initcall time. If we did not
7710 	 * find the boot tracer, then clear it out, to prevent
7711 	 * later registration from accessing the buffer that is
7712 	 * about to be freed.
7713 	 */
7714 	if (!default_bootup_tracer)
7715 		return 0;
7716 
7717 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7718 	       default_bootup_tracer);
7719 	default_bootup_tracer = NULL;
7720 
7721 	return 0;
7722 }
7723 
7724 fs_initcall(tracer_init_tracefs);
7725 late_initcall(clear_boot_tracer);
7726