xref: /linux-6.15/kernel/trace/trace.c (revision 228cd2db)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
6  * Copyright (C) 2008 Ingo Molnar <[email protected]>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <[email protected]>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47 
48 #include "trace.h"
49 #include "trace_output.h"
50 
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56 
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will lurk into the ring-buffer to count the
60  * entries inserted during the selftest although some concurrent
61  * insertions into the ring-buffer such as trace_printk could occurred
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65 
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70 
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75 
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 	{ }
79 };
80 
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 	return 0;
85 }
86 
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurred.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93 
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101 
102 cpumask_var_t __read_mostly	tracing_buffer_mask;
103 
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputing it to a
111  * serial console.
112  *
113  * It is default off, but you can enable it with either specifying
114  * "ftrace_dump_on_oops" in the kernel command line, or setting
115  * /proc/sys/kernel/ftrace_dump_on_oops
116  * Set 1 if you want to dump buffers of all CPUs
117  * Set 2 if you want to dump the buffer of the CPU that triggered oops
118  */
119 
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121 
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124 
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 	struct module			*mod;
129 	unsigned long			length;
130 };
131 
132 union trace_eval_map_item;
133 
134 struct trace_eval_map_tail {
135 	/*
136 	 * "end" is first and points to NULL as it must be different
137 	 * than "mod" or "eval_string"
138 	 */
139 	union trace_eval_map_item	*next;
140 	const char			*end;	/* points to NULL */
141 };
142 
143 static DEFINE_MUTEX(trace_eval_mutex);
144 
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153 	struct trace_eval_map		map;
154 	struct trace_eval_map_head	head;
155 	struct trace_eval_map_tail	tail;
156 };
157 
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160 
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 
163 #define MAX_TRACER_SIZE		100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166 
167 static bool allocate_snapshot;
168 
169 static int __init set_cmdline_ftrace(char *str)
170 {
171 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 	default_bootup_tracer = bootup_tracer_buf;
173 	/* We are using ftrace early, expand it */
174 	ring_buffer_expanded = true;
175 	return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178 
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181 	if (*str++ != '=' || !*str) {
182 		ftrace_dump_on_oops = DUMP_ALL;
183 		return 1;
184 	}
185 
186 	if (!strcmp("orig_cpu", str)) {
187 		ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190 
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194 
195 static int __init stop_trace_on_warning(char *str)
196 {
197 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 		__disable_trace_on_warning = 1;
199 	return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202 
203 static int __init boot_alloc_snapshot(char *str)
204 {
205 	allocate_snapshot = true;
206 	/* We also need the main ring buffer expanded */
207 	ring_buffer_expanded = true;
208 	return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211 
212 
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214 
215 static int __init set_trace_boot_options(char *str)
216 {
217 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 	return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221 
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224 
225 static int __init set_trace_boot_clock(char *str)
226 {
227 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 	trace_boot_clock = trace_boot_clock_buf;
229 	return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232 
233 static int __init set_tracepoint_printk(char *str)
234 {
235 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 		tracepoint_printk = 1;
237 	return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240 
241 unsigned long long ns2usecs(u64 nsec)
242 {
243 	nsec += 500;
244 	do_div(nsec, 1000);
245 	return nsec;
246 }
247 
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS						\
250 	(FUNCTION_DEFAULT_FLAGS |					\
251 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
252 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
253 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
254 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255 
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
258 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259 
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269 	.trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271 
272 LIST_HEAD(ftrace_trace_arrays);
273 
274 int trace_array_get(struct trace_array *this_tr)
275 {
276 	struct trace_array *tr;
277 	int ret = -ENODEV;
278 
279 	mutex_lock(&trace_types_lock);
280 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 		if (tr == this_tr) {
282 			tr->ref++;
283 			ret = 0;
284 			break;
285 		}
286 	}
287 	mutex_unlock(&trace_types_lock);
288 
289 	return ret;
290 }
291 
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294 	WARN_ON(!this_tr->ref);
295 	this_tr->ref--;
296 }
297 
298 void trace_array_put(struct trace_array *this_tr)
299 {
300 	mutex_lock(&trace_types_lock);
301 	__trace_array_put(this_tr);
302 	mutex_unlock(&trace_types_lock);
303 }
304 
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 			      struct ring_buffer *buffer,
307 			      struct ring_buffer_event *event)
308 {
309 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 	    !filter_match_preds(call->filter, rec)) {
311 		__trace_event_discard_commit(buffer, event);
312 		return 1;
313 	}
314 
315 	return 0;
316 }
317 
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320 	vfree(pid_list->pids);
321 	kfree(pid_list);
322 }
323 
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334 	/*
335 	 * If pid_max changed after filtered_pids was created, we
336 	 * by default ignore all pids greater than the previous pid_max.
337 	 */
338 	if (search_pid >= filtered_pids->pid_max)
339 		return false;
340 
341 	return test_bit(search_pid, filtered_pids->pids);
342 }
343 
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356 	/*
357 	 * Return false, because if filtered_pids does not exist,
358 	 * all pids are good to trace.
359 	 */
360 	if (!filtered_pids)
361 		return false;
362 
363 	return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
365 
366 /**
367  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 				  struct task_struct *self,
380 				  struct task_struct *task)
381 {
382 	if (!pid_list)
383 		return;
384 
385 	/* For forks, we only add if the forking task is listed */
386 	if (self) {
387 		if (!trace_find_filtered_pid(pid_list, self->pid))
388 			return;
389 	}
390 
391 	/* Sorry, but we don't support pid_max changing after setting */
392 	if (task->pid >= pid_list->pid_max)
393 		return;
394 
395 	/* "self" is set for forks, and NULL for exits */
396 	if (self)
397 		set_bit(task->pid, pid_list->pids);
398 	else
399 		clear_bit(task->pid, pid_list->pids);
400 }
401 
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416 	unsigned long pid = (unsigned long)v;
417 
418 	(*pos)++;
419 
420 	/* pid already is +1 of the actual prevous bit */
421 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422 
423 	/* Return pid + 1 to allow zero to be represented */
424 	if (pid < pid_list->pid_max)
425 		return (void *)(pid + 1);
426 
427 	return NULL;
428 }
429 
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443 	unsigned long pid;
444 	loff_t l = 0;
445 
446 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 	if (pid >= pid_list->pid_max)
448 		return NULL;
449 
450 	/* Return pid + 1 so that zero can be the exit value */
451 	for (pid++; pid && l < *pos;
452 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 		;
454 	return (void *)pid;
455 }
456 
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467 	unsigned long pid = (unsigned long)v - 1;
468 
469 	seq_printf(m, "%lu\n", pid);
470 	return 0;
471 }
472 
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE		127
475 
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477 		    struct trace_pid_list **new_pid_list,
478 		    const char __user *ubuf, size_t cnt)
479 {
480 	struct trace_pid_list *pid_list;
481 	struct trace_parser parser;
482 	unsigned long val;
483 	int nr_pids = 0;
484 	ssize_t read = 0;
485 	ssize_t ret = 0;
486 	loff_t pos;
487 	pid_t pid;
488 
489 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 		return -ENOMEM;
491 
492 	/*
493 	 * Always recreate a new array. The write is an all or nothing
494 	 * operation. Always create a new array when adding new pids by
495 	 * the user. If the operation fails, then the current list is
496 	 * not modified.
497 	 */
498 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 	if (!pid_list)
500 		return -ENOMEM;
501 
502 	pid_list->pid_max = READ_ONCE(pid_max);
503 
504 	/* Only truncating will shrink pid_max */
505 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 		pid_list->pid_max = filtered_pids->pid_max;
507 
508 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 	if (!pid_list->pids) {
510 		kfree(pid_list);
511 		return -ENOMEM;
512 	}
513 
514 	if (filtered_pids) {
515 		/* copy the current bits to the new max */
516 		for_each_set_bit(pid, filtered_pids->pids,
517 				 filtered_pids->pid_max) {
518 			set_bit(pid, pid_list->pids);
519 			nr_pids++;
520 		}
521 	}
522 
523 	while (cnt > 0) {
524 
525 		pos = 0;
526 
527 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
528 		if (ret < 0 || !trace_parser_loaded(&parser))
529 			break;
530 
531 		read += ret;
532 		ubuf += ret;
533 		cnt -= ret;
534 
535 		ret = -EINVAL;
536 		if (kstrtoul(parser.buffer, 0, &val))
537 			break;
538 		if (val >= pid_list->pid_max)
539 			break;
540 
541 		pid = (pid_t)val;
542 
543 		set_bit(pid, pid_list->pids);
544 		nr_pids++;
545 
546 		trace_parser_clear(&parser);
547 		ret = 0;
548 	}
549 	trace_parser_put(&parser);
550 
551 	if (ret < 0) {
552 		trace_free_pid_list(pid_list);
553 		return ret;
554 	}
555 
556 	if (!nr_pids) {
557 		/* Cleared the list of pids */
558 		trace_free_pid_list(pid_list);
559 		read = ret;
560 		pid_list = NULL;
561 	}
562 
563 	*new_pid_list = pid_list;
564 
565 	return read;
566 }
567 
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 	u64 ts;
571 
572 	/* Early boot up does not have a buffer yet */
573 	if (!buf->buffer)
574 		return trace_clock_local();
575 
576 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578 
579 	return ts;
580 }
581 
582 u64 ftrace_now(int cpu)
583 {
584 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586 
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598 	/*
599 	 * For quick access (irqsoff uses this in fast path), just
600 	 * return the mirror variable of the state of the ring buffer.
601 	 * It's a little racy, but we don't really care.
602 	 */
603 	smp_rmb();
604 	return !global_trace.buffer_disabled;
605 }
606 
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If the dump on oops happens, it will be much appreciated
614  * to not have to wait for all that output. Anyway this can be
615  * boot time and run time configurable.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
618 
619 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620 
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer		*trace_types __read_mostly;
623 
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628 
629 /*
630  * serialize the access of the ring buffer
631  *
632  * ring buffer serializes readers, but it is low level protection.
633  * The validity of the events (which returns by ring_buffer_peek() ..etc)
634  * are not protected by ring buffer.
635  *
636  * The content of events may become garbage if we allow other process consumes
637  * these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not reader page) in ring buffer, and this page will be rewrited
640  *      by events producer.
641  *   B) The page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to system.
643  *
644  * These primitives allow multi process access to different cpu ring buffer
645  * concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multi read-only access are also serialized.
649  */
650 
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654 
655 static inline void trace_access_lock(int cpu)
656 {
657 	if (cpu == RING_BUFFER_ALL_CPUS) {
658 		/* gain it for accessing the whole ring buffer. */
659 		down_write(&all_cpu_access_lock);
660 	} else {
661 		/* gain it for accessing a cpu ring buffer. */
662 
663 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 		down_read(&all_cpu_access_lock);
665 
666 		/* Secondly block other access to this @cpu ring buffer. */
667 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 	}
669 }
670 
671 static inline void trace_access_unlock(int cpu)
672 {
673 	if (cpu == RING_BUFFER_ALL_CPUS) {
674 		up_write(&all_cpu_access_lock);
675 	} else {
676 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 		up_read(&all_cpu_access_lock);
678 	}
679 }
680 
681 static inline void trace_access_lock_init(void)
682 {
683 	int cpu;
684 
685 	for_each_possible_cpu(cpu)
686 		mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688 
689 #else
690 
691 static DEFINE_MUTEX(access_lock);
692 
693 static inline void trace_access_lock(int cpu)
694 {
695 	(void)cpu;
696 	mutex_lock(&access_lock);
697 }
698 
699 static inline void trace_access_unlock(int cpu)
700 {
701 	(void)cpu;
702 	mutex_unlock(&access_lock);
703 }
704 
705 static inline void trace_access_lock_init(void)
706 {
707 }
708 
709 #endif
710 
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 				 unsigned long flags,
714 				 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 				      struct ring_buffer *buffer,
717 				      unsigned long flags,
718 				      int skip, int pc, struct pt_regs *regs);
719 
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 					unsigned long flags,
723 					int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 				      struct ring_buffer *buffer,
728 				      unsigned long flags,
729 				      int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 
733 #endif
734 
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 		  int type, unsigned long flags, int pc)
738 {
739 	struct trace_entry *ent = ring_buffer_event_data(event);
740 
741 	tracing_generic_entry_update(ent, flags, pc);
742 	ent->type = type;
743 }
744 
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 			  int type,
748 			  unsigned long len,
749 			  unsigned long flags, int pc)
750 {
751 	struct ring_buffer_event *event;
752 
753 	event = ring_buffer_lock_reserve(buffer, len);
754 	if (event != NULL)
755 		trace_event_setup(event, type, flags, pc);
756 
757 	return event;
758 }
759 
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 	if (tr->trace_buffer.buffer)
763 		ring_buffer_record_on(tr->trace_buffer.buffer);
764 	/*
765 	 * This flag is looked at when buffers haven't been allocated
766 	 * yet, or by some tracers (like irqsoff), that just want to
767 	 * know if the ring buffer has been disabled, but it can handle
768 	 * races of where it gets disabled but we still do a record.
769 	 * As the check is in the fast path of the tracers, it is more
770 	 * important to be fast than accurate.
771 	 */
772 	tr->buffer_disabled = 0;
773 	/* Make the flag seen by readers */
774 	smp_wmb();
775 }
776 
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785 	tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788 
789 
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 	__this_cpu_write(trace_taskinfo_save, true);
794 
795 	/* If this is the temp buffer, we need to commit fully */
796 	if (this_cpu_read(trace_buffered_event) == event) {
797 		/* Length is in event->array[0] */
798 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 		/* Release the temp buffer */
800 		this_cpu_dec(trace_buffered_event_cnt);
801 	} else
802 		ring_buffer_unlock_commit(buffer, event);
803 }
804 
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:	   The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 	struct ring_buffer_event *event;
814 	struct ring_buffer *buffer;
815 	struct print_entry *entry;
816 	unsigned long irq_flags;
817 	int alloc;
818 	int pc;
819 
820 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 		return 0;
822 
823 	pc = preempt_count();
824 
825 	if (unlikely(tracing_selftest_running || tracing_disabled))
826 		return 0;
827 
828 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
829 
830 	local_save_flags(irq_flags);
831 	buffer = global_trace.trace_buffer.buffer;
832 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 					    irq_flags, pc);
834 	if (!event)
835 		return 0;
836 
837 	entry = ring_buffer_event_data(event);
838 	entry->ip = ip;
839 
840 	memcpy(&entry->buf, str, size);
841 
842 	/* Add a newline if necessary */
843 	if (entry->buf[size - 1] != '\n') {
844 		entry->buf[size] = '\n';
845 		entry->buf[size + 1] = '\0';
846 	} else
847 		entry->buf[size] = '\0';
848 
849 	__buffer_unlock_commit(buffer, event);
850 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851 
852 	return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855 
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:	   The address of the caller
859  * @str:   The constant string to write to the buffer to
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 	struct ring_buffer_event *event;
864 	struct ring_buffer *buffer;
865 	struct bputs_entry *entry;
866 	unsigned long irq_flags;
867 	int size = sizeof(struct bputs_entry);
868 	int pc;
869 
870 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 		return 0;
872 
873 	pc = preempt_count();
874 
875 	if (unlikely(tracing_selftest_running || tracing_disabled))
876 		return 0;
877 
878 	local_save_flags(irq_flags);
879 	buffer = global_trace.trace_buffer.buffer;
880 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 					    irq_flags, pc);
882 	if (!event)
883 		return 0;
884 
885 	entry = ring_buffer_event_data(event);
886 	entry->ip			= ip;
887 	entry->str			= str;
888 
889 	__buffer_unlock_commit(buffer, event);
890 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891 
892 	return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895 
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
898 {
899 	struct tracer *tracer = tr->current_trace;
900 	unsigned long flags;
901 
902 	if (in_nmi()) {
903 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 		internal_trace_puts("*** snapshot is being ignored        ***\n");
905 		return;
906 	}
907 
908 	if (!tr->allocated_snapshot) {
909 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 		internal_trace_puts("*** stopping trace here!   ***\n");
911 		tracing_off();
912 		return;
913 	}
914 
915 	/* Note, snapshot can not be used when the tracer uses it */
916 	if (tracer->use_max_tr) {
917 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 		return;
920 	}
921 
922 	local_irq_save(flags);
923 	update_max_tr(tr, current, smp_processor_id(), cond_data);
924 	local_irq_restore(flags);
925 }
926 
927 void tracing_snapshot_instance(struct trace_array *tr)
928 {
929 	tracing_snapshot_instance_cond(tr, NULL);
930 }
931 
932 /**
933  * tracing_snapshot - take a snapshot of the current buffer.
934  *
935  * This causes a swap between the snapshot buffer and the current live
936  * tracing buffer. You can use this to take snapshots of the live
937  * trace when some condition is triggered, but continue to trace.
938  *
939  * Note, make sure to allocate the snapshot with either
940  * a tracing_snapshot_alloc(), or by doing it manually
941  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
942  *
943  * If the snapshot buffer is not allocated, it will stop tracing.
944  * Basically making a permanent snapshot.
945  */
946 void tracing_snapshot(void)
947 {
948 	struct trace_array *tr = &global_trace;
949 
950 	tracing_snapshot_instance(tr);
951 }
952 EXPORT_SYMBOL_GPL(tracing_snapshot);
953 
954 /**
955  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
956  * @tr:		The tracing instance to snapshot
957  * @cond_data:	The data to be tested conditionally, and possibly saved
958  *
959  * This is the same as tracing_snapshot() except that the snapshot is
960  * conditional - the snapshot will only happen if the
961  * cond_snapshot.update() implementation receiving the cond_data
962  * returns true, which means that the trace array's cond_snapshot
963  * update() operation used the cond_data to determine whether the
964  * snapshot should be taken, and if it was, presumably saved it along
965  * with the snapshot.
966  */
967 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
968 {
969 	tracing_snapshot_instance_cond(tr, cond_data);
970 }
971 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
972 
973 /**
974  * tracing_snapshot_cond_data - get the user data associated with a snapshot
975  * @tr:		The tracing instance
976  *
977  * When the user enables a conditional snapshot using
978  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
979  * with the snapshot.  This accessor is used to retrieve it.
980  *
981  * Should not be called from cond_snapshot.update(), since it takes
982  * the tr->max_lock lock, which the code calling
983  * cond_snapshot.update() has already done.
984  *
985  * Returns the cond_data associated with the trace array's snapshot.
986  */
987 void *tracing_cond_snapshot_data(struct trace_array *tr)
988 {
989 	void *cond_data = NULL;
990 
991 	arch_spin_lock(&tr->max_lock);
992 
993 	if (tr->cond_snapshot)
994 		cond_data = tr->cond_snapshot->cond_data;
995 
996 	arch_spin_unlock(&tr->max_lock);
997 
998 	return cond_data;
999 }
1000 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1001 
1002 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1003 					struct trace_buffer *size_buf, int cpu_id);
1004 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1005 
1006 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1007 {
1008 	int ret;
1009 
1010 	if (!tr->allocated_snapshot) {
1011 
1012 		/* allocate spare buffer */
1013 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1014 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1015 		if (ret < 0)
1016 			return ret;
1017 
1018 		tr->allocated_snapshot = true;
1019 	}
1020 
1021 	return 0;
1022 }
1023 
1024 static void free_snapshot(struct trace_array *tr)
1025 {
1026 	/*
1027 	 * We don't free the ring buffer. instead, resize it because
1028 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1029 	 * we want preserve it.
1030 	 */
1031 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1032 	set_buffer_entries(&tr->max_buffer, 1);
1033 	tracing_reset_online_cpus(&tr->max_buffer);
1034 	tr->allocated_snapshot = false;
1035 }
1036 
1037 /**
1038  * tracing_alloc_snapshot - allocate snapshot buffer.
1039  *
1040  * This only allocates the snapshot buffer if it isn't already
1041  * allocated - it doesn't also take a snapshot.
1042  *
1043  * This is meant to be used in cases where the snapshot buffer needs
1044  * to be set up for events that can't sleep but need to be able to
1045  * trigger a snapshot.
1046  */
1047 int tracing_alloc_snapshot(void)
1048 {
1049 	struct trace_array *tr = &global_trace;
1050 	int ret;
1051 
1052 	ret = tracing_alloc_snapshot_instance(tr);
1053 	WARN_ON(ret < 0);
1054 
1055 	return ret;
1056 }
1057 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1058 
1059 /**
1060  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1061  *
1062  * This is similar to tracing_snapshot(), but it will allocate the
1063  * snapshot buffer if it isn't already allocated. Use this only
1064  * where it is safe to sleep, as the allocation may sleep.
1065  *
1066  * This causes a swap between the snapshot buffer and the current live
1067  * tracing buffer. You can use this to take snapshots of the live
1068  * trace when some condition is triggered, but continue to trace.
1069  */
1070 void tracing_snapshot_alloc(void)
1071 {
1072 	int ret;
1073 
1074 	ret = tracing_alloc_snapshot();
1075 	if (ret < 0)
1076 		return;
1077 
1078 	tracing_snapshot();
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1081 
1082 /**
1083  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1084  * @tr:		The tracing instance
1085  * @cond_data:	User data to associate with the snapshot
1086  * @update:	Implementation of the cond_snapshot update function
1087  *
1088  * Check whether the conditional snapshot for the given instance has
1089  * already been enabled, or if the current tracer is already using a
1090  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1091  * save the cond_data and update function inside.
1092  *
1093  * Returns 0 if successful, error otherwise.
1094  */
1095 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1096 				 cond_update_fn_t update)
1097 {
1098 	struct cond_snapshot *cond_snapshot;
1099 	int ret = 0;
1100 
1101 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1102 	if (!cond_snapshot)
1103 		return -ENOMEM;
1104 
1105 	cond_snapshot->cond_data = cond_data;
1106 	cond_snapshot->update = update;
1107 
1108 	mutex_lock(&trace_types_lock);
1109 
1110 	ret = tracing_alloc_snapshot_instance(tr);
1111 	if (ret)
1112 		goto fail_unlock;
1113 
1114 	if (tr->current_trace->use_max_tr) {
1115 		ret = -EBUSY;
1116 		goto fail_unlock;
1117 	}
1118 
1119 	/*
1120 	 * The cond_snapshot can only change to NULL without the
1121 	 * trace_types_lock. We don't care if we race with it going
1122 	 * to NULL, but we want to make sure that it's not set to
1123 	 * something other than NULL when we get here, which we can
1124 	 * do safely with only holding the trace_types_lock and not
1125 	 * having to take the max_lock.
1126 	 */
1127 	if (tr->cond_snapshot) {
1128 		ret = -EBUSY;
1129 		goto fail_unlock;
1130 	}
1131 
1132 	arch_spin_lock(&tr->max_lock);
1133 	tr->cond_snapshot = cond_snapshot;
1134 	arch_spin_unlock(&tr->max_lock);
1135 
1136 	mutex_unlock(&trace_types_lock);
1137 
1138 	return ret;
1139 
1140  fail_unlock:
1141 	mutex_unlock(&trace_types_lock);
1142 	kfree(cond_snapshot);
1143 	return ret;
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1146 
1147 /**
1148  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1149  * @tr:		The tracing instance
1150  *
1151  * Check whether the conditional snapshot for the given instance is
1152  * enabled; if so, free the cond_snapshot associated with it,
1153  * otherwise return -EINVAL.
1154  *
1155  * Returns 0 if successful, error otherwise.
1156  */
1157 int tracing_snapshot_cond_disable(struct trace_array *tr)
1158 {
1159 	int ret = 0;
1160 
1161 	arch_spin_lock(&tr->max_lock);
1162 
1163 	if (!tr->cond_snapshot)
1164 		ret = -EINVAL;
1165 	else {
1166 		kfree(tr->cond_snapshot);
1167 		tr->cond_snapshot = NULL;
1168 	}
1169 
1170 	arch_spin_unlock(&tr->max_lock);
1171 
1172 	return ret;
1173 }
1174 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1175 #else
1176 void tracing_snapshot(void)
1177 {
1178 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1179 }
1180 EXPORT_SYMBOL_GPL(tracing_snapshot);
1181 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1182 {
1183 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1184 }
1185 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1186 int tracing_alloc_snapshot(void)
1187 {
1188 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1189 	return -ENODEV;
1190 }
1191 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1192 void tracing_snapshot_alloc(void)
1193 {
1194 	/* Give warning */
1195 	tracing_snapshot();
1196 }
1197 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 	return NULL;
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1204 {
1205 	return -ENODEV;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1208 int tracing_snapshot_cond_disable(struct trace_array *tr)
1209 {
1210 	return false;
1211 }
1212 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1213 #endif /* CONFIG_TRACER_SNAPSHOT */
1214 
1215 void tracer_tracing_off(struct trace_array *tr)
1216 {
1217 	if (tr->trace_buffer.buffer)
1218 		ring_buffer_record_off(tr->trace_buffer.buffer);
1219 	/*
1220 	 * This flag is looked at when buffers haven't been allocated
1221 	 * yet, or by some tracers (like irqsoff), that just want to
1222 	 * know if the ring buffer has been disabled, but it can handle
1223 	 * races of where it gets disabled but we still do a record.
1224 	 * As the check is in the fast path of the tracers, it is more
1225 	 * important to be fast than accurate.
1226 	 */
1227 	tr->buffer_disabled = 1;
1228 	/* Make the flag seen by readers */
1229 	smp_wmb();
1230 }
1231 
1232 /**
1233  * tracing_off - turn off tracing buffers
1234  *
1235  * This function stops the tracing buffers from recording data.
1236  * It does not disable any overhead the tracers themselves may
1237  * be causing. This function simply causes all recording to
1238  * the ring buffers to fail.
1239  */
1240 void tracing_off(void)
1241 {
1242 	tracer_tracing_off(&global_trace);
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_off);
1245 
1246 void disable_trace_on_warning(void)
1247 {
1248 	if (__disable_trace_on_warning)
1249 		tracing_off();
1250 }
1251 
1252 /**
1253  * tracer_tracing_is_on - show real state of ring buffer enabled
1254  * @tr : the trace array to know if ring buffer is enabled
1255  *
1256  * Shows real state of the ring buffer if it is enabled or not.
1257  */
1258 bool tracer_tracing_is_on(struct trace_array *tr)
1259 {
1260 	if (tr->trace_buffer.buffer)
1261 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1262 	return !tr->buffer_disabled;
1263 }
1264 
1265 /**
1266  * tracing_is_on - show state of ring buffers enabled
1267  */
1268 int tracing_is_on(void)
1269 {
1270 	return tracer_tracing_is_on(&global_trace);
1271 }
1272 EXPORT_SYMBOL_GPL(tracing_is_on);
1273 
1274 static int __init set_buf_size(char *str)
1275 {
1276 	unsigned long buf_size;
1277 
1278 	if (!str)
1279 		return 0;
1280 	buf_size = memparse(str, &str);
1281 	/* nr_entries can not be zero */
1282 	if (buf_size == 0)
1283 		return 0;
1284 	trace_buf_size = buf_size;
1285 	return 1;
1286 }
1287 __setup("trace_buf_size=", set_buf_size);
1288 
1289 static int __init set_tracing_thresh(char *str)
1290 {
1291 	unsigned long threshold;
1292 	int ret;
1293 
1294 	if (!str)
1295 		return 0;
1296 	ret = kstrtoul(str, 0, &threshold);
1297 	if (ret < 0)
1298 		return 0;
1299 	tracing_thresh = threshold * 1000;
1300 	return 1;
1301 }
1302 __setup("tracing_thresh=", set_tracing_thresh);
1303 
1304 unsigned long nsecs_to_usecs(unsigned long nsecs)
1305 {
1306 	return nsecs / 1000;
1307 }
1308 
1309 /*
1310  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1311  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1312  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1313  * of strings in the order that the evals (enum) were defined.
1314  */
1315 #undef C
1316 #define C(a, b) b
1317 
1318 /* These must match the bit postions in trace_iterator_flags */
1319 static const char *trace_options[] = {
1320 	TRACE_FLAGS
1321 	NULL
1322 };
1323 
1324 static struct {
1325 	u64 (*func)(void);
1326 	const char *name;
1327 	int in_ns;		/* is this clock in nanoseconds? */
1328 } trace_clocks[] = {
1329 	{ trace_clock_local,		"local",	1 },
1330 	{ trace_clock_global,		"global",	1 },
1331 	{ trace_clock_counter,		"counter",	0 },
1332 	{ trace_clock_jiffies,		"uptime",	0 },
1333 	{ trace_clock,			"perf",		1 },
1334 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1335 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1336 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1337 	ARCH_TRACE_CLOCKS
1338 };
1339 
1340 bool trace_clock_in_ns(struct trace_array *tr)
1341 {
1342 	if (trace_clocks[tr->clock_id].in_ns)
1343 		return true;
1344 
1345 	return false;
1346 }
1347 
1348 /*
1349  * trace_parser_get_init - gets the buffer for trace parser
1350  */
1351 int trace_parser_get_init(struct trace_parser *parser, int size)
1352 {
1353 	memset(parser, 0, sizeof(*parser));
1354 
1355 	parser->buffer = kmalloc(size, GFP_KERNEL);
1356 	if (!parser->buffer)
1357 		return 1;
1358 
1359 	parser->size = size;
1360 	return 0;
1361 }
1362 
1363 /*
1364  * trace_parser_put - frees the buffer for trace parser
1365  */
1366 void trace_parser_put(struct trace_parser *parser)
1367 {
1368 	kfree(parser->buffer);
1369 	parser->buffer = NULL;
1370 }
1371 
1372 /*
1373  * trace_get_user - reads the user input string separated by  space
1374  * (matched by isspace(ch))
1375  *
1376  * For each string found the 'struct trace_parser' is updated,
1377  * and the function returns.
1378  *
1379  * Returns number of bytes read.
1380  *
1381  * See kernel/trace/trace.h for 'struct trace_parser' details.
1382  */
1383 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1384 	size_t cnt, loff_t *ppos)
1385 {
1386 	char ch;
1387 	size_t read = 0;
1388 	ssize_t ret;
1389 
1390 	if (!*ppos)
1391 		trace_parser_clear(parser);
1392 
1393 	ret = get_user(ch, ubuf++);
1394 	if (ret)
1395 		goto out;
1396 
1397 	read++;
1398 	cnt--;
1399 
1400 	/*
1401 	 * The parser is not finished with the last write,
1402 	 * continue reading the user input without skipping spaces.
1403 	 */
1404 	if (!parser->cont) {
1405 		/* skip white space */
1406 		while (cnt && isspace(ch)) {
1407 			ret = get_user(ch, ubuf++);
1408 			if (ret)
1409 				goto out;
1410 			read++;
1411 			cnt--;
1412 		}
1413 
1414 		parser->idx = 0;
1415 
1416 		/* only spaces were written */
1417 		if (isspace(ch) || !ch) {
1418 			*ppos += read;
1419 			ret = read;
1420 			goto out;
1421 		}
1422 	}
1423 
1424 	/* read the non-space input */
1425 	while (cnt && !isspace(ch) && ch) {
1426 		if (parser->idx < parser->size - 1)
1427 			parser->buffer[parser->idx++] = ch;
1428 		else {
1429 			ret = -EINVAL;
1430 			goto out;
1431 		}
1432 		ret = get_user(ch, ubuf++);
1433 		if (ret)
1434 			goto out;
1435 		read++;
1436 		cnt--;
1437 	}
1438 
1439 	/* We either got finished input or we have to wait for another call. */
1440 	if (isspace(ch) || !ch) {
1441 		parser->buffer[parser->idx] = 0;
1442 		parser->cont = false;
1443 	} else if (parser->idx < parser->size - 1) {
1444 		parser->cont = true;
1445 		parser->buffer[parser->idx++] = ch;
1446 		/* Make sure the parsed string always terminates with '\0'. */
1447 		parser->buffer[parser->idx] = 0;
1448 	} else {
1449 		ret = -EINVAL;
1450 		goto out;
1451 	}
1452 
1453 	*ppos += read;
1454 	ret = read;
1455 
1456 out:
1457 	return ret;
1458 }
1459 
1460 /* TODO add a seq_buf_to_buffer() */
1461 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1462 {
1463 	int len;
1464 
1465 	if (trace_seq_used(s) <= s->seq.readpos)
1466 		return -EBUSY;
1467 
1468 	len = trace_seq_used(s) - s->seq.readpos;
1469 	if (cnt > len)
1470 		cnt = len;
1471 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1472 
1473 	s->seq.readpos += cnt;
1474 	return cnt;
1475 }
1476 
1477 unsigned long __read_mostly	tracing_thresh;
1478 
1479 #ifdef CONFIG_TRACER_MAX_TRACE
1480 /*
1481  * Copy the new maximum trace into the separate maximum-trace
1482  * structure. (this way the maximum trace is permanently saved,
1483  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1484  */
1485 static void
1486 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1487 {
1488 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1489 	struct trace_buffer *max_buf = &tr->max_buffer;
1490 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1491 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1492 
1493 	max_buf->cpu = cpu;
1494 	max_buf->time_start = data->preempt_timestamp;
1495 
1496 	max_data->saved_latency = tr->max_latency;
1497 	max_data->critical_start = data->critical_start;
1498 	max_data->critical_end = data->critical_end;
1499 
1500 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1501 	max_data->pid = tsk->pid;
1502 	/*
1503 	 * If tsk == current, then use current_uid(), as that does not use
1504 	 * RCU. The irq tracer can be called out of RCU scope.
1505 	 */
1506 	if (tsk == current)
1507 		max_data->uid = current_uid();
1508 	else
1509 		max_data->uid = task_uid(tsk);
1510 
1511 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1512 	max_data->policy = tsk->policy;
1513 	max_data->rt_priority = tsk->rt_priority;
1514 
1515 	/* record this tasks comm */
1516 	tracing_record_cmdline(tsk);
1517 }
1518 
1519 /**
1520  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1521  * @tr: tracer
1522  * @tsk: the task with the latency
1523  * @cpu: The cpu that initiated the trace.
1524  * @cond_data: User data associated with a conditional snapshot
1525  *
1526  * Flip the buffers between the @tr and the max_tr and record information
1527  * about which task was the cause of this latency.
1528  */
1529 void
1530 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1531 	      void *cond_data)
1532 {
1533 	if (tr->stop_count)
1534 		return;
1535 
1536 	WARN_ON_ONCE(!irqs_disabled());
1537 
1538 	if (!tr->allocated_snapshot) {
1539 		/* Only the nop tracer should hit this when disabling */
1540 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1541 		return;
1542 	}
1543 
1544 	arch_spin_lock(&tr->max_lock);
1545 
1546 	/* Inherit the recordable setting from trace_buffer */
1547 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1548 		ring_buffer_record_on(tr->max_buffer.buffer);
1549 	else
1550 		ring_buffer_record_off(tr->max_buffer.buffer);
1551 
1552 #ifdef CONFIG_TRACER_SNAPSHOT
1553 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1554 		goto out_unlock;
1555 #endif
1556 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1557 
1558 	__update_max_tr(tr, tsk, cpu);
1559 
1560  out_unlock:
1561 	arch_spin_unlock(&tr->max_lock);
1562 }
1563 
1564 /**
1565  * update_max_tr_single - only copy one trace over, and reset the rest
1566  * @tr - tracer
1567  * @tsk - task with the latency
1568  * @cpu - the cpu of the buffer to copy.
1569  *
1570  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1571  */
1572 void
1573 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1574 {
1575 	int ret;
1576 
1577 	if (tr->stop_count)
1578 		return;
1579 
1580 	WARN_ON_ONCE(!irqs_disabled());
1581 	if (!tr->allocated_snapshot) {
1582 		/* Only the nop tracer should hit this when disabling */
1583 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1584 		return;
1585 	}
1586 
1587 	arch_spin_lock(&tr->max_lock);
1588 
1589 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1590 
1591 	if (ret == -EBUSY) {
1592 		/*
1593 		 * We failed to swap the buffer due to a commit taking
1594 		 * place on this CPU. We fail to record, but we reset
1595 		 * the max trace buffer (no one writes directly to it)
1596 		 * and flag that it failed.
1597 		 */
1598 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1599 			"Failed to swap buffers due to commit in progress\n");
1600 	}
1601 
1602 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1603 
1604 	__update_max_tr(tr, tsk, cpu);
1605 	arch_spin_unlock(&tr->max_lock);
1606 }
1607 #endif /* CONFIG_TRACER_MAX_TRACE */
1608 
1609 static int wait_on_pipe(struct trace_iterator *iter, int full)
1610 {
1611 	/* Iterators are static, they should be filled or empty */
1612 	if (trace_buffer_iter(iter, iter->cpu_file))
1613 		return 0;
1614 
1615 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1616 				full);
1617 }
1618 
1619 #ifdef CONFIG_FTRACE_STARTUP_TEST
1620 static bool selftests_can_run;
1621 
1622 struct trace_selftests {
1623 	struct list_head		list;
1624 	struct tracer			*type;
1625 };
1626 
1627 static LIST_HEAD(postponed_selftests);
1628 
1629 static int save_selftest(struct tracer *type)
1630 {
1631 	struct trace_selftests *selftest;
1632 
1633 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1634 	if (!selftest)
1635 		return -ENOMEM;
1636 
1637 	selftest->type = type;
1638 	list_add(&selftest->list, &postponed_selftests);
1639 	return 0;
1640 }
1641 
1642 static int run_tracer_selftest(struct tracer *type)
1643 {
1644 	struct trace_array *tr = &global_trace;
1645 	struct tracer *saved_tracer = tr->current_trace;
1646 	int ret;
1647 
1648 	if (!type->selftest || tracing_selftest_disabled)
1649 		return 0;
1650 
1651 	/*
1652 	 * If a tracer registers early in boot up (before scheduling is
1653 	 * initialized and such), then do not run its selftests yet.
1654 	 * Instead, run it a little later in the boot process.
1655 	 */
1656 	if (!selftests_can_run)
1657 		return save_selftest(type);
1658 
1659 	/*
1660 	 * Run a selftest on this tracer.
1661 	 * Here we reset the trace buffer, and set the current
1662 	 * tracer to be this tracer. The tracer can then run some
1663 	 * internal tracing to verify that everything is in order.
1664 	 * If we fail, we do not register this tracer.
1665 	 */
1666 	tracing_reset_online_cpus(&tr->trace_buffer);
1667 
1668 	tr->current_trace = type;
1669 
1670 #ifdef CONFIG_TRACER_MAX_TRACE
1671 	if (type->use_max_tr) {
1672 		/* If we expanded the buffers, make sure the max is expanded too */
1673 		if (ring_buffer_expanded)
1674 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1675 					   RING_BUFFER_ALL_CPUS);
1676 		tr->allocated_snapshot = true;
1677 	}
1678 #endif
1679 
1680 	/* the test is responsible for initializing and enabling */
1681 	pr_info("Testing tracer %s: ", type->name);
1682 	ret = type->selftest(type, tr);
1683 	/* the test is responsible for resetting too */
1684 	tr->current_trace = saved_tracer;
1685 	if (ret) {
1686 		printk(KERN_CONT "FAILED!\n");
1687 		/* Add the warning after printing 'FAILED' */
1688 		WARN_ON(1);
1689 		return -1;
1690 	}
1691 	/* Only reset on passing, to avoid touching corrupted buffers */
1692 	tracing_reset_online_cpus(&tr->trace_buffer);
1693 
1694 #ifdef CONFIG_TRACER_MAX_TRACE
1695 	if (type->use_max_tr) {
1696 		tr->allocated_snapshot = false;
1697 
1698 		/* Shrink the max buffer again */
1699 		if (ring_buffer_expanded)
1700 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1701 					   RING_BUFFER_ALL_CPUS);
1702 	}
1703 #endif
1704 
1705 	printk(KERN_CONT "PASSED\n");
1706 	return 0;
1707 }
1708 
1709 static __init int init_trace_selftests(void)
1710 {
1711 	struct trace_selftests *p, *n;
1712 	struct tracer *t, **last;
1713 	int ret;
1714 
1715 	selftests_can_run = true;
1716 
1717 	mutex_lock(&trace_types_lock);
1718 
1719 	if (list_empty(&postponed_selftests))
1720 		goto out;
1721 
1722 	pr_info("Running postponed tracer tests:\n");
1723 
1724 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1725 		ret = run_tracer_selftest(p->type);
1726 		/* If the test fails, then warn and remove from available_tracers */
1727 		if (ret < 0) {
1728 			WARN(1, "tracer: %s failed selftest, disabling\n",
1729 			     p->type->name);
1730 			last = &trace_types;
1731 			for (t = trace_types; t; t = t->next) {
1732 				if (t == p->type) {
1733 					*last = t->next;
1734 					break;
1735 				}
1736 				last = &t->next;
1737 			}
1738 		}
1739 		list_del(&p->list);
1740 		kfree(p);
1741 	}
1742 
1743  out:
1744 	mutex_unlock(&trace_types_lock);
1745 
1746 	return 0;
1747 }
1748 core_initcall(init_trace_selftests);
1749 #else
1750 static inline int run_tracer_selftest(struct tracer *type)
1751 {
1752 	return 0;
1753 }
1754 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1755 
1756 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1757 
1758 static void __init apply_trace_boot_options(void);
1759 
1760 /**
1761  * register_tracer - register a tracer with the ftrace system.
1762  * @type - the plugin for the tracer
1763  *
1764  * Register a new plugin tracer.
1765  */
1766 int __init register_tracer(struct tracer *type)
1767 {
1768 	struct tracer *t;
1769 	int ret = 0;
1770 
1771 	if (!type->name) {
1772 		pr_info("Tracer must have a name\n");
1773 		return -1;
1774 	}
1775 
1776 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1777 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1778 		return -1;
1779 	}
1780 
1781 	mutex_lock(&trace_types_lock);
1782 
1783 	tracing_selftest_running = true;
1784 
1785 	for (t = trace_types; t; t = t->next) {
1786 		if (strcmp(type->name, t->name) == 0) {
1787 			/* already found */
1788 			pr_info("Tracer %s already registered\n",
1789 				type->name);
1790 			ret = -1;
1791 			goto out;
1792 		}
1793 	}
1794 
1795 	if (!type->set_flag)
1796 		type->set_flag = &dummy_set_flag;
1797 	if (!type->flags) {
1798 		/*allocate a dummy tracer_flags*/
1799 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1800 		if (!type->flags) {
1801 			ret = -ENOMEM;
1802 			goto out;
1803 		}
1804 		type->flags->val = 0;
1805 		type->flags->opts = dummy_tracer_opt;
1806 	} else
1807 		if (!type->flags->opts)
1808 			type->flags->opts = dummy_tracer_opt;
1809 
1810 	/* store the tracer for __set_tracer_option */
1811 	type->flags->trace = type;
1812 
1813 	ret = run_tracer_selftest(type);
1814 	if (ret < 0)
1815 		goto out;
1816 
1817 	type->next = trace_types;
1818 	trace_types = type;
1819 	add_tracer_options(&global_trace, type);
1820 
1821  out:
1822 	tracing_selftest_running = false;
1823 	mutex_unlock(&trace_types_lock);
1824 
1825 	if (ret || !default_bootup_tracer)
1826 		goto out_unlock;
1827 
1828 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1829 		goto out_unlock;
1830 
1831 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1832 	/* Do we want this tracer to start on bootup? */
1833 	tracing_set_tracer(&global_trace, type->name);
1834 	default_bootup_tracer = NULL;
1835 
1836 	apply_trace_boot_options();
1837 
1838 	/* disable other selftests, since this will break it. */
1839 	tracing_selftest_disabled = true;
1840 #ifdef CONFIG_FTRACE_STARTUP_TEST
1841 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1842 	       type->name);
1843 #endif
1844 
1845  out_unlock:
1846 	return ret;
1847 }
1848 
1849 void tracing_reset(struct trace_buffer *buf, int cpu)
1850 {
1851 	struct ring_buffer *buffer = buf->buffer;
1852 
1853 	if (!buffer)
1854 		return;
1855 
1856 	ring_buffer_record_disable(buffer);
1857 
1858 	/* Make sure all commits have finished */
1859 	synchronize_rcu();
1860 	ring_buffer_reset_cpu(buffer, cpu);
1861 
1862 	ring_buffer_record_enable(buffer);
1863 }
1864 
1865 void tracing_reset_online_cpus(struct trace_buffer *buf)
1866 {
1867 	struct ring_buffer *buffer = buf->buffer;
1868 	int cpu;
1869 
1870 	if (!buffer)
1871 		return;
1872 
1873 	ring_buffer_record_disable(buffer);
1874 
1875 	/* Make sure all commits have finished */
1876 	synchronize_rcu();
1877 
1878 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1879 
1880 	for_each_online_cpu(cpu)
1881 		ring_buffer_reset_cpu(buffer, cpu);
1882 
1883 	ring_buffer_record_enable(buffer);
1884 }
1885 
1886 /* Must have trace_types_lock held */
1887 void tracing_reset_all_online_cpus(void)
1888 {
1889 	struct trace_array *tr;
1890 
1891 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1892 		if (!tr->clear_trace)
1893 			continue;
1894 		tr->clear_trace = false;
1895 		tracing_reset_online_cpus(&tr->trace_buffer);
1896 #ifdef CONFIG_TRACER_MAX_TRACE
1897 		tracing_reset_online_cpus(&tr->max_buffer);
1898 #endif
1899 	}
1900 }
1901 
1902 static int *tgid_map;
1903 
1904 #define SAVED_CMDLINES_DEFAULT 128
1905 #define NO_CMDLINE_MAP UINT_MAX
1906 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1907 struct saved_cmdlines_buffer {
1908 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1909 	unsigned *map_cmdline_to_pid;
1910 	unsigned cmdline_num;
1911 	int cmdline_idx;
1912 	char *saved_cmdlines;
1913 };
1914 static struct saved_cmdlines_buffer *savedcmd;
1915 
1916 /* temporary disable recording */
1917 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1918 
1919 static inline char *get_saved_cmdlines(int idx)
1920 {
1921 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1922 }
1923 
1924 static inline void set_cmdline(int idx, const char *cmdline)
1925 {
1926 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1927 }
1928 
1929 static int allocate_cmdlines_buffer(unsigned int val,
1930 				    struct saved_cmdlines_buffer *s)
1931 {
1932 	s->map_cmdline_to_pid = kmalloc_array(val,
1933 					      sizeof(*s->map_cmdline_to_pid),
1934 					      GFP_KERNEL);
1935 	if (!s->map_cmdline_to_pid)
1936 		return -ENOMEM;
1937 
1938 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1939 	if (!s->saved_cmdlines) {
1940 		kfree(s->map_cmdline_to_pid);
1941 		return -ENOMEM;
1942 	}
1943 
1944 	s->cmdline_idx = 0;
1945 	s->cmdline_num = val;
1946 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1947 	       sizeof(s->map_pid_to_cmdline));
1948 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1949 	       val * sizeof(*s->map_cmdline_to_pid));
1950 
1951 	return 0;
1952 }
1953 
1954 static int trace_create_savedcmd(void)
1955 {
1956 	int ret;
1957 
1958 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1959 	if (!savedcmd)
1960 		return -ENOMEM;
1961 
1962 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1963 	if (ret < 0) {
1964 		kfree(savedcmd);
1965 		savedcmd = NULL;
1966 		return -ENOMEM;
1967 	}
1968 
1969 	return 0;
1970 }
1971 
1972 int is_tracing_stopped(void)
1973 {
1974 	return global_trace.stop_count;
1975 }
1976 
1977 /**
1978  * tracing_start - quick start of the tracer
1979  *
1980  * If tracing is enabled but was stopped by tracing_stop,
1981  * this will start the tracer back up.
1982  */
1983 void tracing_start(void)
1984 {
1985 	struct ring_buffer *buffer;
1986 	unsigned long flags;
1987 
1988 	if (tracing_disabled)
1989 		return;
1990 
1991 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1992 	if (--global_trace.stop_count) {
1993 		if (global_trace.stop_count < 0) {
1994 			/* Someone screwed up their debugging */
1995 			WARN_ON_ONCE(1);
1996 			global_trace.stop_count = 0;
1997 		}
1998 		goto out;
1999 	}
2000 
2001 	/* Prevent the buffers from switching */
2002 	arch_spin_lock(&global_trace.max_lock);
2003 
2004 	buffer = global_trace.trace_buffer.buffer;
2005 	if (buffer)
2006 		ring_buffer_record_enable(buffer);
2007 
2008 #ifdef CONFIG_TRACER_MAX_TRACE
2009 	buffer = global_trace.max_buffer.buffer;
2010 	if (buffer)
2011 		ring_buffer_record_enable(buffer);
2012 #endif
2013 
2014 	arch_spin_unlock(&global_trace.max_lock);
2015 
2016  out:
2017 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2018 }
2019 
2020 static void tracing_start_tr(struct trace_array *tr)
2021 {
2022 	struct ring_buffer *buffer;
2023 	unsigned long flags;
2024 
2025 	if (tracing_disabled)
2026 		return;
2027 
2028 	/* If global, we need to also start the max tracer */
2029 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2030 		return tracing_start();
2031 
2032 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2033 
2034 	if (--tr->stop_count) {
2035 		if (tr->stop_count < 0) {
2036 			/* Someone screwed up their debugging */
2037 			WARN_ON_ONCE(1);
2038 			tr->stop_count = 0;
2039 		}
2040 		goto out;
2041 	}
2042 
2043 	buffer = tr->trace_buffer.buffer;
2044 	if (buffer)
2045 		ring_buffer_record_enable(buffer);
2046 
2047  out:
2048 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2049 }
2050 
2051 /**
2052  * tracing_stop - quick stop of the tracer
2053  *
2054  * Light weight way to stop tracing. Use in conjunction with
2055  * tracing_start.
2056  */
2057 void tracing_stop(void)
2058 {
2059 	struct ring_buffer *buffer;
2060 	unsigned long flags;
2061 
2062 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2063 	if (global_trace.stop_count++)
2064 		goto out;
2065 
2066 	/* Prevent the buffers from switching */
2067 	arch_spin_lock(&global_trace.max_lock);
2068 
2069 	buffer = global_trace.trace_buffer.buffer;
2070 	if (buffer)
2071 		ring_buffer_record_disable(buffer);
2072 
2073 #ifdef CONFIG_TRACER_MAX_TRACE
2074 	buffer = global_trace.max_buffer.buffer;
2075 	if (buffer)
2076 		ring_buffer_record_disable(buffer);
2077 #endif
2078 
2079 	arch_spin_unlock(&global_trace.max_lock);
2080 
2081  out:
2082 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2083 }
2084 
2085 static void tracing_stop_tr(struct trace_array *tr)
2086 {
2087 	struct ring_buffer *buffer;
2088 	unsigned long flags;
2089 
2090 	/* If global, we need to also stop the max tracer */
2091 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2092 		return tracing_stop();
2093 
2094 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2095 	if (tr->stop_count++)
2096 		goto out;
2097 
2098 	buffer = tr->trace_buffer.buffer;
2099 	if (buffer)
2100 		ring_buffer_record_disable(buffer);
2101 
2102  out:
2103 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2104 }
2105 
2106 static int trace_save_cmdline(struct task_struct *tsk)
2107 {
2108 	unsigned pid, idx;
2109 
2110 	/* treat recording of idle task as a success */
2111 	if (!tsk->pid)
2112 		return 1;
2113 
2114 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2115 		return 0;
2116 
2117 	/*
2118 	 * It's not the end of the world if we don't get
2119 	 * the lock, but we also don't want to spin
2120 	 * nor do we want to disable interrupts,
2121 	 * so if we miss here, then better luck next time.
2122 	 */
2123 	if (!arch_spin_trylock(&trace_cmdline_lock))
2124 		return 0;
2125 
2126 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2127 	if (idx == NO_CMDLINE_MAP) {
2128 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2129 
2130 		/*
2131 		 * Check whether the cmdline buffer at idx has a pid
2132 		 * mapped. We are going to overwrite that entry so we
2133 		 * need to clear the map_pid_to_cmdline. Otherwise we
2134 		 * would read the new comm for the old pid.
2135 		 */
2136 		pid = savedcmd->map_cmdline_to_pid[idx];
2137 		if (pid != NO_CMDLINE_MAP)
2138 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2139 
2140 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2141 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2142 
2143 		savedcmd->cmdline_idx = idx;
2144 	}
2145 
2146 	set_cmdline(idx, tsk->comm);
2147 
2148 	arch_spin_unlock(&trace_cmdline_lock);
2149 
2150 	return 1;
2151 }
2152 
2153 static void __trace_find_cmdline(int pid, char comm[])
2154 {
2155 	unsigned map;
2156 
2157 	if (!pid) {
2158 		strcpy(comm, "<idle>");
2159 		return;
2160 	}
2161 
2162 	if (WARN_ON_ONCE(pid < 0)) {
2163 		strcpy(comm, "<XXX>");
2164 		return;
2165 	}
2166 
2167 	if (pid > PID_MAX_DEFAULT) {
2168 		strcpy(comm, "<...>");
2169 		return;
2170 	}
2171 
2172 	map = savedcmd->map_pid_to_cmdline[pid];
2173 	if (map != NO_CMDLINE_MAP)
2174 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2175 	else
2176 		strcpy(comm, "<...>");
2177 }
2178 
2179 void trace_find_cmdline(int pid, char comm[])
2180 {
2181 	preempt_disable();
2182 	arch_spin_lock(&trace_cmdline_lock);
2183 
2184 	__trace_find_cmdline(pid, comm);
2185 
2186 	arch_spin_unlock(&trace_cmdline_lock);
2187 	preempt_enable();
2188 }
2189 
2190 int trace_find_tgid(int pid)
2191 {
2192 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2193 		return 0;
2194 
2195 	return tgid_map[pid];
2196 }
2197 
2198 static int trace_save_tgid(struct task_struct *tsk)
2199 {
2200 	/* treat recording of idle task as a success */
2201 	if (!tsk->pid)
2202 		return 1;
2203 
2204 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2205 		return 0;
2206 
2207 	tgid_map[tsk->pid] = tsk->tgid;
2208 	return 1;
2209 }
2210 
2211 static bool tracing_record_taskinfo_skip(int flags)
2212 {
2213 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2214 		return true;
2215 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2216 		return true;
2217 	if (!__this_cpu_read(trace_taskinfo_save))
2218 		return true;
2219 	return false;
2220 }
2221 
2222 /**
2223  * tracing_record_taskinfo - record the task info of a task
2224  *
2225  * @task  - task to record
2226  * @flags - TRACE_RECORD_CMDLINE for recording comm
2227  *        - TRACE_RECORD_TGID for recording tgid
2228  */
2229 void tracing_record_taskinfo(struct task_struct *task, int flags)
2230 {
2231 	bool done;
2232 
2233 	if (tracing_record_taskinfo_skip(flags))
2234 		return;
2235 
2236 	/*
2237 	 * Record as much task information as possible. If some fail, continue
2238 	 * to try to record the others.
2239 	 */
2240 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2241 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2242 
2243 	/* If recording any information failed, retry again soon. */
2244 	if (!done)
2245 		return;
2246 
2247 	__this_cpu_write(trace_taskinfo_save, false);
2248 }
2249 
2250 /**
2251  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2252  *
2253  * @prev - previous task during sched_switch
2254  * @next - next task during sched_switch
2255  * @flags - TRACE_RECORD_CMDLINE for recording comm
2256  *          TRACE_RECORD_TGID for recording tgid
2257  */
2258 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2259 					  struct task_struct *next, int flags)
2260 {
2261 	bool done;
2262 
2263 	if (tracing_record_taskinfo_skip(flags))
2264 		return;
2265 
2266 	/*
2267 	 * Record as much task information as possible. If some fail, continue
2268 	 * to try to record the others.
2269 	 */
2270 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2271 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2272 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2273 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2274 
2275 	/* If recording any information failed, retry again soon. */
2276 	if (!done)
2277 		return;
2278 
2279 	__this_cpu_write(trace_taskinfo_save, false);
2280 }
2281 
2282 /* Helpers to record a specific task information */
2283 void tracing_record_cmdline(struct task_struct *task)
2284 {
2285 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2286 }
2287 
2288 void tracing_record_tgid(struct task_struct *task)
2289 {
2290 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2291 }
2292 
2293 /*
2294  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2295  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2296  * simplifies those functions and keeps them in sync.
2297  */
2298 enum print_line_t trace_handle_return(struct trace_seq *s)
2299 {
2300 	return trace_seq_has_overflowed(s) ?
2301 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2302 }
2303 EXPORT_SYMBOL_GPL(trace_handle_return);
2304 
2305 void
2306 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2307 			     int pc)
2308 {
2309 	struct task_struct *tsk = current;
2310 
2311 	entry->preempt_count		= pc & 0xff;
2312 	entry->pid			= (tsk) ? tsk->pid : 0;
2313 	entry->flags =
2314 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2315 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2316 #else
2317 		TRACE_FLAG_IRQS_NOSUPPORT |
2318 #endif
2319 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2320 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2321 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2322 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2323 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2324 }
2325 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2326 
2327 struct ring_buffer_event *
2328 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2329 			  int type,
2330 			  unsigned long len,
2331 			  unsigned long flags, int pc)
2332 {
2333 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2334 }
2335 
2336 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2337 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2338 static int trace_buffered_event_ref;
2339 
2340 /**
2341  * trace_buffered_event_enable - enable buffering events
2342  *
2343  * When events are being filtered, it is quicker to use a temporary
2344  * buffer to write the event data into if there's a likely chance
2345  * that it will not be committed. The discard of the ring buffer
2346  * is not as fast as committing, and is much slower than copying
2347  * a commit.
2348  *
2349  * When an event is to be filtered, allocate per cpu buffers to
2350  * write the event data into, and if the event is filtered and discarded
2351  * it is simply dropped, otherwise, the entire data is to be committed
2352  * in one shot.
2353  */
2354 void trace_buffered_event_enable(void)
2355 {
2356 	struct ring_buffer_event *event;
2357 	struct page *page;
2358 	int cpu;
2359 
2360 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2361 
2362 	if (trace_buffered_event_ref++)
2363 		return;
2364 
2365 	for_each_tracing_cpu(cpu) {
2366 		page = alloc_pages_node(cpu_to_node(cpu),
2367 					GFP_KERNEL | __GFP_NORETRY, 0);
2368 		if (!page)
2369 			goto failed;
2370 
2371 		event = page_address(page);
2372 		memset(event, 0, sizeof(*event));
2373 
2374 		per_cpu(trace_buffered_event, cpu) = event;
2375 
2376 		preempt_disable();
2377 		if (cpu == smp_processor_id() &&
2378 		    this_cpu_read(trace_buffered_event) !=
2379 		    per_cpu(trace_buffered_event, cpu))
2380 			WARN_ON_ONCE(1);
2381 		preempt_enable();
2382 	}
2383 
2384 	return;
2385  failed:
2386 	trace_buffered_event_disable();
2387 }
2388 
2389 static void enable_trace_buffered_event(void *data)
2390 {
2391 	/* Probably not needed, but do it anyway */
2392 	smp_rmb();
2393 	this_cpu_dec(trace_buffered_event_cnt);
2394 }
2395 
2396 static void disable_trace_buffered_event(void *data)
2397 {
2398 	this_cpu_inc(trace_buffered_event_cnt);
2399 }
2400 
2401 /**
2402  * trace_buffered_event_disable - disable buffering events
2403  *
2404  * When a filter is removed, it is faster to not use the buffered
2405  * events, and to commit directly into the ring buffer. Free up
2406  * the temp buffers when there are no more users. This requires
2407  * special synchronization with current events.
2408  */
2409 void trace_buffered_event_disable(void)
2410 {
2411 	int cpu;
2412 
2413 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2414 
2415 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2416 		return;
2417 
2418 	if (--trace_buffered_event_ref)
2419 		return;
2420 
2421 	preempt_disable();
2422 	/* For each CPU, set the buffer as used. */
2423 	smp_call_function_many(tracing_buffer_mask,
2424 			       disable_trace_buffered_event, NULL, 1);
2425 	preempt_enable();
2426 
2427 	/* Wait for all current users to finish */
2428 	synchronize_rcu();
2429 
2430 	for_each_tracing_cpu(cpu) {
2431 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2432 		per_cpu(trace_buffered_event, cpu) = NULL;
2433 	}
2434 	/*
2435 	 * Make sure trace_buffered_event is NULL before clearing
2436 	 * trace_buffered_event_cnt.
2437 	 */
2438 	smp_wmb();
2439 
2440 	preempt_disable();
2441 	/* Do the work on each cpu */
2442 	smp_call_function_many(tracing_buffer_mask,
2443 			       enable_trace_buffered_event, NULL, 1);
2444 	preempt_enable();
2445 }
2446 
2447 static struct ring_buffer *temp_buffer;
2448 
2449 struct ring_buffer_event *
2450 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2451 			  struct trace_event_file *trace_file,
2452 			  int type, unsigned long len,
2453 			  unsigned long flags, int pc)
2454 {
2455 	struct ring_buffer_event *entry;
2456 	int val;
2457 
2458 	*current_rb = trace_file->tr->trace_buffer.buffer;
2459 
2460 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2461 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2462 	    (entry = this_cpu_read(trace_buffered_event))) {
2463 		/* Try to use the per cpu buffer first */
2464 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2465 		if (val == 1) {
2466 			trace_event_setup(entry, type, flags, pc);
2467 			entry->array[0] = len;
2468 			return entry;
2469 		}
2470 		this_cpu_dec(trace_buffered_event_cnt);
2471 	}
2472 
2473 	entry = __trace_buffer_lock_reserve(*current_rb,
2474 					    type, len, flags, pc);
2475 	/*
2476 	 * If tracing is off, but we have triggers enabled
2477 	 * we still need to look at the event data. Use the temp_buffer
2478 	 * to store the trace event for the tigger to use. It's recusive
2479 	 * safe and will not be recorded anywhere.
2480 	 */
2481 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2482 		*current_rb = temp_buffer;
2483 		entry = __trace_buffer_lock_reserve(*current_rb,
2484 						    type, len, flags, pc);
2485 	}
2486 	return entry;
2487 }
2488 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2489 
2490 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2491 static DEFINE_MUTEX(tracepoint_printk_mutex);
2492 
2493 static void output_printk(struct trace_event_buffer *fbuffer)
2494 {
2495 	struct trace_event_call *event_call;
2496 	struct trace_event *event;
2497 	unsigned long flags;
2498 	struct trace_iterator *iter = tracepoint_print_iter;
2499 
2500 	/* We should never get here if iter is NULL */
2501 	if (WARN_ON_ONCE(!iter))
2502 		return;
2503 
2504 	event_call = fbuffer->trace_file->event_call;
2505 	if (!event_call || !event_call->event.funcs ||
2506 	    !event_call->event.funcs->trace)
2507 		return;
2508 
2509 	event = &fbuffer->trace_file->event_call->event;
2510 
2511 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2512 	trace_seq_init(&iter->seq);
2513 	iter->ent = fbuffer->entry;
2514 	event_call->event.funcs->trace(iter, 0, event);
2515 	trace_seq_putc(&iter->seq, 0);
2516 	printk("%s", iter->seq.buffer);
2517 
2518 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2519 }
2520 
2521 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2522 			     void __user *buffer, size_t *lenp,
2523 			     loff_t *ppos)
2524 {
2525 	int save_tracepoint_printk;
2526 	int ret;
2527 
2528 	mutex_lock(&tracepoint_printk_mutex);
2529 	save_tracepoint_printk = tracepoint_printk;
2530 
2531 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2532 
2533 	/*
2534 	 * This will force exiting early, as tracepoint_printk
2535 	 * is always zero when tracepoint_printk_iter is not allocated
2536 	 */
2537 	if (!tracepoint_print_iter)
2538 		tracepoint_printk = 0;
2539 
2540 	if (save_tracepoint_printk == tracepoint_printk)
2541 		goto out;
2542 
2543 	if (tracepoint_printk)
2544 		static_key_enable(&tracepoint_printk_key.key);
2545 	else
2546 		static_key_disable(&tracepoint_printk_key.key);
2547 
2548  out:
2549 	mutex_unlock(&tracepoint_printk_mutex);
2550 
2551 	return ret;
2552 }
2553 
2554 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2555 {
2556 	if (static_key_false(&tracepoint_printk_key.key))
2557 		output_printk(fbuffer);
2558 
2559 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2560 				    fbuffer->event, fbuffer->entry,
2561 				    fbuffer->flags, fbuffer->pc);
2562 }
2563 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2564 
2565 /*
2566  * Skip 3:
2567  *
2568  *   trace_buffer_unlock_commit_regs()
2569  *   trace_event_buffer_commit()
2570  *   trace_event_raw_event_xxx()
2571  */
2572 # define STACK_SKIP 3
2573 
2574 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2575 				     struct ring_buffer *buffer,
2576 				     struct ring_buffer_event *event,
2577 				     unsigned long flags, int pc,
2578 				     struct pt_regs *regs)
2579 {
2580 	__buffer_unlock_commit(buffer, event);
2581 
2582 	/*
2583 	 * If regs is not set, then skip the necessary functions.
2584 	 * Note, we can still get here via blktrace, wakeup tracer
2585 	 * and mmiotrace, but that's ok if they lose a function or
2586 	 * two. They are not that meaningful.
2587 	 */
2588 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2589 	ftrace_trace_userstack(buffer, flags, pc);
2590 }
2591 
2592 /*
2593  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2594  */
2595 void
2596 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2597 				   struct ring_buffer_event *event)
2598 {
2599 	__buffer_unlock_commit(buffer, event);
2600 }
2601 
2602 static void
2603 trace_process_export(struct trace_export *export,
2604 	       struct ring_buffer_event *event)
2605 {
2606 	struct trace_entry *entry;
2607 	unsigned int size = 0;
2608 
2609 	entry = ring_buffer_event_data(event);
2610 	size = ring_buffer_event_length(event);
2611 	export->write(export, entry, size);
2612 }
2613 
2614 static DEFINE_MUTEX(ftrace_export_lock);
2615 
2616 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2617 
2618 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2619 
2620 static inline void ftrace_exports_enable(void)
2621 {
2622 	static_branch_enable(&ftrace_exports_enabled);
2623 }
2624 
2625 static inline void ftrace_exports_disable(void)
2626 {
2627 	static_branch_disable(&ftrace_exports_enabled);
2628 }
2629 
2630 static void ftrace_exports(struct ring_buffer_event *event)
2631 {
2632 	struct trace_export *export;
2633 
2634 	preempt_disable_notrace();
2635 
2636 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2637 	while (export) {
2638 		trace_process_export(export, event);
2639 		export = rcu_dereference_raw_notrace(export->next);
2640 	}
2641 
2642 	preempt_enable_notrace();
2643 }
2644 
2645 static inline void
2646 add_trace_export(struct trace_export **list, struct trace_export *export)
2647 {
2648 	rcu_assign_pointer(export->next, *list);
2649 	/*
2650 	 * We are entering export into the list but another
2651 	 * CPU might be walking that list. We need to make sure
2652 	 * the export->next pointer is valid before another CPU sees
2653 	 * the export pointer included into the list.
2654 	 */
2655 	rcu_assign_pointer(*list, export);
2656 }
2657 
2658 static inline int
2659 rm_trace_export(struct trace_export **list, struct trace_export *export)
2660 {
2661 	struct trace_export **p;
2662 
2663 	for (p = list; *p != NULL; p = &(*p)->next)
2664 		if (*p == export)
2665 			break;
2666 
2667 	if (*p != export)
2668 		return -1;
2669 
2670 	rcu_assign_pointer(*p, (*p)->next);
2671 
2672 	return 0;
2673 }
2674 
2675 static inline void
2676 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2677 {
2678 	if (*list == NULL)
2679 		ftrace_exports_enable();
2680 
2681 	add_trace_export(list, export);
2682 }
2683 
2684 static inline int
2685 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687 	int ret;
2688 
2689 	ret = rm_trace_export(list, export);
2690 	if (*list == NULL)
2691 		ftrace_exports_disable();
2692 
2693 	return ret;
2694 }
2695 
2696 int register_ftrace_export(struct trace_export *export)
2697 {
2698 	if (WARN_ON_ONCE(!export->write))
2699 		return -1;
2700 
2701 	mutex_lock(&ftrace_export_lock);
2702 
2703 	add_ftrace_export(&ftrace_exports_list, export);
2704 
2705 	mutex_unlock(&ftrace_export_lock);
2706 
2707 	return 0;
2708 }
2709 EXPORT_SYMBOL_GPL(register_ftrace_export);
2710 
2711 int unregister_ftrace_export(struct trace_export *export)
2712 {
2713 	int ret;
2714 
2715 	mutex_lock(&ftrace_export_lock);
2716 
2717 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2718 
2719 	mutex_unlock(&ftrace_export_lock);
2720 
2721 	return ret;
2722 }
2723 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2724 
2725 void
2726 trace_function(struct trace_array *tr,
2727 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2728 	       int pc)
2729 {
2730 	struct trace_event_call *call = &event_function;
2731 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2732 	struct ring_buffer_event *event;
2733 	struct ftrace_entry *entry;
2734 
2735 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2736 					    flags, pc);
2737 	if (!event)
2738 		return;
2739 	entry	= ring_buffer_event_data(event);
2740 	entry->ip			= ip;
2741 	entry->parent_ip		= parent_ip;
2742 
2743 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2744 		if (static_branch_unlikely(&ftrace_exports_enabled))
2745 			ftrace_exports(event);
2746 		__buffer_unlock_commit(buffer, event);
2747 	}
2748 }
2749 
2750 #ifdef CONFIG_STACKTRACE
2751 
2752 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2753 struct ftrace_stack {
2754 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2755 };
2756 
2757 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2758 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2759 
2760 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2761 				 unsigned long flags,
2762 				 int skip, int pc, struct pt_regs *regs)
2763 {
2764 	struct trace_event_call *call = &event_kernel_stack;
2765 	struct ring_buffer_event *event;
2766 	struct stack_entry *entry;
2767 	struct stack_trace trace;
2768 	int use_stack;
2769 	int size = FTRACE_STACK_ENTRIES;
2770 
2771 	trace.nr_entries	= 0;
2772 	trace.skip		= skip;
2773 
2774 	/*
2775 	 * Add one, for this function and the call to save_stack_trace()
2776 	 * If regs is set, then these functions will not be in the way.
2777 	 */
2778 #ifndef CONFIG_UNWINDER_ORC
2779 	if (!regs)
2780 		trace.skip++;
2781 #endif
2782 
2783 	/*
2784 	 * Since events can happen in NMIs there's no safe way to
2785 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2786 	 * or NMI comes in, it will just have to use the default
2787 	 * FTRACE_STACK_SIZE.
2788 	 */
2789 	preempt_disable_notrace();
2790 
2791 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2792 	/*
2793 	 * We don't need any atomic variables, just a barrier.
2794 	 * If an interrupt comes in, we don't care, because it would
2795 	 * have exited and put the counter back to what we want.
2796 	 * We just need a barrier to keep gcc from moving things
2797 	 * around.
2798 	 */
2799 	barrier();
2800 	if (use_stack == 1) {
2801 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2802 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2803 
2804 		if (regs)
2805 			save_stack_trace_regs(regs, &trace);
2806 		else
2807 			save_stack_trace(&trace);
2808 
2809 		if (trace.nr_entries > size)
2810 			size = trace.nr_entries;
2811 	} else
2812 		/* From now on, use_stack is a boolean */
2813 		use_stack = 0;
2814 
2815 	size *= sizeof(unsigned long);
2816 
2817 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2818 					    sizeof(*entry) + size, flags, pc);
2819 	if (!event)
2820 		goto out;
2821 	entry = ring_buffer_event_data(event);
2822 
2823 	memset(&entry->caller, 0, size);
2824 
2825 	if (use_stack)
2826 		memcpy(&entry->caller, trace.entries,
2827 		       trace.nr_entries * sizeof(unsigned long));
2828 	else {
2829 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2830 		trace.entries		= entry->caller;
2831 		if (regs)
2832 			save_stack_trace_regs(regs, &trace);
2833 		else
2834 			save_stack_trace(&trace);
2835 	}
2836 
2837 	entry->size = trace.nr_entries;
2838 
2839 	if (!call_filter_check_discard(call, entry, buffer, event))
2840 		__buffer_unlock_commit(buffer, event);
2841 
2842  out:
2843 	/* Again, don't let gcc optimize things here */
2844 	barrier();
2845 	__this_cpu_dec(ftrace_stack_reserve);
2846 	preempt_enable_notrace();
2847 
2848 }
2849 
2850 static inline void ftrace_trace_stack(struct trace_array *tr,
2851 				      struct ring_buffer *buffer,
2852 				      unsigned long flags,
2853 				      int skip, int pc, struct pt_regs *regs)
2854 {
2855 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2856 		return;
2857 
2858 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2859 }
2860 
2861 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2862 		   int pc)
2863 {
2864 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2865 
2866 	if (rcu_is_watching()) {
2867 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2868 		return;
2869 	}
2870 
2871 	/*
2872 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2873 	 * but if the above rcu_is_watching() failed, then the NMI
2874 	 * triggered someplace critical, and rcu_irq_enter() should
2875 	 * not be called from NMI.
2876 	 */
2877 	if (unlikely(in_nmi()))
2878 		return;
2879 
2880 	rcu_irq_enter_irqson();
2881 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2882 	rcu_irq_exit_irqson();
2883 }
2884 
2885 /**
2886  * trace_dump_stack - record a stack back trace in the trace buffer
2887  * @skip: Number of functions to skip (helper handlers)
2888  */
2889 void trace_dump_stack(int skip)
2890 {
2891 	unsigned long flags;
2892 
2893 	if (tracing_disabled || tracing_selftest_running)
2894 		return;
2895 
2896 	local_save_flags(flags);
2897 
2898 #ifndef CONFIG_UNWINDER_ORC
2899 	/* Skip 1 to skip this function. */
2900 	skip++;
2901 #endif
2902 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2903 			     flags, skip, preempt_count(), NULL);
2904 }
2905 EXPORT_SYMBOL_GPL(trace_dump_stack);
2906 
2907 static DEFINE_PER_CPU(int, user_stack_count);
2908 
2909 void
2910 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2911 {
2912 	struct trace_event_call *call = &event_user_stack;
2913 	struct ring_buffer_event *event;
2914 	struct userstack_entry *entry;
2915 	struct stack_trace trace;
2916 
2917 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2918 		return;
2919 
2920 	/*
2921 	 * NMIs can not handle page faults, even with fix ups.
2922 	 * The save user stack can (and often does) fault.
2923 	 */
2924 	if (unlikely(in_nmi()))
2925 		return;
2926 
2927 	/*
2928 	 * prevent recursion, since the user stack tracing may
2929 	 * trigger other kernel events.
2930 	 */
2931 	preempt_disable();
2932 	if (__this_cpu_read(user_stack_count))
2933 		goto out;
2934 
2935 	__this_cpu_inc(user_stack_count);
2936 
2937 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2938 					    sizeof(*entry), flags, pc);
2939 	if (!event)
2940 		goto out_drop_count;
2941 	entry	= ring_buffer_event_data(event);
2942 
2943 	entry->tgid		= current->tgid;
2944 	memset(&entry->caller, 0, sizeof(entry->caller));
2945 
2946 	trace.nr_entries	= 0;
2947 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2948 	trace.skip		= 0;
2949 	trace.entries		= entry->caller;
2950 
2951 	save_stack_trace_user(&trace);
2952 	if (!call_filter_check_discard(call, entry, buffer, event))
2953 		__buffer_unlock_commit(buffer, event);
2954 
2955  out_drop_count:
2956 	__this_cpu_dec(user_stack_count);
2957  out:
2958 	preempt_enable();
2959 }
2960 
2961 #ifdef UNUSED
2962 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2963 {
2964 	ftrace_trace_userstack(tr, flags, preempt_count());
2965 }
2966 #endif /* UNUSED */
2967 
2968 #endif /* CONFIG_STACKTRACE */
2969 
2970 /* created for use with alloc_percpu */
2971 struct trace_buffer_struct {
2972 	int nesting;
2973 	char buffer[4][TRACE_BUF_SIZE];
2974 };
2975 
2976 static struct trace_buffer_struct *trace_percpu_buffer;
2977 
2978 /*
2979  * Thise allows for lockless recording.  If we're nested too deeply, then
2980  * this returns NULL.
2981  */
2982 static char *get_trace_buf(void)
2983 {
2984 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2985 
2986 	if (!buffer || buffer->nesting >= 4)
2987 		return NULL;
2988 
2989 	buffer->nesting++;
2990 
2991 	/* Interrupts must see nesting incremented before we use the buffer */
2992 	barrier();
2993 	return &buffer->buffer[buffer->nesting][0];
2994 }
2995 
2996 static void put_trace_buf(void)
2997 {
2998 	/* Don't let the decrement of nesting leak before this */
2999 	barrier();
3000 	this_cpu_dec(trace_percpu_buffer->nesting);
3001 }
3002 
3003 static int alloc_percpu_trace_buffer(void)
3004 {
3005 	struct trace_buffer_struct *buffers;
3006 
3007 	buffers = alloc_percpu(struct trace_buffer_struct);
3008 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3009 		return -ENOMEM;
3010 
3011 	trace_percpu_buffer = buffers;
3012 	return 0;
3013 }
3014 
3015 static int buffers_allocated;
3016 
3017 void trace_printk_init_buffers(void)
3018 {
3019 	if (buffers_allocated)
3020 		return;
3021 
3022 	if (alloc_percpu_trace_buffer())
3023 		return;
3024 
3025 	/* trace_printk() is for debug use only. Don't use it in production. */
3026 
3027 	pr_warn("\n");
3028 	pr_warn("**********************************************************\n");
3029 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3030 	pr_warn("**                                                      **\n");
3031 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3032 	pr_warn("**                                                      **\n");
3033 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3034 	pr_warn("** unsafe for production use.                           **\n");
3035 	pr_warn("**                                                      **\n");
3036 	pr_warn("** If you see this message and you are not debugging    **\n");
3037 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3038 	pr_warn("**                                                      **\n");
3039 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3040 	pr_warn("**********************************************************\n");
3041 
3042 	/* Expand the buffers to set size */
3043 	tracing_update_buffers();
3044 
3045 	buffers_allocated = 1;
3046 
3047 	/*
3048 	 * trace_printk_init_buffers() can be called by modules.
3049 	 * If that happens, then we need to start cmdline recording
3050 	 * directly here. If the global_trace.buffer is already
3051 	 * allocated here, then this was called by module code.
3052 	 */
3053 	if (global_trace.trace_buffer.buffer)
3054 		tracing_start_cmdline_record();
3055 }
3056 
3057 void trace_printk_start_comm(void)
3058 {
3059 	/* Start tracing comms if trace printk is set */
3060 	if (!buffers_allocated)
3061 		return;
3062 	tracing_start_cmdline_record();
3063 }
3064 
3065 static void trace_printk_start_stop_comm(int enabled)
3066 {
3067 	if (!buffers_allocated)
3068 		return;
3069 
3070 	if (enabled)
3071 		tracing_start_cmdline_record();
3072 	else
3073 		tracing_stop_cmdline_record();
3074 }
3075 
3076 /**
3077  * trace_vbprintk - write binary msg to tracing buffer
3078  *
3079  */
3080 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3081 {
3082 	struct trace_event_call *call = &event_bprint;
3083 	struct ring_buffer_event *event;
3084 	struct ring_buffer *buffer;
3085 	struct trace_array *tr = &global_trace;
3086 	struct bprint_entry *entry;
3087 	unsigned long flags;
3088 	char *tbuffer;
3089 	int len = 0, size, pc;
3090 
3091 	if (unlikely(tracing_selftest_running || tracing_disabled))
3092 		return 0;
3093 
3094 	/* Don't pollute graph traces with trace_vprintk internals */
3095 	pause_graph_tracing();
3096 
3097 	pc = preempt_count();
3098 	preempt_disable_notrace();
3099 
3100 	tbuffer = get_trace_buf();
3101 	if (!tbuffer) {
3102 		len = 0;
3103 		goto out_nobuffer;
3104 	}
3105 
3106 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3107 
3108 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3109 		goto out;
3110 
3111 	local_save_flags(flags);
3112 	size = sizeof(*entry) + sizeof(u32) * len;
3113 	buffer = tr->trace_buffer.buffer;
3114 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3115 					    flags, pc);
3116 	if (!event)
3117 		goto out;
3118 	entry = ring_buffer_event_data(event);
3119 	entry->ip			= ip;
3120 	entry->fmt			= fmt;
3121 
3122 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3123 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3124 		__buffer_unlock_commit(buffer, event);
3125 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3126 	}
3127 
3128 out:
3129 	put_trace_buf();
3130 
3131 out_nobuffer:
3132 	preempt_enable_notrace();
3133 	unpause_graph_tracing();
3134 
3135 	return len;
3136 }
3137 EXPORT_SYMBOL_GPL(trace_vbprintk);
3138 
3139 __printf(3, 0)
3140 static int
3141 __trace_array_vprintk(struct ring_buffer *buffer,
3142 		      unsigned long ip, const char *fmt, va_list args)
3143 {
3144 	struct trace_event_call *call = &event_print;
3145 	struct ring_buffer_event *event;
3146 	int len = 0, size, pc;
3147 	struct print_entry *entry;
3148 	unsigned long flags;
3149 	char *tbuffer;
3150 
3151 	if (tracing_disabled || tracing_selftest_running)
3152 		return 0;
3153 
3154 	/* Don't pollute graph traces with trace_vprintk internals */
3155 	pause_graph_tracing();
3156 
3157 	pc = preempt_count();
3158 	preempt_disable_notrace();
3159 
3160 
3161 	tbuffer = get_trace_buf();
3162 	if (!tbuffer) {
3163 		len = 0;
3164 		goto out_nobuffer;
3165 	}
3166 
3167 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3168 
3169 	local_save_flags(flags);
3170 	size = sizeof(*entry) + len + 1;
3171 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3172 					    flags, pc);
3173 	if (!event)
3174 		goto out;
3175 	entry = ring_buffer_event_data(event);
3176 	entry->ip = ip;
3177 
3178 	memcpy(&entry->buf, tbuffer, len + 1);
3179 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3180 		__buffer_unlock_commit(buffer, event);
3181 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3182 	}
3183 
3184 out:
3185 	put_trace_buf();
3186 
3187 out_nobuffer:
3188 	preempt_enable_notrace();
3189 	unpause_graph_tracing();
3190 
3191 	return len;
3192 }
3193 
3194 __printf(3, 0)
3195 int trace_array_vprintk(struct trace_array *tr,
3196 			unsigned long ip, const char *fmt, va_list args)
3197 {
3198 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3199 }
3200 
3201 __printf(3, 0)
3202 int trace_array_printk(struct trace_array *tr,
3203 		       unsigned long ip, const char *fmt, ...)
3204 {
3205 	int ret;
3206 	va_list ap;
3207 
3208 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3209 		return 0;
3210 
3211 	va_start(ap, fmt);
3212 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3213 	va_end(ap);
3214 	return ret;
3215 }
3216 
3217 __printf(3, 4)
3218 int trace_array_printk_buf(struct ring_buffer *buffer,
3219 			   unsigned long ip, const char *fmt, ...)
3220 {
3221 	int ret;
3222 	va_list ap;
3223 
3224 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3225 		return 0;
3226 
3227 	va_start(ap, fmt);
3228 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3229 	va_end(ap);
3230 	return ret;
3231 }
3232 
3233 __printf(2, 0)
3234 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3235 {
3236 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3237 }
3238 EXPORT_SYMBOL_GPL(trace_vprintk);
3239 
3240 static void trace_iterator_increment(struct trace_iterator *iter)
3241 {
3242 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3243 
3244 	iter->idx++;
3245 	if (buf_iter)
3246 		ring_buffer_read(buf_iter, NULL);
3247 }
3248 
3249 static struct trace_entry *
3250 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3251 		unsigned long *lost_events)
3252 {
3253 	struct ring_buffer_event *event;
3254 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3255 
3256 	if (buf_iter)
3257 		event = ring_buffer_iter_peek(buf_iter, ts);
3258 	else
3259 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3260 					 lost_events);
3261 
3262 	if (event) {
3263 		iter->ent_size = ring_buffer_event_length(event);
3264 		return ring_buffer_event_data(event);
3265 	}
3266 	iter->ent_size = 0;
3267 	return NULL;
3268 }
3269 
3270 static struct trace_entry *
3271 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3272 		  unsigned long *missing_events, u64 *ent_ts)
3273 {
3274 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3275 	struct trace_entry *ent, *next = NULL;
3276 	unsigned long lost_events = 0, next_lost = 0;
3277 	int cpu_file = iter->cpu_file;
3278 	u64 next_ts = 0, ts;
3279 	int next_cpu = -1;
3280 	int next_size = 0;
3281 	int cpu;
3282 
3283 	/*
3284 	 * If we are in a per_cpu trace file, don't bother by iterating over
3285 	 * all cpu and peek directly.
3286 	 */
3287 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3288 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3289 			return NULL;
3290 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3291 		if (ent_cpu)
3292 			*ent_cpu = cpu_file;
3293 
3294 		return ent;
3295 	}
3296 
3297 	for_each_tracing_cpu(cpu) {
3298 
3299 		if (ring_buffer_empty_cpu(buffer, cpu))
3300 			continue;
3301 
3302 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3303 
3304 		/*
3305 		 * Pick the entry with the smallest timestamp:
3306 		 */
3307 		if (ent && (!next || ts < next_ts)) {
3308 			next = ent;
3309 			next_cpu = cpu;
3310 			next_ts = ts;
3311 			next_lost = lost_events;
3312 			next_size = iter->ent_size;
3313 		}
3314 	}
3315 
3316 	iter->ent_size = next_size;
3317 
3318 	if (ent_cpu)
3319 		*ent_cpu = next_cpu;
3320 
3321 	if (ent_ts)
3322 		*ent_ts = next_ts;
3323 
3324 	if (missing_events)
3325 		*missing_events = next_lost;
3326 
3327 	return next;
3328 }
3329 
3330 /* Find the next real entry, without updating the iterator itself */
3331 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3332 					  int *ent_cpu, u64 *ent_ts)
3333 {
3334 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3335 }
3336 
3337 /* Find the next real entry, and increment the iterator to the next entry */
3338 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3339 {
3340 	iter->ent = __find_next_entry(iter, &iter->cpu,
3341 				      &iter->lost_events, &iter->ts);
3342 
3343 	if (iter->ent)
3344 		trace_iterator_increment(iter);
3345 
3346 	return iter->ent ? iter : NULL;
3347 }
3348 
3349 static void trace_consume(struct trace_iterator *iter)
3350 {
3351 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3352 			    &iter->lost_events);
3353 }
3354 
3355 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3356 {
3357 	struct trace_iterator *iter = m->private;
3358 	int i = (int)*pos;
3359 	void *ent;
3360 
3361 	WARN_ON_ONCE(iter->leftover);
3362 
3363 	(*pos)++;
3364 
3365 	/* can't go backwards */
3366 	if (iter->idx > i)
3367 		return NULL;
3368 
3369 	if (iter->idx < 0)
3370 		ent = trace_find_next_entry_inc(iter);
3371 	else
3372 		ent = iter;
3373 
3374 	while (ent && iter->idx < i)
3375 		ent = trace_find_next_entry_inc(iter);
3376 
3377 	iter->pos = *pos;
3378 
3379 	return ent;
3380 }
3381 
3382 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3383 {
3384 	struct ring_buffer_event *event;
3385 	struct ring_buffer_iter *buf_iter;
3386 	unsigned long entries = 0;
3387 	u64 ts;
3388 
3389 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3390 
3391 	buf_iter = trace_buffer_iter(iter, cpu);
3392 	if (!buf_iter)
3393 		return;
3394 
3395 	ring_buffer_iter_reset(buf_iter);
3396 
3397 	/*
3398 	 * We could have the case with the max latency tracers
3399 	 * that a reset never took place on a cpu. This is evident
3400 	 * by the timestamp being before the start of the buffer.
3401 	 */
3402 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3403 		if (ts >= iter->trace_buffer->time_start)
3404 			break;
3405 		entries++;
3406 		ring_buffer_read(buf_iter, NULL);
3407 	}
3408 
3409 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3410 }
3411 
3412 /*
3413  * The current tracer is copied to avoid a global locking
3414  * all around.
3415  */
3416 static void *s_start(struct seq_file *m, loff_t *pos)
3417 {
3418 	struct trace_iterator *iter = m->private;
3419 	struct trace_array *tr = iter->tr;
3420 	int cpu_file = iter->cpu_file;
3421 	void *p = NULL;
3422 	loff_t l = 0;
3423 	int cpu;
3424 
3425 	/*
3426 	 * copy the tracer to avoid using a global lock all around.
3427 	 * iter->trace is a copy of current_trace, the pointer to the
3428 	 * name may be used instead of a strcmp(), as iter->trace->name
3429 	 * will point to the same string as current_trace->name.
3430 	 */
3431 	mutex_lock(&trace_types_lock);
3432 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3433 		*iter->trace = *tr->current_trace;
3434 	mutex_unlock(&trace_types_lock);
3435 
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437 	if (iter->snapshot && iter->trace->use_max_tr)
3438 		return ERR_PTR(-EBUSY);
3439 #endif
3440 
3441 	if (!iter->snapshot)
3442 		atomic_inc(&trace_record_taskinfo_disabled);
3443 
3444 	if (*pos != iter->pos) {
3445 		iter->ent = NULL;
3446 		iter->cpu = 0;
3447 		iter->idx = -1;
3448 
3449 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3450 			for_each_tracing_cpu(cpu)
3451 				tracing_iter_reset(iter, cpu);
3452 		} else
3453 			tracing_iter_reset(iter, cpu_file);
3454 
3455 		iter->leftover = 0;
3456 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3457 			;
3458 
3459 	} else {
3460 		/*
3461 		 * If we overflowed the seq_file before, then we want
3462 		 * to just reuse the trace_seq buffer again.
3463 		 */
3464 		if (iter->leftover)
3465 			p = iter;
3466 		else {
3467 			l = *pos - 1;
3468 			p = s_next(m, p, &l);
3469 		}
3470 	}
3471 
3472 	trace_event_read_lock();
3473 	trace_access_lock(cpu_file);
3474 	return p;
3475 }
3476 
3477 static void s_stop(struct seq_file *m, void *p)
3478 {
3479 	struct trace_iterator *iter = m->private;
3480 
3481 #ifdef CONFIG_TRACER_MAX_TRACE
3482 	if (iter->snapshot && iter->trace->use_max_tr)
3483 		return;
3484 #endif
3485 
3486 	if (!iter->snapshot)
3487 		atomic_dec(&trace_record_taskinfo_disabled);
3488 
3489 	trace_access_unlock(iter->cpu_file);
3490 	trace_event_read_unlock();
3491 }
3492 
3493 static void
3494 get_total_entries(struct trace_buffer *buf,
3495 		  unsigned long *total, unsigned long *entries)
3496 {
3497 	unsigned long count;
3498 	int cpu;
3499 
3500 	*total = 0;
3501 	*entries = 0;
3502 
3503 	for_each_tracing_cpu(cpu) {
3504 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3505 		/*
3506 		 * If this buffer has skipped entries, then we hold all
3507 		 * entries for the trace and we need to ignore the
3508 		 * ones before the time stamp.
3509 		 */
3510 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3511 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3512 			/* total is the same as the entries */
3513 			*total += count;
3514 		} else
3515 			*total += count +
3516 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3517 		*entries += count;
3518 	}
3519 }
3520 
3521 static void print_lat_help_header(struct seq_file *m)
3522 {
3523 	seq_puts(m, "#                  _------=> CPU#            \n"
3524 		    "#                 / _-----=> irqs-off        \n"
3525 		    "#                | / _----=> need-resched    \n"
3526 		    "#                || / _---=> hardirq/softirq \n"
3527 		    "#                ||| / _--=> preempt-depth   \n"
3528 		    "#                |||| /     delay            \n"
3529 		    "#  cmd     pid   ||||| time  |   caller      \n"
3530 		    "#     \\   /      |||||  \\    |   /         \n");
3531 }
3532 
3533 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3534 {
3535 	unsigned long total;
3536 	unsigned long entries;
3537 
3538 	get_total_entries(buf, &total, &entries);
3539 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3540 		   entries, total, num_online_cpus());
3541 	seq_puts(m, "#\n");
3542 }
3543 
3544 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3545 				   unsigned int flags)
3546 {
3547 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3548 
3549 	print_event_info(buf, m);
3550 
3551 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3552 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3553 }
3554 
3555 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3556 				       unsigned int flags)
3557 {
3558 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3559 	const char tgid_space[] = "          ";
3560 	const char space[] = "  ";
3561 
3562 	print_event_info(buf, m);
3563 
3564 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3565 		   tgid ? tgid_space : space);
3566 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3567 		   tgid ? tgid_space : space);
3568 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3569 		   tgid ? tgid_space : space);
3570 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3571 		   tgid ? tgid_space : space);
3572 	seq_printf(m, "#                          %s||| /     delay\n",
3573 		   tgid ? tgid_space : space);
3574 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3575 		   tgid ? "   TGID   " : space);
3576 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3577 		   tgid ? "     |    " : space);
3578 }
3579 
3580 void
3581 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3582 {
3583 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3584 	struct trace_buffer *buf = iter->trace_buffer;
3585 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3586 	struct tracer *type = iter->trace;
3587 	unsigned long entries;
3588 	unsigned long total;
3589 	const char *name = "preemption";
3590 
3591 	name = type->name;
3592 
3593 	get_total_entries(buf, &total, &entries);
3594 
3595 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3596 		   name, UTS_RELEASE);
3597 	seq_puts(m, "# -----------------------------------"
3598 		 "---------------------------------\n");
3599 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3600 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3601 		   nsecs_to_usecs(data->saved_latency),
3602 		   entries,
3603 		   total,
3604 		   buf->cpu,
3605 #if defined(CONFIG_PREEMPT_NONE)
3606 		   "server",
3607 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3608 		   "desktop",
3609 #elif defined(CONFIG_PREEMPT)
3610 		   "preempt",
3611 #else
3612 		   "unknown",
3613 #endif
3614 		   /* These are reserved for later use */
3615 		   0, 0, 0, 0);
3616 #ifdef CONFIG_SMP
3617 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3618 #else
3619 	seq_puts(m, ")\n");
3620 #endif
3621 	seq_puts(m, "#    -----------------\n");
3622 	seq_printf(m, "#    | task: %.16s-%d "
3623 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3624 		   data->comm, data->pid,
3625 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3626 		   data->policy, data->rt_priority);
3627 	seq_puts(m, "#    -----------------\n");
3628 
3629 	if (data->critical_start) {
3630 		seq_puts(m, "#  => started at: ");
3631 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3632 		trace_print_seq(m, &iter->seq);
3633 		seq_puts(m, "\n#  => ended at:   ");
3634 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3635 		trace_print_seq(m, &iter->seq);
3636 		seq_puts(m, "\n#\n");
3637 	}
3638 
3639 	seq_puts(m, "#\n");
3640 }
3641 
3642 static void test_cpu_buff_start(struct trace_iterator *iter)
3643 {
3644 	struct trace_seq *s = &iter->seq;
3645 	struct trace_array *tr = iter->tr;
3646 
3647 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3648 		return;
3649 
3650 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3651 		return;
3652 
3653 	if (cpumask_available(iter->started) &&
3654 	    cpumask_test_cpu(iter->cpu, iter->started))
3655 		return;
3656 
3657 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3658 		return;
3659 
3660 	if (cpumask_available(iter->started))
3661 		cpumask_set_cpu(iter->cpu, iter->started);
3662 
3663 	/* Don't print started cpu buffer for the first entry of the trace */
3664 	if (iter->idx > 1)
3665 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3666 				iter->cpu);
3667 }
3668 
3669 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3670 {
3671 	struct trace_array *tr = iter->tr;
3672 	struct trace_seq *s = &iter->seq;
3673 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3674 	struct trace_entry *entry;
3675 	struct trace_event *event;
3676 
3677 	entry = iter->ent;
3678 
3679 	test_cpu_buff_start(iter);
3680 
3681 	event = ftrace_find_event(entry->type);
3682 
3683 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3684 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3685 			trace_print_lat_context(iter);
3686 		else
3687 			trace_print_context(iter);
3688 	}
3689 
3690 	if (trace_seq_has_overflowed(s))
3691 		return TRACE_TYPE_PARTIAL_LINE;
3692 
3693 	if (event)
3694 		return event->funcs->trace(iter, sym_flags, event);
3695 
3696 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3697 
3698 	return trace_handle_return(s);
3699 }
3700 
3701 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3702 {
3703 	struct trace_array *tr = iter->tr;
3704 	struct trace_seq *s = &iter->seq;
3705 	struct trace_entry *entry;
3706 	struct trace_event *event;
3707 
3708 	entry = iter->ent;
3709 
3710 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3711 		trace_seq_printf(s, "%d %d %llu ",
3712 				 entry->pid, iter->cpu, iter->ts);
3713 
3714 	if (trace_seq_has_overflowed(s))
3715 		return TRACE_TYPE_PARTIAL_LINE;
3716 
3717 	event = ftrace_find_event(entry->type);
3718 	if (event)
3719 		return event->funcs->raw(iter, 0, event);
3720 
3721 	trace_seq_printf(s, "%d ?\n", entry->type);
3722 
3723 	return trace_handle_return(s);
3724 }
3725 
3726 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3727 {
3728 	struct trace_array *tr = iter->tr;
3729 	struct trace_seq *s = &iter->seq;
3730 	unsigned char newline = '\n';
3731 	struct trace_entry *entry;
3732 	struct trace_event *event;
3733 
3734 	entry = iter->ent;
3735 
3736 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3737 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3738 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3739 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3740 		if (trace_seq_has_overflowed(s))
3741 			return TRACE_TYPE_PARTIAL_LINE;
3742 	}
3743 
3744 	event = ftrace_find_event(entry->type);
3745 	if (event) {
3746 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3747 		if (ret != TRACE_TYPE_HANDLED)
3748 			return ret;
3749 	}
3750 
3751 	SEQ_PUT_FIELD(s, newline);
3752 
3753 	return trace_handle_return(s);
3754 }
3755 
3756 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3757 {
3758 	struct trace_array *tr = iter->tr;
3759 	struct trace_seq *s = &iter->seq;
3760 	struct trace_entry *entry;
3761 	struct trace_event *event;
3762 
3763 	entry = iter->ent;
3764 
3765 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3766 		SEQ_PUT_FIELD(s, entry->pid);
3767 		SEQ_PUT_FIELD(s, iter->cpu);
3768 		SEQ_PUT_FIELD(s, iter->ts);
3769 		if (trace_seq_has_overflowed(s))
3770 			return TRACE_TYPE_PARTIAL_LINE;
3771 	}
3772 
3773 	event = ftrace_find_event(entry->type);
3774 	return event ? event->funcs->binary(iter, 0, event) :
3775 		TRACE_TYPE_HANDLED;
3776 }
3777 
3778 int trace_empty(struct trace_iterator *iter)
3779 {
3780 	struct ring_buffer_iter *buf_iter;
3781 	int cpu;
3782 
3783 	/* If we are looking at one CPU buffer, only check that one */
3784 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3785 		cpu = iter->cpu_file;
3786 		buf_iter = trace_buffer_iter(iter, cpu);
3787 		if (buf_iter) {
3788 			if (!ring_buffer_iter_empty(buf_iter))
3789 				return 0;
3790 		} else {
3791 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3792 				return 0;
3793 		}
3794 		return 1;
3795 	}
3796 
3797 	for_each_tracing_cpu(cpu) {
3798 		buf_iter = trace_buffer_iter(iter, cpu);
3799 		if (buf_iter) {
3800 			if (!ring_buffer_iter_empty(buf_iter))
3801 				return 0;
3802 		} else {
3803 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3804 				return 0;
3805 		}
3806 	}
3807 
3808 	return 1;
3809 }
3810 
3811 /*  Called with trace_event_read_lock() held. */
3812 enum print_line_t print_trace_line(struct trace_iterator *iter)
3813 {
3814 	struct trace_array *tr = iter->tr;
3815 	unsigned long trace_flags = tr->trace_flags;
3816 	enum print_line_t ret;
3817 
3818 	if (iter->lost_events) {
3819 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3820 				 iter->cpu, iter->lost_events);
3821 		if (trace_seq_has_overflowed(&iter->seq))
3822 			return TRACE_TYPE_PARTIAL_LINE;
3823 	}
3824 
3825 	if (iter->trace && iter->trace->print_line) {
3826 		ret = iter->trace->print_line(iter);
3827 		if (ret != TRACE_TYPE_UNHANDLED)
3828 			return ret;
3829 	}
3830 
3831 	if (iter->ent->type == TRACE_BPUTS &&
3832 			trace_flags & TRACE_ITER_PRINTK &&
3833 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3834 		return trace_print_bputs_msg_only(iter);
3835 
3836 	if (iter->ent->type == TRACE_BPRINT &&
3837 			trace_flags & TRACE_ITER_PRINTK &&
3838 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3839 		return trace_print_bprintk_msg_only(iter);
3840 
3841 	if (iter->ent->type == TRACE_PRINT &&
3842 			trace_flags & TRACE_ITER_PRINTK &&
3843 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3844 		return trace_print_printk_msg_only(iter);
3845 
3846 	if (trace_flags & TRACE_ITER_BIN)
3847 		return print_bin_fmt(iter);
3848 
3849 	if (trace_flags & TRACE_ITER_HEX)
3850 		return print_hex_fmt(iter);
3851 
3852 	if (trace_flags & TRACE_ITER_RAW)
3853 		return print_raw_fmt(iter);
3854 
3855 	return print_trace_fmt(iter);
3856 }
3857 
3858 void trace_latency_header(struct seq_file *m)
3859 {
3860 	struct trace_iterator *iter = m->private;
3861 	struct trace_array *tr = iter->tr;
3862 
3863 	/* print nothing if the buffers are empty */
3864 	if (trace_empty(iter))
3865 		return;
3866 
3867 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3868 		print_trace_header(m, iter);
3869 
3870 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3871 		print_lat_help_header(m);
3872 }
3873 
3874 void trace_default_header(struct seq_file *m)
3875 {
3876 	struct trace_iterator *iter = m->private;
3877 	struct trace_array *tr = iter->tr;
3878 	unsigned long trace_flags = tr->trace_flags;
3879 
3880 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3881 		return;
3882 
3883 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3884 		/* print nothing if the buffers are empty */
3885 		if (trace_empty(iter))
3886 			return;
3887 		print_trace_header(m, iter);
3888 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3889 			print_lat_help_header(m);
3890 	} else {
3891 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3892 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3893 				print_func_help_header_irq(iter->trace_buffer,
3894 							   m, trace_flags);
3895 			else
3896 				print_func_help_header(iter->trace_buffer, m,
3897 						       trace_flags);
3898 		}
3899 	}
3900 }
3901 
3902 static void test_ftrace_alive(struct seq_file *m)
3903 {
3904 	if (!ftrace_is_dead())
3905 		return;
3906 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3907 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3908 }
3909 
3910 #ifdef CONFIG_TRACER_MAX_TRACE
3911 static void show_snapshot_main_help(struct seq_file *m)
3912 {
3913 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3914 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3915 		    "#                      Takes a snapshot of the main buffer.\n"
3916 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3917 		    "#                      (Doesn't have to be '2' works with any number that\n"
3918 		    "#                       is not a '0' or '1')\n");
3919 }
3920 
3921 static void show_snapshot_percpu_help(struct seq_file *m)
3922 {
3923 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3924 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3925 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3926 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3927 #else
3928 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3929 		    "#                     Must use main snapshot file to allocate.\n");
3930 #endif
3931 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3932 		    "#                      (Doesn't have to be '2' works with any number that\n"
3933 		    "#                       is not a '0' or '1')\n");
3934 }
3935 
3936 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3937 {
3938 	if (iter->tr->allocated_snapshot)
3939 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3940 	else
3941 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3942 
3943 	seq_puts(m, "# Snapshot commands:\n");
3944 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3945 		show_snapshot_main_help(m);
3946 	else
3947 		show_snapshot_percpu_help(m);
3948 }
3949 #else
3950 /* Should never be called */
3951 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3952 #endif
3953 
3954 static int s_show(struct seq_file *m, void *v)
3955 {
3956 	struct trace_iterator *iter = v;
3957 	int ret;
3958 
3959 	if (iter->ent == NULL) {
3960 		if (iter->tr) {
3961 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3962 			seq_puts(m, "#\n");
3963 			test_ftrace_alive(m);
3964 		}
3965 		if (iter->snapshot && trace_empty(iter))
3966 			print_snapshot_help(m, iter);
3967 		else if (iter->trace && iter->trace->print_header)
3968 			iter->trace->print_header(m);
3969 		else
3970 			trace_default_header(m);
3971 
3972 	} else if (iter->leftover) {
3973 		/*
3974 		 * If we filled the seq_file buffer earlier, we
3975 		 * want to just show it now.
3976 		 */
3977 		ret = trace_print_seq(m, &iter->seq);
3978 
3979 		/* ret should this time be zero, but you never know */
3980 		iter->leftover = ret;
3981 
3982 	} else {
3983 		print_trace_line(iter);
3984 		ret = trace_print_seq(m, &iter->seq);
3985 		/*
3986 		 * If we overflow the seq_file buffer, then it will
3987 		 * ask us for this data again at start up.
3988 		 * Use that instead.
3989 		 *  ret is 0 if seq_file write succeeded.
3990 		 *        -1 otherwise.
3991 		 */
3992 		iter->leftover = ret;
3993 	}
3994 
3995 	return 0;
3996 }
3997 
3998 /*
3999  * Should be used after trace_array_get(), trace_types_lock
4000  * ensures that i_cdev was already initialized.
4001  */
4002 static inline int tracing_get_cpu(struct inode *inode)
4003 {
4004 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4005 		return (long)inode->i_cdev - 1;
4006 	return RING_BUFFER_ALL_CPUS;
4007 }
4008 
4009 static const struct seq_operations tracer_seq_ops = {
4010 	.start		= s_start,
4011 	.next		= s_next,
4012 	.stop		= s_stop,
4013 	.show		= s_show,
4014 };
4015 
4016 static struct trace_iterator *
4017 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4018 {
4019 	struct trace_array *tr = inode->i_private;
4020 	struct trace_iterator *iter;
4021 	int cpu;
4022 
4023 	if (tracing_disabled)
4024 		return ERR_PTR(-ENODEV);
4025 
4026 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4027 	if (!iter)
4028 		return ERR_PTR(-ENOMEM);
4029 
4030 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4031 				    GFP_KERNEL);
4032 	if (!iter->buffer_iter)
4033 		goto release;
4034 
4035 	/*
4036 	 * We make a copy of the current tracer to avoid concurrent
4037 	 * changes on it while we are reading.
4038 	 */
4039 	mutex_lock(&trace_types_lock);
4040 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4041 	if (!iter->trace)
4042 		goto fail;
4043 
4044 	*iter->trace = *tr->current_trace;
4045 
4046 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4047 		goto fail;
4048 
4049 	iter->tr = tr;
4050 
4051 #ifdef CONFIG_TRACER_MAX_TRACE
4052 	/* Currently only the top directory has a snapshot */
4053 	if (tr->current_trace->print_max || snapshot)
4054 		iter->trace_buffer = &tr->max_buffer;
4055 	else
4056 #endif
4057 		iter->trace_buffer = &tr->trace_buffer;
4058 	iter->snapshot = snapshot;
4059 	iter->pos = -1;
4060 	iter->cpu_file = tracing_get_cpu(inode);
4061 	mutex_init(&iter->mutex);
4062 
4063 	/* Notify the tracer early; before we stop tracing. */
4064 	if (iter->trace && iter->trace->open)
4065 		iter->trace->open(iter);
4066 
4067 	/* Annotate start of buffers if we had overruns */
4068 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
4069 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4070 
4071 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4072 	if (trace_clocks[tr->clock_id].in_ns)
4073 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4074 
4075 	/* stop the trace while dumping if we are not opening "snapshot" */
4076 	if (!iter->snapshot)
4077 		tracing_stop_tr(tr);
4078 
4079 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4080 		for_each_tracing_cpu(cpu) {
4081 			iter->buffer_iter[cpu] =
4082 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
4083 		}
4084 		ring_buffer_read_prepare_sync();
4085 		for_each_tracing_cpu(cpu) {
4086 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4087 			tracing_iter_reset(iter, cpu);
4088 		}
4089 	} else {
4090 		cpu = iter->cpu_file;
4091 		iter->buffer_iter[cpu] =
4092 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
4093 		ring_buffer_read_prepare_sync();
4094 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4095 		tracing_iter_reset(iter, cpu);
4096 	}
4097 
4098 	mutex_unlock(&trace_types_lock);
4099 
4100 	return iter;
4101 
4102  fail:
4103 	mutex_unlock(&trace_types_lock);
4104 	kfree(iter->trace);
4105 	kfree(iter->buffer_iter);
4106 release:
4107 	seq_release_private(inode, file);
4108 	return ERR_PTR(-ENOMEM);
4109 }
4110 
4111 int tracing_open_generic(struct inode *inode, struct file *filp)
4112 {
4113 	if (tracing_disabled)
4114 		return -ENODEV;
4115 
4116 	filp->private_data = inode->i_private;
4117 	return 0;
4118 }
4119 
4120 bool tracing_is_disabled(void)
4121 {
4122 	return (tracing_disabled) ? true: false;
4123 }
4124 
4125 /*
4126  * Open and update trace_array ref count.
4127  * Must have the current trace_array passed to it.
4128  */
4129 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4130 {
4131 	struct trace_array *tr = inode->i_private;
4132 
4133 	if (tracing_disabled)
4134 		return -ENODEV;
4135 
4136 	if (trace_array_get(tr) < 0)
4137 		return -ENODEV;
4138 
4139 	filp->private_data = inode->i_private;
4140 
4141 	return 0;
4142 }
4143 
4144 static int tracing_release(struct inode *inode, struct file *file)
4145 {
4146 	struct trace_array *tr = inode->i_private;
4147 	struct seq_file *m = file->private_data;
4148 	struct trace_iterator *iter;
4149 	int cpu;
4150 
4151 	if (!(file->f_mode & FMODE_READ)) {
4152 		trace_array_put(tr);
4153 		return 0;
4154 	}
4155 
4156 	/* Writes do not use seq_file */
4157 	iter = m->private;
4158 	mutex_lock(&trace_types_lock);
4159 
4160 	for_each_tracing_cpu(cpu) {
4161 		if (iter->buffer_iter[cpu])
4162 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4163 	}
4164 
4165 	if (iter->trace && iter->trace->close)
4166 		iter->trace->close(iter);
4167 
4168 	if (!iter->snapshot)
4169 		/* reenable tracing if it was previously enabled */
4170 		tracing_start_tr(tr);
4171 
4172 	__trace_array_put(tr);
4173 
4174 	mutex_unlock(&trace_types_lock);
4175 
4176 	mutex_destroy(&iter->mutex);
4177 	free_cpumask_var(iter->started);
4178 	kfree(iter->trace);
4179 	kfree(iter->buffer_iter);
4180 	seq_release_private(inode, file);
4181 
4182 	return 0;
4183 }
4184 
4185 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4186 {
4187 	struct trace_array *tr = inode->i_private;
4188 
4189 	trace_array_put(tr);
4190 	return 0;
4191 }
4192 
4193 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4194 {
4195 	struct trace_array *tr = inode->i_private;
4196 
4197 	trace_array_put(tr);
4198 
4199 	return single_release(inode, file);
4200 }
4201 
4202 static int tracing_open(struct inode *inode, struct file *file)
4203 {
4204 	struct trace_array *tr = inode->i_private;
4205 	struct trace_iterator *iter;
4206 	int ret = 0;
4207 
4208 	if (trace_array_get(tr) < 0)
4209 		return -ENODEV;
4210 
4211 	/* If this file was open for write, then erase contents */
4212 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4213 		int cpu = tracing_get_cpu(inode);
4214 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4215 
4216 #ifdef CONFIG_TRACER_MAX_TRACE
4217 		if (tr->current_trace->print_max)
4218 			trace_buf = &tr->max_buffer;
4219 #endif
4220 
4221 		if (cpu == RING_BUFFER_ALL_CPUS)
4222 			tracing_reset_online_cpus(trace_buf);
4223 		else
4224 			tracing_reset(trace_buf, cpu);
4225 	}
4226 
4227 	if (file->f_mode & FMODE_READ) {
4228 		iter = __tracing_open(inode, file, false);
4229 		if (IS_ERR(iter))
4230 			ret = PTR_ERR(iter);
4231 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4232 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4233 	}
4234 
4235 	if (ret < 0)
4236 		trace_array_put(tr);
4237 
4238 	return ret;
4239 }
4240 
4241 /*
4242  * Some tracers are not suitable for instance buffers.
4243  * A tracer is always available for the global array (toplevel)
4244  * or if it explicitly states that it is.
4245  */
4246 static bool
4247 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4248 {
4249 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4250 }
4251 
4252 /* Find the next tracer that this trace array may use */
4253 static struct tracer *
4254 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4255 {
4256 	while (t && !trace_ok_for_array(t, tr))
4257 		t = t->next;
4258 
4259 	return t;
4260 }
4261 
4262 static void *
4263 t_next(struct seq_file *m, void *v, loff_t *pos)
4264 {
4265 	struct trace_array *tr = m->private;
4266 	struct tracer *t = v;
4267 
4268 	(*pos)++;
4269 
4270 	if (t)
4271 		t = get_tracer_for_array(tr, t->next);
4272 
4273 	return t;
4274 }
4275 
4276 static void *t_start(struct seq_file *m, loff_t *pos)
4277 {
4278 	struct trace_array *tr = m->private;
4279 	struct tracer *t;
4280 	loff_t l = 0;
4281 
4282 	mutex_lock(&trace_types_lock);
4283 
4284 	t = get_tracer_for_array(tr, trace_types);
4285 	for (; t && l < *pos; t = t_next(m, t, &l))
4286 			;
4287 
4288 	return t;
4289 }
4290 
4291 static void t_stop(struct seq_file *m, void *p)
4292 {
4293 	mutex_unlock(&trace_types_lock);
4294 }
4295 
4296 static int t_show(struct seq_file *m, void *v)
4297 {
4298 	struct tracer *t = v;
4299 
4300 	if (!t)
4301 		return 0;
4302 
4303 	seq_puts(m, t->name);
4304 	if (t->next)
4305 		seq_putc(m, ' ');
4306 	else
4307 		seq_putc(m, '\n');
4308 
4309 	return 0;
4310 }
4311 
4312 static const struct seq_operations show_traces_seq_ops = {
4313 	.start		= t_start,
4314 	.next		= t_next,
4315 	.stop		= t_stop,
4316 	.show		= t_show,
4317 };
4318 
4319 static int show_traces_open(struct inode *inode, struct file *file)
4320 {
4321 	struct trace_array *tr = inode->i_private;
4322 	struct seq_file *m;
4323 	int ret;
4324 
4325 	if (tracing_disabled)
4326 		return -ENODEV;
4327 
4328 	ret = seq_open(file, &show_traces_seq_ops);
4329 	if (ret)
4330 		return ret;
4331 
4332 	m = file->private_data;
4333 	m->private = tr;
4334 
4335 	return 0;
4336 }
4337 
4338 static ssize_t
4339 tracing_write_stub(struct file *filp, const char __user *ubuf,
4340 		   size_t count, loff_t *ppos)
4341 {
4342 	return count;
4343 }
4344 
4345 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4346 {
4347 	int ret;
4348 
4349 	if (file->f_mode & FMODE_READ)
4350 		ret = seq_lseek(file, offset, whence);
4351 	else
4352 		file->f_pos = ret = 0;
4353 
4354 	return ret;
4355 }
4356 
4357 static const struct file_operations tracing_fops = {
4358 	.open		= tracing_open,
4359 	.read		= seq_read,
4360 	.write		= tracing_write_stub,
4361 	.llseek		= tracing_lseek,
4362 	.release	= tracing_release,
4363 };
4364 
4365 static const struct file_operations show_traces_fops = {
4366 	.open		= show_traces_open,
4367 	.read		= seq_read,
4368 	.release	= seq_release,
4369 	.llseek		= seq_lseek,
4370 };
4371 
4372 static ssize_t
4373 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4374 		     size_t count, loff_t *ppos)
4375 {
4376 	struct trace_array *tr = file_inode(filp)->i_private;
4377 	char *mask_str;
4378 	int len;
4379 
4380 	len = snprintf(NULL, 0, "%*pb\n",
4381 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4382 	mask_str = kmalloc(len, GFP_KERNEL);
4383 	if (!mask_str)
4384 		return -ENOMEM;
4385 
4386 	len = snprintf(mask_str, len, "%*pb\n",
4387 		       cpumask_pr_args(tr->tracing_cpumask));
4388 	if (len >= count) {
4389 		count = -EINVAL;
4390 		goto out_err;
4391 	}
4392 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4393 
4394 out_err:
4395 	kfree(mask_str);
4396 
4397 	return count;
4398 }
4399 
4400 static ssize_t
4401 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4402 		      size_t count, loff_t *ppos)
4403 {
4404 	struct trace_array *tr = file_inode(filp)->i_private;
4405 	cpumask_var_t tracing_cpumask_new;
4406 	int err, cpu;
4407 
4408 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4409 		return -ENOMEM;
4410 
4411 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4412 	if (err)
4413 		goto err_unlock;
4414 
4415 	local_irq_disable();
4416 	arch_spin_lock(&tr->max_lock);
4417 	for_each_tracing_cpu(cpu) {
4418 		/*
4419 		 * Increase/decrease the disabled counter if we are
4420 		 * about to flip a bit in the cpumask:
4421 		 */
4422 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4423 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4424 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4425 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4426 		}
4427 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4428 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4429 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4430 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4431 		}
4432 	}
4433 	arch_spin_unlock(&tr->max_lock);
4434 	local_irq_enable();
4435 
4436 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4437 	free_cpumask_var(tracing_cpumask_new);
4438 
4439 	return count;
4440 
4441 err_unlock:
4442 	free_cpumask_var(tracing_cpumask_new);
4443 
4444 	return err;
4445 }
4446 
4447 static const struct file_operations tracing_cpumask_fops = {
4448 	.open		= tracing_open_generic_tr,
4449 	.read		= tracing_cpumask_read,
4450 	.write		= tracing_cpumask_write,
4451 	.release	= tracing_release_generic_tr,
4452 	.llseek		= generic_file_llseek,
4453 };
4454 
4455 static int tracing_trace_options_show(struct seq_file *m, void *v)
4456 {
4457 	struct tracer_opt *trace_opts;
4458 	struct trace_array *tr = m->private;
4459 	u32 tracer_flags;
4460 	int i;
4461 
4462 	mutex_lock(&trace_types_lock);
4463 	tracer_flags = tr->current_trace->flags->val;
4464 	trace_opts = tr->current_trace->flags->opts;
4465 
4466 	for (i = 0; trace_options[i]; i++) {
4467 		if (tr->trace_flags & (1 << i))
4468 			seq_printf(m, "%s\n", trace_options[i]);
4469 		else
4470 			seq_printf(m, "no%s\n", trace_options[i]);
4471 	}
4472 
4473 	for (i = 0; trace_opts[i].name; i++) {
4474 		if (tracer_flags & trace_opts[i].bit)
4475 			seq_printf(m, "%s\n", trace_opts[i].name);
4476 		else
4477 			seq_printf(m, "no%s\n", trace_opts[i].name);
4478 	}
4479 	mutex_unlock(&trace_types_lock);
4480 
4481 	return 0;
4482 }
4483 
4484 static int __set_tracer_option(struct trace_array *tr,
4485 			       struct tracer_flags *tracer_flags,
4486 			       struct tracer_opt *opts, int neg)
4487 {
4488 	struct tracer *trace = tracer_flags->trace;
4489 	int ret;
4490 
4491 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4492 	if (ret)
4493 		return ret;
4494 
4495 	if (neg)
4496 		tracer_flags->val &= ~opts->bit;
4497 	else
4498 		tracer_flags->val |= opts->bit;
4499 	return 0;
4500 }
4501 
4502 /* Try to assign a tracer specific option */
4503 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4504 {
4505 	struct tracer *trace = tr->current_trace;
4506 	struct tracer_flags *tracer_flags = trace->flags;
4507 	struct tracer_opt *opts = NULL;
4508 	int i;
4509 
4510 	for (i = 0; tracer_flags->opts[i].name; i++) {
4511 		opts = &tracer_flags->opts[i];
4512 
4513 		if (strcmp(cmp, opts->name) == 0)
4514 			return __set_tracer_option(tr, trace->flags, opts, neg);
4515 	}
4516 
4517 	return -EINVAL;
4518 }
4519 
4520 /* Some tracers require overwrite to stay enabled */
4521 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4522 {
4523 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4524 		return -1;
4525 
4526 	return 0;
4527 }
4528 
4529 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4530 {
4531 	/* do nothing if flag is already set */
4532 	if (!!(tr->trace_flags & mask) == !!enabled)
4533 		return 0;
4534 
4535 	/* Give the tracer a chance to approve the change */
4536 	if (tr->current_trace->flag_changed)
4537 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4538 			return -EINVAL;
4539 
4540 	if (enabled)
4541 		tr->trace_flags |= mask;
4542 	else
4543 		tr->trace_flags &= ~mask;
4544 
4545 	if (mask == TRACE_ITER_RECORD_CMD)
4546 		trace_event_enable_cmd_record(enabled);
4547 
4548 	if (mask == TRACE_ITER_RECORD_TGID) {
4549 		if (!tgid_map)
4550 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4551 					   sizeof(*tgid_map),
4552 					   GFP_KERNEL);
4553 		if (!tgid_map) {
4554 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4555 			return -ENOMEM;
4556 		}
4557 
4558 		trace_event_enable_tgid_record(enabled);
4559 	}
4560 
4561 	if (mask == TRACE_ITER_EVENT_FORK)
4562 		trace_event_follow_fork(tr, enabled);
4563 
4564 	if (mask == TRACE_ITER_FUNC_FORK)
4565 		ftrace_pid_follow_fork(tr, enabled);
4566 
4567 	if (mask == TRACE_ITER_OVERWRITE) {
4568 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4569 #ifdef CONFIG_TRACER_MAX_TRACE
4570 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4571 #endif
4572 	}
4573 
4574 	if (mask == TRACE_ITER_PRINTK) {
4575 		trace_printk_start_stop_comm(enabled);
4576 		trace_printk_control(enabled);
4577 	}
4578 
4579 	return 0;
4580 }
4581 
4582 static int trace_set_options(struct trace_array *tr, char *option)
4583 {
4584 	char *cmp;
4585 	int neg = 0;
4586 	int ret;
4587 	size_t orig_len = strlen(option);
4588 	int len;
4589 
4590 	cmp = strstrip(option);
4591 
4592 	len = str_has_prefix(cmp, "no");
4593 	if (len)
4594 		neg = 1;
4595 
4596 	cmp += len;
4597 
4598 	mutex_lock(&trace_types_lock);
4599 
4600 	ret = match_string(trace_options, -1, cmp);
4601 	/* If no option could be set, test the specific tracer options */
4602 	if (ret < 0)
4603 		ret = set_tracer_option(tr, cmp, neg);
4604 	else
4605 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4606 
4607 	mutex_unlock(&trace_types_lock);
4608 
4609 	/*
4610 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4611 	 * turn it back into a space.
4612 	 */
4613 	if (orig_len > strlen(option))
4614 		option[strlen(option)] = ' ';
4615 
4616 	return ret;
4617 }
4618 
4619 static void __init apply_trace_boot_options(void)
4620 {
4621 	char *buf = trace_boot_options_buf;
4622 	char *option;
4623 
4624 	while (true) {
4625 		option = strsep(&buf, ",");
4626 
4627 		if (!option)
4628 			break;
4629 
4630 		if (*option)
4631 			trace_set_options(&global_trace, option);
4632 
4633 		/* Put back the comma to allow this to be called again */
4634 		if (buf)
4635 			*(buf - 1) = ',';
4636 	}
4637 }
4638 
4639 static ssize_t
4640 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4641 			size_t cnt, loff_t *ppos)
4642 {
4643 	struct seq_file *m = filp->private_data;
4644 	struct trace_array *tr = m->private;
4645 	char buf[64];
4646 	int ret;
4647 
4648 	if (cnt >= sizeof(buf))
4649 		return -EINVAL;
4650 
4651 	if (copy_from_user(buf, ubuf, cnt))
4652 		return -EFAULT;
4653 
4654 	buf[cnt] = 0;
4655 
4656 	ret = trace_set_options(tr, buf);
4657 	if (ret < 0)
4658 		return ret;
4659 
4660 	*ppos += cnt;
4661 
4662 	return cnt;
4663 }
4664 
4665 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4666 {
4667 	struct trace_array *tr = inode->i_private;
4668 	int ret;
4669 
4670 	if (tracing_disabled)
4671 		return -ENODEV;
4672 
4673 	if (trace_array_get(tr) < 0)
4674 		return -ENODEV;
4675 
4676 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4677 	if (ret < 0)
4678 		trace_array_put(tr);
4679 
4680 	return ret;
4681 }
4682 
4683 static const struct file_operations tracing_iter_fops = {
4684 	.open		= tracing_trace_options_open,
4685 	.read		= seq_read,
4686 	.llseek		= seq_lseek,
4687 	.release	= tracing_single_release_tr,
4688 	.write		= tracing_trace_options_write,
4689 };
4690 
4691 static const char readme_msg[] =
4692 	"tracing mini-HOWTO:\n\n"
4693 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4694 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4695 	" Important files:\n"
4696 	"  trace\t\t\t- The static contents of the buffer\n"
4697 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4698 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4699 	"  current_tracer\t- function and latency tracers\n"
4700 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4701 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4702 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4703 	"  trace_clock\t\t-change the clock used to order events\n"
4704 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4705 	"      global:   Synced across CPUs but slows tracing down.\n"
4706 	"     counter:   Not a clock, but just an increment\n"
4707 	"      uptime:   Jiffy counter from time of boot\n"
4708 	"        perf:   Same clock that perf events use\n"
4709 #ifdef CONFIG_X86_64
4710 	"     x86-tsc:   TSC cycle counter\n"
4711 #endif
4712 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
4713 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4714 	"    absolute:   Absolute (standalone) timestamp\n"
4715 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4716 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4717 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4718 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4719 	"\t\t\t  Remove sub-buffer with rmdir\n"
4720 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4721 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4722 	"\t\t\t  option name\n"
4723 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4724 #ifdef CONFIG_DYNAMIC_FTRACE
4725 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4726 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4727 	"\t\t\t  functions\n"
4728 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4729 	"\t     modules: Can select a group via module\n"
4730 	"\t      Format: :mod:<module-name>\n"
4731 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4732 	"\t    triggers: a command to perform when function is hit\n"
4733 	"\t      Format: <function>:<trigger>[:count]\n"
4734 	"\t     trigger: traceon, traceoff\n"
4735 	"\t\t      enable_event:<system>:<event>\n"
4736 	"\t\t      disable_event:<system>:<event>\n"
4737 #ifdef CONFIG_STACKTRACE
4738 	"\t\t      stacktrace\n"
4739 #endif
4740 #ifdef CONFIG_TRACER_SNAPSHOT
4741 	"\t\t      snapshot\n"
4742 #endif
4743 	"\t\t      dump\n"
4744 	"\t\t      cpudump\n"
4745 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4746 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4747 	"\t     The first one will disable tracing every time do_fault is hit\n"
4748 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4749 	"\t       The first time do trap is hit and it disables tracing, the\n"
4750 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4751 	"\t       the counter will not decrement. It only decrements when the\n"
4752 	"\t       trigger did work\n"
4753 	"\t     To remove trigger without count:\n"
4754 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4755 	"\t     To remove trigger with a count:\n"
4756 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4757 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4758 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4759 	"\t    modules: Can select a group via module command :mod:\n"
4760 	"\t    Does not accept triggers\n"
4761 #endif /* CONFIG_DYNAMIC_FTRACE */
4762 #ifdef CONFIG_FUNCTION_TRACER
4763 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4764 	"\t\t    (function)\n"
4765 #endif
4766 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4767 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4768 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4769 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4770 #endif
4771 #ifdef CONFIG_TRACER_SNAPSHOT
4772 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4773 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4774 	"\t\t\t  information\n"
4775 #endif
4776 #ifdef CONFIG_STACK_TRACER
4777 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4778 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4779 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4780 	"\t\t\t  new trace)\n"
4781 #ifdef CONFIG_DYNAMIC_FTRACE
4782 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4783 	"\t\t\t  traces\n"
4784 #endif
4785 #endif /* CONFIG_STACK_TRACER */
4786 #ifdef CONFIG_DYNAMIC_EVENTS
4787 	"  dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
4788 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4789 #endif
4790 #ifdef CONFIG_KPROBE_EVENTS
4791 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4792 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4793 #endif
4794 #ifdef CONFIG_UPROBE_EVENTS
4795 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4796 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4797 #endif
4798 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4799 	"\t  accepts: event-definitions (one definition per line)\n"
4800 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4801 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4802 #ifdef CONFIG_HIST_TRIGGERS
4803 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4804 #endif
4805 	"\t           -:[<group>/]<event>\n"
4806 #ifdef CONFIG_KPROBE_EVENTS
4807 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4808   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4809 #endif
4810 #ifdef CONFIG_UPROBE_EVENTS
4811   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4812 #endif
4813 	"\t     args: <name>=fetcharg[:type]\n"
4814 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4815 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4816 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
4817 #else
4818 	"\t           $stack<index>, $stack, $retval, $comm\n"
4819 #endif
4820 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4821 	"\t           b<bit-width>@<bit-offset>/<container-size>,\n"
4822 	"\t           <type>\\[<array-size>\\]\n"
4823 #ifdef CONFIG_HIST_TRIGGERS
4824 	"\t    field: <stype> <name>;\n"
4825 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4826 	"\t           [unsigned] char/int/long\n"
4827 #endif
4828 #endif
4829 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4830 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4831 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4832 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4833 	"\t\t\t  events\n"
4834 	"      filter\t\t- If set, only events passing filter are traced\n"
4835 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4836 	"\t\t\t  <event>:\n"
4837 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4838 	"      filter\t\t- If set, only events passing filter are traced\n"
4839 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4840 	"\t    Format: <trigger>[:count][if <filter>]\n"
4841 	"\t   trigger: traceon, traceoff\n"
4842 	"\t            enable_event:<system>:<event>\n"
4843 	"\t            disable_event:<system>:<event>\n"
4844 #ifdef CONFIG_HIST_TRIGGERS
4845 	"\t            enable_hist:<system>:<event>\n"
4846 	"\t            disable_hist:<system>:<event>\n"
4847 #endif
4848 #ifdef CONFIG_STACKTRACE
4849 	"\t\t    stacktrace\n"
4850 #endif
4851 #ifdef CONFIG_TRACER_SNAPSHOT
4852 	"\t\t    snapshot\n"
4853 #endif
4854 #ifdef CONFIG_HIST_TRIGGERS
4855 	"\t\t    hist (see below)\n"
4856 #endif
4857 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4858 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4859 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4860 	"\t                  events/block/block_unplug/trigger\n"
4861 	"\t   The first disables tracing every time block_unplug is hit.\n"
4862 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4863 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4864 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4865 	"\t   Like function triggers, the counter is only decremented if it\n"
4866 	"\t    enabled or disabled tracing.\n"
4867 	"\t   To remove a trigger without a count:\n"
4868 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4869 	"\t   To remove a trigger with a count:\n"
4870 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4871 	"\t   Filters can be ignored when removing a trigger.\n"
4872 #ifdef CONFIG_HIST_TRIGGERS
4873 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4874 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4875 	"\t            [:values=<field1[,field2,...]>]\n"
4876 	"\t            [:sort=<field1[,field2,...]>]\n"
4877 	"\t            [:size=#entries]\n"
4878 	"\t            [:pause][:continue][:clear]\n"
4879 	"\t            [:name=histname1]\n"
4880 	"\t            [:<handler>.<action>]\n"
4881 	"\t            [if <filter>]\n\n"
4882 	"\t    When a matching event is hit, an entry is added to a hash\n"
4883 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4884 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4885 	"\t    correspond to fields in the event's format description.  Keys\n"
4886 	"\t    can be any field, or the special string 'stacktrace'.\n"
4887 	"\t    Compound keys consisting of up to two fields can be specified\n"
4888 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4889 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4890 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4891 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4892 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4893 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4894 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4895 	"\t    its histogram data will be shared with other triggers of the\n"
4896 	"\t    same name, and trigger hits will update this common data.\n\n"
4897 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4898 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4899 	"\t    triggers attached to an event, there will be a table for each\n"
4900 	"\t    trigger in the output.  The table displayed for a named\n"
4901 	"\t    trigger will be the same as any other instance having the\n"
4902 	"\t    same name.  The default format used to display a given field\n"
4903 	"\t    can be modified by appending any of the following modifiers\n"
4904 	"\t    to the field name, as applicable:\n\n"
4905 	"\t            .hex        display a number as a hex value\n"
4906 	"\t            .sym        display an address as a symbol\n"
4907 	"\t            .sym-offset display an address as a symbol and offset\n"
4908 	"\t            .execname   display a common_pid as a program name\n"
4909 	"\t            .syscall    display a syscall id as a syscall name\n"
4910 	"\t            .log2       display log2 value rather than raw number\n"
4911 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4912 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4913 	"\t    trigger or to start a hist trigger but not log any events\n"
4914 	"\t    until told to do so.  'continue' can be used to start or\n"
4915 	"\t    restart a paused hist trigger.\n\n"
4916 	"\t    The 'clear' parameter will clear the contents of a running\n"
4917 	"\t    hist trigger and leave its current paused/active state\n"
4918 	"\t    unchanged.\n\n"
4919 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4920 	"\t    have one event conditionally start and stop another event's\n"
4921 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4922 	"\t    the enable_event and disable_event triggers.\n\n"
4923 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4924 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4925 	"\t        <handler>.<action>\n\n"
4926 	"\t    The available handlers are:\n\n"
4927 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4928 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4929 	"\t        onchange(var)            - invoke action if var changes\n\n"
4930 	"\t    The available actions are:\n\n"
4931 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4932 	"\t        save(field,...)                      - save current event fields\n"
4933 #ifdef CONFIG_TRACER_SNAPSHOT
4934 	"\t        snapshot()                           - snapshot the trace buffer\n"
4935 #endif
4936 #endif
4937 ;
4938 
4939 static ssize_t
4940 tracing_readme_read(struct file *filp, char __user *ubuf,
4941 		       size_t cnt, loff_t *ppos)
4942 {
4943 	return simple_read_from_buffer(ubuf, cnt, ppos,
4944 					readme_msg, strlen(readme_msg));
4945 }
4946 
4947 static const struct file_operations tracing_readme_fops = {
4948 	.open		= tracing_open_generic,
4949 	.read		= tracing_readme_read,
4950 	.llseek		= generic_file_llseek,
4951 };
4952 
4953 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4954 {
4955 	int *ptr = v;
4956 
4957 	if (*pos || m->count)
4958 		ptr++;
4959 
4960 	(*pos)++;
4961 
4962 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4963 		if (trace_find_tgid(*ptr))
4964 			return ptr;
4965 	}
4966 
4967 	return NULL;
4968 }
4969 
4970 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4971 {
4972 	void *v;
4973 	loff_t l = 0;
4974 
4975 	if (!tgid_map)
4976 		return NULL;
4977 
4978 	v = &tgid_map[0];
4979 	while (l <= *pos) {
4980 		v = saved_tgids_next(m, v, &l);
4981 		if (!v)
4982 			return NULL;
4983 	}
4984 
4985 	return v;
4986 }
4987 
4988 static void saved_tgids_stop(struct seq_file *m, void *v)
4989 {
4990 }
4991 
4992 static int saved_tgids_show(struct seq_file *m, void *v)
4993 {
4994 	int pid = (int *)v - tgid_map;
4995 
4996 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4997 	return 0;
4998 }
4999 
5000 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5001 	.start		= saved_tgids_start,
5002 	.stop		= saved_tgids_stop,
5003 	.next		= saved_tgids_next,
5004 	.show		= saved_tgids_show,
5005 };
5006 
5007 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5008 {
5009 	if (tracing_disabled)
5010 		return -ENODEV;
5011 
5012 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5013 }
5014 
5015 
5016 static const struct file_operations tracing_saved_tgids_fops = {
5017 	.open		= tracing_saved_tgids_open,
5018 	.read		= seq_read,
5019 	.llseek		= seq_lseek,
5020 	.release	= seq_release,
5021 };
5022 
5023 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5024 {
5025 	unsigned int *ptr = v;
5026 
5027 	if (*pos || m->count)
5028 		ptr++;
5029 
5030 	(*pos)++;
5031 
5032 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5033 	     ptr++) {
5034 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5035 			continue;
5036 
5037 		return ptr;
5038 	}
5039 
5040 	return NULL;
5041 }
5042 
5043 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5044 {
5045 	void *v;
5046 	loff_t l = 0;
5047 
5048 	preempt_disable();
5049 	arch_spin_lock(&trace_cmdline_lock);
5050 
5051 	v = &savedcmd->map_cmdline_to_pid[0];
5052 	while (l <= *pos) {
5053 		v = saved_cmdlines_next(m, v, &l);
5054 		if (!v)
5055 			return NULL;
5056 	}
5057 
5058 	return v;
5059 }
5060 
5061 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5062 {
5063 	arch_spin_unlock(&trace_cmdline_lock);
5064 	preempt_enable();
5065 }
5066 
5067 static int saved_cmdlines_show(struct seq_file *m, void *v)
5068 {
5069 	char buf[TASK_COMM_LEN];
5070 	unsigned int *pid = v;
5071 
5072 	__trace_find_cmdline(*pid, buf);
5073 	seq_printf(m, "%d %s\n", *pid, buf);
5074 	return 0;
5075 }
5076 
5077 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5078 	.start		= saved_cmdlines_start,
5079 	.next		= saved_cmdlines_next,
5080 	.stop		= saved_cmdlines_stop,
5081 	.show		= saved_cmdlines_show,
5082 };
5083 
5084 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5085 {
5086 	if (tracing_disabled)
5087 		return -ENODEV;
5088 
5089 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5090 }
5091 
5092 static const struct file_operations tracing_saved_cmdlines_fops = {
5093 	.open		= tracing_saved_cmdlines_open,
5094 	.read		= seq_read,
5095 	.llseek		= seq_lseek,
5096 	.release	= seq_release,
5097 };
5098 
5099 static ssize_t
5100 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5101 				 size_t cnt, loff_t *ppos)
5102 {
5103 	char buf[64];
5104 	int r;
5105 
5106 	arch_spin_lock(&trace_cmdline_lock);
5107 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5108 	arch_spin_unlock(&trace_cmdline_lock);
5109 
5110 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5111 }
5112 
5113 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5114 {
5115 	kfree(s->saved_cmdlines);
5116 	kfree(s->map_cmdline_to_pid);
5117 	kfree(s);
5118 }
5119 
5120 static int tracing_resize_saved_cmdlines(unsigned int val)
5121 {
5122 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5123 
5124 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5125 	if (!s)
5126 		return -ENOMEM;
5127 
5128 	if (allocate_cmdlines_buffer(val, s) < 0) {
5129 		kfree(s);
5130 		return -ENOMEM;
5131 	}
5132 
5133 	arch_spin_lock(&trace_cmdline_lock);
5134 	savedcmd_temp = savedcmd;
5135 	savedcmd = s;
5136 	arch_spin_unlock(&trace_cmdline_lock);
5137 	free_saved_cmdlines_buffer(savedcmd_temp);
5138 
5139 	return 0;
5140 }
5141 
5142 static ssize_t
5143 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5144 				  size_t cnt, loff_t *ppos)
5145 {
5146 	unsigned long val;
5147 	int ret;
5148 
5149 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5150 	if (ret)
5151 		return ret;
5152 
5153 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5154 	if (!val || val > PID_MAX_DEFAULT)
5155 		return -EINVAL;
5156 
5157 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5158 	if (ret < 0)
5159 		return ret;
5160 
5161 	*ppos += cnt;
5162 
5163 	return cnt;
5164 }
5165 
5166 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5167 	.open		= tracing_open_generic,
5168 	.read		= tracing_saved_cmdlines_size_read,
5169 	.write		= tracing_saved_cmdlines_size_write,
5170 };
5171 
5172 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5173 static union trace_eval_map_item *
5174 update_eval_map(union trace_eval_map_item *ptr)
5175 {
5176 	if (!ptr->map.eval_string) {
5177 		if (ptr->tail.next) {
5178 			ptr = ptr->tail.next;
5179 			/* Set ptr to the next real item (skip head) */
5180 			ptr++;
5181 		} else
5182 			return NULL;
5183 	}
5184 	return ptr;
5185 }
5186 
5187 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5188 {
5189 	union trace_eval_map_item *ptr = v;
5190 
5191 	/*
5192 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5193 	 * This really should never happen.
5194 	 */
5195 	ptr = update_eval_map(ptr);
5196 	if (WARN_ON_ONCE(!ptr))
5197 		return NULL;
5198 
5199 	ptr++;
5200 
5201 	(*pos)++;
5202 
5203 	ptr = update_eval_map(ptr);
5204 
5205 	return ptr;
5206 }
5207 
5208 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5209 {
5210 	union trace_eval_map_item *v;
5211 	loff_t l = 0;
5212 
5213 	mutex_lock(&trace_eval_mutex);
5214 
5215 	v = trace_eval_maps;
5216 	if (v)
5217 		v++;
5218 
5219 	while (v && l < *pos) {
5220 		v = eval_map_next(m, v, &l);
5221 	}
5222 
5223 	return v;
5224 }
5225 
5226 static void eval_map_stop(struct seq_file *m, void *v)
5227 {
5228 	mutex_unlock(&trace_eval_mutex);
5229 }
5230 
5231 static int eval_map_show(struct seq_file *m, void *v)
5232 {
5233 	union trace_eval_map_item *ptr = v;
5234 
5235 	seq_printf(m, "%s %ld (%s)\n",
5236 		   ptr->map.eval_string, ptr->map.eval_value,
5237 		   ptr->map.system);
5238 
5239 	return 0;
5240 }
5241 
5242 static const struct seq_operations tracing_eval_map_seq_ops = {
5243 	.start		= eval_map_start,
5244 	.next		= eval_map_next,
5245 	.stop		= eval_map_stop,
5246 	.show		= eval_map_show,
5247 };
5248 
5249 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5250 {
5251 	if (tracing_disabled)
5252 		return -ENODEV;
5253 
5254 	return seq_open(filp, &tracing_eval_map_seq_ops);
5255 }
5256 
5257 static const struct file_operations tracing_eval_map_fops = {
5258 	.open		= tracing_eval_map_open,
5259 	.read		= seq_read,
5260 	.llseek		= seq_lseek,
5261 	.release	= seq_release,
5262 };
5263 
5264 static inline union trace_eval_map_item *
5265 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5266 {
5267 	/* Return tail of array given the head */
5268 	return ptr + ptr->head.length + 1;
5269 }
5270 
5271 static void
5272 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5273 			   int len)
5274 {
5275 	struct trace_eval_map **stop;
5276 	struct trace_eval_map **map;
5277 	union trace_eval_map_item *map_array;
5278 	union trace_eval_map_item *ptr;
5279 
5280 	stop = start + len;
5281 
5282 	/*
5283 	 * The trace_eval_maps contains the map plus a head and tail item,
5284 	 * where the head holds the module and length of array, and the
5285 	 * tail holds a pointer to the next list.
5286 	 */
5287 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5288 	if (!map_array) {
5289 		pr_warn("Unable to allocate trace eval mapping\n");
5290 		return;
5291 	}
5292 
5293 	mutex_lock(&trace_eval_mutex);
5294 
5295 	if (!trace_eval_maps)
5296 		trace_eval_maps = map_array;
5297 	else {
5298 		ptr = trace_eval_maps;
5299 		for (;;) {
5300 			ptr = trace_eval_jmp_to_tail(ptr);
5301 			if (!ptr->tail.next)
5302 				break;
5303 			ptr = ptr->tail.next;
5304 
5305 		}
5306 		ptr->tail.next = map_array;
5307 	}
5308 	map_array->head.mod = mod;
5309 	map_array->head.length = len;
5310 	map_array++;
5311 
5312 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5313 		map_array->map = **map;
5314 		map_array++;
5315 	}
5316 	memset(map_array, 0, sizeof(*map_array));
5317 
5318 	mutex_unlock(&trace_eval_mutex);
5319 }
5320 
5321 static void trace_create_eval_file(struct dentry *d_tracer)
5322 {
5323 	trace_create_file("eval_map", 0444, d_tracer,
5324 			  NULL, &tracing_eval_map_fops);
5325 }
5326 
5327 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5328 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5329 static inline void trace_insert_eval_map_file(struct module *mod,
5330 			      struct trace_eval_map **start, int len) { }
5331 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5332 
5333 static void trace_insert_eval_map(struct module *mod,
5334 				  struct trace_eval_map **start, int len)
5335 {
5336 	struct trace_eval_map **map;
5337 
5338 	if (len <= 0)
5339 		return;
5340 
5341 	map = start;
5342 
5343 	trace_event_eval_update(map, len);
5344 
5345 	trace_insert_eval_map_file(mod, start, len);
5346 }
5347 
5348 static ssize_t
5349 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5350 		       size_t cnt, loff_t *ppos)
5351 {
5352 	struct trace_array *tr = filp->private_data;
5353 	char buf[MAX_TRACER_SIZE+2];
5354 	int r;
5355 
5356 	mutex_lock(&trace_types_lock);
5357 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5358 	mutex_unlock(&trace_types_lock);
5359 
5360 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5361 }
5362 
5363 int tracer_init(struct tracer *t, struct trace_array *tr)
5364 {
5365 	tracing_reset_online_cpus(&tr->trace_buffer);
5366 	return t->init(tr);
5367 }
5368 
5369 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5370 {
5371 	int cpu;
5372 
5373 	for_each_tracing_cpu(cpu)
5374 		per_cpu_ptr(buf->data, cpu)->entries = val;
5375 }
5376 
5377 #ifdef CONFIG_TRACER_MAX_TRACE
5378 /* resize @tr's buffer to the size of @size_tr's entries */
5379 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5380 					struct trace_buffer *size_buf, int cpu_id)
5381 {
5382 	int cpu, ret = 0;
5383 
5384 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5385 		for_each_tracing_cpu(cpu) {
5386 			ret = ring_buffer_resize(trace_buf->buffer,
5387 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5388 			if (ret < 0)
5389 				break;
5390 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5391 				per_cpu_ptr(size_buf->data, cpu)->entries;
5392 		}
5393 	} else {
5394 		ret = ring_buffer_resize(trace_buf->buffer,
5395 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5396 		if (ret == 0)
5397 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5398 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5399 	}
5400 
5401 	return ret;
5402 }
5403 #endif /* CONFIG_TRACER_MAX_TRACE */
5404 
5405 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5406 					unsigned long size, int cpu)
5407 {
5408 	int ret;
5409 
5410 	/*
5411 	 * If kernel or user changes the size of the ring buffer
5412 	 * we use the size that was given, and we can forget about
5413 	 * expanding it later.
5414 	 */
5415 	ring_buffer_expanded = true;
5416 
5417 	/* May be called before buffers are initialized */
5418 	if (!tr->trace_buffer.buffer)
5419 		return 0;
5420 
5421 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5422 	if (ret < 0)
5423 		return ret;
5424 
5425 #ifdef CONFIG_TRACER_MAX_TRACE
5426 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5427 	    !tr->current_trace->use_max_tr)
5428 		goto out;
5429 
5430 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5431 	if (ret < 0) {
5432 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5433 						     &tr->trace_buffer, cpu);
5434 		if (r < 0) {
5435 			/*
5436 			 * AARGH! We are left with different
5437 			 * size max buffer!!!!
5438 			 * The max buffer is our "snapshot" buffer.
5439 			 * When a tracer needs a snapshot (one of the
5440 			 * latency tracers), it swaps the max buffer
5441 			 * with the saved snap shot. We succeeded to
5442 			 * update the size of the main buffer, but failed to
5443 			 * update the size of the max buffer. But when we tried
5444 			 * to reset the main buffer to the original size, we
5445 			 * failed there too. This is very unlikely to
5446 			 * happen, but if it does, warn and kill all
5447 			 * tracing.
5448 			 */
5449 			WARN_ON(1);
5450 			tracing_disabled = 1;
5451 		}
5452 		return ret;
5453 	}
5454 
5455 	if (cpu == RING_BUFFER_ALL_CPUS)
5456 		set_buffer_entries(&tr->max_buffer, size);
5457 	else
5458 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5459 
5460  out:
5461 #endif /* CONFIG_TRACER_MAX_TRACE */
5462 
5463 	if (cpu == RING_BUFFER_ALL_CPUS)
5464 		set_buffer_entries(&tr->trace_buffer, size);
5465 	else
5466 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5467 
5468 	return ret;
5469 }
5470 
5471 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5472 					  unsigned long size, int cpu_id)
5473 {
5474 	int ret = size;
5475 
5476 	mutex_lock(&trace_types_lock);
5477 
5478 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5479 		/* make sure, this cpu is enabled in the mask */
5480 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5481 			ret = -EINVAL;
5482 			goto out;
5483 		}
5484 	}
5485 
5486 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5487 	if (ret < 0)
5488 		ret = -ENOMEM;
5489 
5490 out:
5491 	mutex_unlock(&trace_types_lock);
5492 
5493 	return ret;
5494 }
5495 
5496 
5497 /**
5498  * tracing_update_buffers - used by tracing facility to expand ring buffers
5499  *
5500  * To save on memory when the tracing is never used on a system with it
5501  * configured in. The ring buffers are set to a minimum size. But once
5502  * a user starts to use the tracing facility, then they need to grow
5503  * to their default size.
5504  *
5505  * This function is to be called when a tracer is about to be used.
5506  */
5507 int tracing_update_buffers(void)
5508 {
5509 	int ret = 0;
5510 
5511 	mutex_lock(&trace_types_lock);
5512 	if (!ring_buffer_expanded)
5513 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5514 						RING_BUFFER_ALL_CPUS);
5515 	mutex_unlock(&trace_types_lock);
5516 
5517 	return ret;
5518 }
5519 
5520 struct trace_option_dentry;
5521 
5522 static void
5523 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5524 
5525 /*
5526  * Used to clear out the tracer before deletion of an instance.
5527  * Must have trace_types_lock held.
5528  */
5529 static void tracing_set_nop(struct trace_array *tr)
5530 {
5531 	if (tr->current_trace == &nop_trace)
5532 		return;
5533 
5534 	tr->current_trace->enabled--;
5535 
5536 	if (tr->current_trace->reset)
5537 		tr->current_trace->reset(tr);
5538 
5539 	tr->current_trace = &nop_trace;
5540 }
5541 
5542 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5543 {
5544 	/* Only enable if the directory has been created already. */
5545 	if (!tr->dir)
5546 		return;
5547 
5548 	create_trace_option_files(tr, t);
5549 }
5550 
5551 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5552 {
5553 	struct tracer *t;
5554 #ifdef CONFIG_TRACER_MAX_TRACE
5555 	bool had_max_tr;
5556 #endif
5557 	int ret = 0;
5558 
5559 	mutex_lock(&trace_types_lock);
5560 
5561 	if (!ring_buffer_expanded) {
5562 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5563 						RING_BUFFER_ALL_CPUS);
5564 		if (ret < 0)
5565 			goto out;
5566 		ret = 0;
5567 	}
5568 
5569 	for (t = trace_types; t; t = t->next) {
5570 		if (strcmp(t->name, buf) == 0)
5571 			break;
5572 	}
5573 	if (!t) {
5574 		ret = -EINVAL;
5575 		goto out;
5576 	}
5577 	if (t == tr->current_trace)
5578 		goto out;
5579 
5580 #ifdef CONFIG_TRACER_SNAPSHOT
5581 	if (t->use_max_tr) {
5582 		arch_spin_lock(&tr->max_lock);
5583 		if (tr->cond_snapshot)
5584 			ret = -EBUSY;
5585 		arch_spin_unlock(&tr->max_lock);
5586 		if (ret)
5587 			goto out;
5588 	}
5589 #endif
5590 	/* Some tracers won't work on kernel command line */
5591 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5592 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5593 			t->name);
5594 		goto out;
5595 	}
5596 
5597 	/* Some tracers are only allowed for the top level buffer */
5598 	if (!trace_ok_for_array(t, tr)) {
5599 		ret = -EINVAL;
5600 		goto out;
5601 	}
5602 
5603 	/* If trace pipe files are being read, we can't change the tracer */
5604 	if (tr->current_trace->ref) {
5605 		ret = -EBUSY;
5606 		goto out;
5607 	}
5608 
5609 	trace_branch_disable();
5610 
5611 	tr->current_trace->enabled--;
5612 
5613 	if (tr->current_trace->reset)
5614 		tr->current_trace->reset(tr);
5615 
5616 	/* Current trace needs to be nop_trace before synchronize_rcu */
5617 	tr->current_trace = &nop_trace;
5618 
5619 #ifdef CONFIG_TRACER_MAX_TRACE
5620 	had_max_tr = tr->allocated_snapshot;
5621 
5622 	if (had_max_tr && !t->use_max_tr) {
5623 		/*
5624 		 * We need to make sure that the update_max_tr sees that
5625 		 * current_trace changed to nop_trace to keep it from
5626 		 * swapping the buffers after we resize it.
5627 		 * The update_max_tr is called from interrupts disabled
5628 		 * so a synchronized_sched() is sufficient.
5629 		 */
5630 		synchronize_rcu();
5631 		free_snapshot(tr);
5632 	}
5633 #endif
5634 
5635 #ifdef CONFIG_TRACER_MAX_TRACE
5636 	if (t->use_max_tr && !had_max_tr) {
5637 		ret = tracing_alloc_snapshot_instance(tr);
5638 		if (ret < 0)
5639 			goto out;
5640 	}
5641 #endif
5642 
5643 	if (t->init) {
5644 		ret = tracer_init(t, tr);
5645 		if (ret)
5646 			goto out;
5647 	}
5648 
5649 	tr->current_trace = t;
5650 	tr->current_trace->enabled++;
5651 	trace_branch_enable(tr);
5652  out:
5653 	mutex_unlock(&trace_types_lock);
5654 
5655 	return ret;
5656 }
5657 
5658 static ssize_t
5659 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5660 			size_t cnt, loff_t *ppos)
5661 {
5662 	struct trace_array *tr = filp->private_data;
5663 	char buf[MAX_TRACER_SIZE+1];
5664 	int i;
5665 	size_t ret;
5666 	int err;
5667 
5668 	ret = cnt;
5669 
5670 	if (cnt > MAX_TRACER_SIZE)
5671 		cnt = MAX_TRACER_SIZE;
5672 
5673 	if (copy_from_user(buf, ubuf, cnt))
5674 		return -EFAULT;
5675 
5676 	buf[cnt] = 0;
5677 
5678 	/* strip ending whitespace. */
5679 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5680 		buf[i] = 0;
5681 
5682 	err = tracing_set_tracer(tr, buf);
5683 	if (err)
5684 		return err;
5685 
5686 	*ppos += ret;
5687 
5688 	return ret;
5689 }
5690 
5691 static ssize_t
5692 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5693 		   size_t cnt, loff_t *ppos)
5694 {
5695 	char buf[64];
5696 	int r;
5697 
5698 	r = snprintf(buf, sizeof(buf), "%ld\n",
5699 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5700 	if (r > sizeof(buf))
5701 		r = sizeof(buf);
5702 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5703 }
5704 
5705 static ssize_t
5706 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5707 		    size_t cnt, loff_t *ppos)
5708 {
5709 	unsigned long val;
5710 	int ret;
5711 
5712 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5713 	if (ret)
5714 		return ret;
5715 
5716 	*ptr = val * 1000;
5717 
5718 	return cnt;
5719 }
5720 
5721 static ssize_t
5722 tracing_thresh_read(struct file *filp, char __user *ubuf,
5723 		    size_t cnt, loff_t *ppos)
5724 {
5725 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5726 }
5727 
5728 static ssize_t
5729 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5730 		     size_t cnt, loff_t *ppos)
5731 {
5732 	struct trace_array *tr = filp->private_data;
5733 	int ret;
5734 
5735 	mutex_lock(&trace_types_lock);
5736 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5737 	if (ret < 0)
5738 		goto out;
5739 
5740 	if (tr->current_trace->update_thresh) {
5741 		ret = tr->current_trace->update_thresh(tr);
5742 		if (ret < 0)
5743 			goto out;
5744 	}
5745 
5746 	ret = cnt;
5747 out:
5748 	mutex_unlock(&trace_types_lock);
5749 
5750 	return ret;
5751 }
5752 
5753 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5754 
5755 static ssize_t
5756 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5757 		     size_t cnt, loff_t *ppos)
5758 {
5759 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5760 }
5761 
5762 static ssize_t
5763 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5764 		      size_t cnt, loff_t *ppos)
5765 {
5766 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5767 }
5768 
5769 #endif
5770 
5771 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5772 {
5773 	struct trace_array *tr = inode->i_private;
5774 	struct trace_iterator *iter;
5775 	int ret = 0;
5776 
5777 	if (tracing_disabled)
5778 		return -ENODEV;
5779 
5780 	if (trace_array_get(tr) < 0)
5781 		return -ENODEV;
5782 
5783 	mutex_lock(&trace_types_lock);
5784 
5785 	/* create a buffer to store the information to pass to userspace */
5786 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5787 	if (!iter) {
5788 		ret = -ENOMEM;
5789 		__trace_array_put(tr);
5790 		goto out;
5791 	}
5792 
5793 	trace_seq_init(&iter->seq);
5794 	iter->trace = tr->current_trace;
5795 
5796 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5797 		ret = -ENOMEM;
5798 		goto fail;
5799 	}
5800 
5801 	/* trace pipe does not show start of buffer */
5802 	cpumask_setall(iter->started);
5803 
5804 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5805 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5806 
5807 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5808 	if (trace_clocks[tr->clock_id].in_ns)
5809 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5810 
5811 	iter->tr = tr;
5812 	iter->trace_buffer = &tr->trace_buffer;
5813 	iter->cpu_file = tracing_get_cpu(inode);
5814 	mutex_init(&iter->mutex);
5815 	filp->private_data = iter;
5816 
5817 	if (iter->trace->pipe_open)
5818 		iter->trace->pipe_open(iter);
5819 
5820 	nonseekable_open(inode, filp);
5821 
5822 	tr->current_trace->ref++;
5823 out:
5824 	mutex_unlock(&trace_types_lock);
5825 	return ret;
5826 
5827 fail:
5828 	kfree(iter);
5829 	__trace_array_put(tr);
5830 	mutex_unlock(&trace_types_lock);
5831 	return ret;
5832 }
5833 
5834 static int tracing_release_pipe(struct inode *inode, struct file *file)
5835 {
5836 	struct trace_iterator *iter = file->private_data;
5837 	struct trace_array *tr = inode->i_private;
5838 
5839 	mutex_lock(&trace_types_lock);
5840 
5841 	tr->current_trace->ref--;
5842 
5843 	if (iter->trace->pipe_close)
5844 		iter->trace->pipe_close(iter);
5845 
5846 	mutex_unlock(&trace_types_lock);
5847 
5848 	free_cpumask_var(iter->started);
5849 	mutex_destroy(&iter->mutex);
5850 	kfree(iter);
5851 
5852 	trace_array_put(tr);
5853 
5854 	return 0;
5855 }
5856 
5857 static __poll_t
5858 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5859 {
5860 	struct trace_array *tr = iter->tr;
5861 
5862 	/* Iterators are static, they should be filled or empty */
5863 	if (trace_buffer_iter(iter, iter->cpu_file))
5864 		return EPOLLIN | EPOLLRDNORM;
5865 
5866 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5867 		/*
5868 		 * Always select as readable when in blocking mode
5869 		 */
5870 		return EPOLLIN | EPOLLRDNORM;
5871 	else
5872 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5873 					     filp, poll_table);
5874 }
5875 
5876 static __poll_t
5877 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5878 {
5879 	struct trace_iterator *iter = filp->private_data;
5880 
5881 	return trace_poll(iter, filp, poll_table);
5882 }
5883 
5884 /* Must be called with iter->mutex held. */
5885 static int tracing_wait_pipe(struct file *filp)
5886 {
5887 	struct trace_iterator *iter = filp->private_data;
5888 	int ret;
5889 
5890 	while (trace_empty(iter)) {
5891 
5892 		if ((filp->f_flags & O_NONBLOCK)) {
5893 			return -EAGAIN;
5894 		}
5895 
5896 		/*
5897 		 * We block until we read something and tracing is disabled.
5898 		 * We still block if tracing is disabled, but we have never
5899 		 * read anything. This allows a user to cat this file, and
5900 		 * then enable tracing. But after we have read something,
5901 		 * we give an EOF when tracing is again disabled.
5902 		 *
5903 		 * iter->pos will be 0 if we haven't read anything.
5904 		 */
5905 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5906 			break;
5907 
5908 		mutex_unlock(&iter->mutex);
5909 
5910 		ret = wait_on_pipe(iter, 0);
5911 
5912 		mutex_lock(&iter->mutex);
5913 
5914 		if (ret)
5915 			return ret;
5916 	}
5917 
5918 	return 1;
5919 }
5920 
5921 /*
5922  * Consumer reader.
5923  */
5924 static ssize_t
5925 tracing_read_pipe(struct file *filp, char __user *ubuf,
5926 		  size_t cnt, loff_t *ppos)
5927 {
5928 	struct trace_iterator *iter = filp->private_data;
5929 	ssize_t sret;
5930 
5931 	/*
5932 	 * Avoid more than one consumer on a single file descriptor
5933 	 * This is just a matter of traces coherency, the ring buffer itself
5934 	 * is protected.
5935 	 */
5936 	mutex_lock(&iter->mutex);
5937 
5938 	/* return any leftover data */
5939 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5940 	if (sret != -EBUSY)
5941 		goto out;
5942 
5943 	trace_seq_init(&iter->seq);
5944 
5945 	if (iter->trace->read) {
5946 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5947 		if (sret)
5948 			goto out;
5949 	}
5950 
5951 waitagain:
5952 	sret = tracing_wait_pipe(filp);
5953 	if (sret <= 0)
5954 		goto out;
5955 
5956 	/* stop when tracing is finished */
5957 	if (trace_empty(iter)) {
5958 		sret = 0;
5959 		goto out;
5960 	}
5961 
5962 	if (cnt >= PAGE_SIZE)
5963 		cnt = PAGE_SIZE - 1;
5964 
5965 	/* reset all but tr, trace, and overruns */
5966 	memset(&iter->seq, 0,
5967 	       sizeof(struct trace_iterator) -
5968 	       offsetof(struct trace_iterator, seq));
5969 	cpumask_clear(iter->started);
5970 	iter->pos = -1;
5971 
5972 	trace_event_read_lock();
5973 	trace_access_lock(iter->cpu_file);
5974 	while (trace_find_next_entry_inc(iter) != NULL) {
5975 		enum print_line_t ret;
5976 		int save_len = iter->seq.seq.len;
5977 
5978 		ret = print_trace_line(iter);
5979 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5980 			/* don't print partial lines */
5981 			iter->seq.seq.len = save_len;
5982 			break;
5983 		}
5984 		if (ret != TRACE_TYPE_NO_CONSUME)
5985 			trace_consume(iter);
5986 
5987 		if (trace_seq_used(&iter->seq) >= cnt)
5988 			break;
5989 
5990 		/*
5991 		 * Setting the full flag means we reached the trace_seq buffer
5992 		 * size and we should leave by partial output condition above.
5993 		 * One of the trace_seq_* functions is not used properly.
5994 		 */
5995 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5996 			  iter->ent->type);
5997 	}
5998 	trace_access_unlock(iter->cpu_file);
5999 	trace_event_read_unlock();
6000 
6001 	/* Now copy what we have to the user */
6002 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6003 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6004 		trace_seq_init(&iter->seq);
6005 
6006 	/*
6007 	 * If there was nothing to send to user, in spite of consuming trace
6008 	 * entries, go back to wait for more entries.
6009 	 */
6010 	if (sret == -EBUSY)
6011 		goto waitagain;
6012 
6013 out:
6014 	mutex_unlock(&iter->mutex);
6015 
6016 	return sret;
6017 }
6018 
6019 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6020 				     unsigned int idx)
6021 {
6022 	__free_page(spd->pages[idx]);
6023 }
6024 
6025 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6026 	.confirm		= generic_pipe_buf_confirm,
6027 	.release		= generic_pipe_buf_release,
6028 	.steal			= generic_pipe_buf_steal,
6029 	.get			= generic_pipe_buf_get,
6030 };
6031 
6032 static size_t
6033 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6034 {
6035 	size_t count;
6036 	int save_len;
6037 	int ret;
6038 
6039 	/* Seq buffer is page-sized, exactly what we need. */
6040 	for (;;) {
6041 		save_len = iter->seq.seq.len;
6042 		ret = print_trace_line(iter);
6043 
6044 		if (trace_seq_has_overflowed(&iter->seq)) {
6045 			iter->seq.seq.len = save_len;
6046 			break;
6047 		}
6048 
6049 		/*
6050 		 * This should not be hit, because it should only
6051 		 * be set if the iter->seq overflowed. But check it
6052 		 * anyway to be safe.
6053 		 */
6054 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6055 			iter->seq.seq.len = save_len;
6056 			break;
6057 		}
6058 
6059 		count = trace_seq_used(&iter->seq) - save_len;
6060 		if (rem < count) {
6061 			rem = 0;
6062 			iter->seq.seq.len = save_len;
6063 			break;
6064 		}
6065 
6066 		if (ret != TRACE_TYPE_NO_CONSUME)
6067 			trace_consume(iter);
6068 		rem -= count;
6069 		if (!trace_find_next_entry_inc(iter))	{
6070 			rem = 0;
6071 			iter->ent = NULL;
6072 			break;
6073 		}
6074 	}
6075 
6076 	return rem;
6077 }
6078 
6079 static ssize_t tracing_splice_read_pipe(struct file *filp,
6080 					loff_t *ppos,
6081 					struct pipe_inode_info *pipe,
6082 					size_t len,
6083 					unsigned int flags)
6084 {
6085 	struct page *pages_def[PIPE_DEF_BUFFERS];
6086 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6087 	struct trace_iterator *iter = filp->private_data;
6088 	struct splice_pipe_desc spd = {
6089 		.pages		= pages_def,
6090 		.partial	= partial_def,
6091 		.nr_pages	= 0, /* This gets updated below. */
6092 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6093 		.ops		= &tracing_pipe_buf_ops,
6094 		.spd_release	= tracing_spd_release_pipe,
6095 	};
6096 	ssize_t ret;
6097 	size_t rem;
6098 	unsigned int i;
6099 
6100 	if (splice_grow_spd(pipe, &spd))
6101 		return -ENOMEM;
6102 
6103 	mutex_lock(&iter->mutex);
6104 
6105 	if (iter->trace->splice_read) {
6106 		ret = iter->trace->splice_read(iter, filp,
6107 					       ppos, pipe, len, flags);
6108 		if (ret)
6109 			goto out_err;
6110 	}
6111 
6112 	ret = tracing_wait_pipe(filp);
6113 	if (ret <= 0)
6114 		goto out_err;
6115 
6116 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6117 		ret = -EFAULT;
6118 		goto out_err;
6119 	}
6120 
6121 	trace_event_read_lock();
6122 	trace_access_lock(iter->cpu_file);
6123 
6124 	/* Fill as many pages as possible. */
6125 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6126 		spd.pages[i] = alloc_page(GFP_KERNEL);
6127 		if (!spd.pages[i])
6128 			break;
6129 
6130 		rem = tracing_fill_pipe_page(rem, iter);
6131 
6132 		/* Copy the data into the page, so we can start over. */
6133 		ret = trace_seq_to_buffer(&iter->seq,
6134 					  page_address(spd.pages[i]),
6135 					  trace_seq_used(&iter->seq));
6136 		if (ret < 0) {
6137 			__free_page(spd.pages[i]);
6138 			break;
6139 		}
6140 		spd.partial[i].offset = 0;
6141 		spd.partial[i].len = trace_seq_used(&iter->seq);
6142 
6143 		trace_seq_init(&iter->seq);
6144 	}
6145 
6146 	trace_access_unlock(iter->cpu_file);
6147 	trace_event_read_unlock();
6148 	mutex_unlock(&iter->mutex);
6149 
6150 	spd.nr_pages = i;
6151 
6152 	if (i)
6153 		ret = splice_to_pipe(pipe, &spd);
6154 	else
6155 		ret = 0;
6156 out:
6157 	splice_shrink_spd(&spd);
6158 	return ret;
6159 
6160 out_err:
6161 	mutex_unlock(&iter->mutex);
6162 	goto out;
6163 }
6164 
6165 static ssize_t
6166 tracing_entries_read(struct file *filp, char __user *ubuf,
6167 		     size_t cnt, loff_t *ppos)
6168 {
6169 	struct inode *inode = file_inode(filp);
6170 	struct trace_array *tr = inode->i_private;
6171 	int cpu = tracing_get_cpu(inode);
6172 	char buf[64];
6173 	int r = 0;
6174 	ssize_t ret;
6175 
6176 	mutex_lock(&trace_types_lock);
6177 
6178 	if (cpu == RING_BUFFER_ALL_CPUS) {
6179 		int cpu, buf_size_same;
6180 		unsigned long size;
6181 
6182 		size = 0;
6183 		buf_size_same = 1;
6184 		/* check if all cpu sizes are same */
6185 		for_each_tracing_cpu(cpu) {
6186 			/* fill in the size from first enabled cpu */
6187 			if (size == 0)
6188 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6189 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6190 				buf_size_same = 0;
6191 				break;
6192 			}
6193 		}
6194 
6195 		if (buf_size_same) {
6196 			if (!ring_buffer_expanded)
6197 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6198 					    size >> 10,
6199 					    trace_buf_size >> 10);
6200 			else
6201 				r = sprintf(buf, "%lu\n", size >> 10);
6202 		} else
6203 			r = sprintf(buf, "X\n");
6204 	} else
6205 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6206 
6207 	mutex_unlock(&trace_types_lock);
6208 
6209 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6210 	return ret;
6211 }
6212 
6213 static ssize_t
6214 tracing_entries_write(struct file *filp, const char __user *ubuf,
6215 		      size_t cnt, loff_t *ppos)
6216 {
6217 	struct inode *inode = file_inode(filp);
6218 	struct trace_array *tr = inode->i_private;
6219 	unsigned long val;
6220 	int ret;
6221 
6222 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6223 	if (ret)
6224 		return ret;
6225 
6226 	/* must have at least 1 entry */
6227 	if (!val)
6228 		return -EINVAL;
6229 
6230 	/* value is in KB */
6231 	val <<= 10;
6232 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6233 	if (ret < 0)
6234 		return ret;
6235 
6236 	*ppos += cnt;
6237 
6238 	return cnt;
6239 }
6240 
6241 static ssize_t
6242 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6243 				size_t cnt, loff_t *ppos)
6244 {
6245 	struct trace_array *tr = filp->private_data;
6246 	char buf[64];
6247 	int r, cpu;
6248 	unsigned long size = 0, expanded_size = 0;
6249 
6250 	mutex_lock(&trace_types_lock);
6251 	for_each_tracing_cpu(cpu) {
6252 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6253 		if (!ring_buffer_expanded)
6254 			expanded_size += trace_buf_size >> 10;
6255 	}
6256 	if (ring_buffer_expanded)
6257 		r = sprintf(buf, "%lu\n", size);
6258 	else
6259 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6260 	mutex_unlock(&trace_types_lock);
6261 
6262 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6263 }
6264 
6265 static ssize_t
6266 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6267 			  size_t cnt, loff_t *ppos)
6268 {
6269 	/*
6270 	 * There is no need to read what the user has written, this function
6271 	 * is just to make sure that there is no error when "echo" is used
6272 	 */
6273 
6274 	*ppos += cnt;
6275 
6276 	return cnt;
6277 }
6278 
6279 static int
6280 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6281 {
6282 	struct trace_array *tr = inode->i_private;
6283 
6284 	/* disable tracing ? */
6285 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6286 		tracer_tracing_off(tr);
6287 	/* resize the ring buffer to 0 */
6288 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6289 
6290 	trace_array_put(tr);
6291 
6292 	return 0;
6293 }
6294 
6295 static ssize_t
6296 tracing_mark_write(struct file *filp, const char __user *ubuf,
6297 					size_t cnt, loff_t *fpos)
6298 {
6299 	struct trace_array *tr = filp->private_data;
6300 	struct ring_buffer_event *event;
6301 	enum event_trigger_type tt = ETT_NONE;
6302 	struct ring_buffer *buffer;
6303 	struct print_entry *entry;
6304 	unsigned long irq_flags;
6305 	const char faulted[] = "<faulted>";
6306 	ssize_t written;
6307 	int size;
6308 	int len;
6309 
6310 /* Used in tracing_mark_raw_write() as well */
6311 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6312 
6313 	if (tracing_disabled)
6314 		return -EINVAL;
6315 
6316 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6317 		return -EINVAL;
6318 
6319 	if (cnt > TRACE_BUF_SIZE)
6320 		cnt = TRACE_BUF_SIZE;
6321 
6322 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6323 
6324 	local_save_flags(irq_flags);
6325 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6326 
6327 	/* If less than "<faulted>", then make sure we can still add that */
6328 	if (cnt < FAULTED_SIZE)
6329 		size += FAULTED_SIZE - cnt;
6330 
6331 	buffer = tr->trace_buffer.buffer;
6332 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6333 					    irq_flags, preempt_count());
6334 	if (unlikely(!event))
6335 		/* Ring buffer disabled, return as if not open for write */
6336 		return -EBADF;
6337 
6338 	entry = ring_buffer_event_data(event);
6339 	entry->ip = _THIS_IP_;
6340 
6341 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6342 	if (len) {
6343 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6344 		cnt = FAULTED_SIZE;
6345 		written = -EFAULT;
6346 	} else
6347 		written = cnt;
6348 	len = cnt;
6349 
6350 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6351 		/* do not add \n before testing triggers, but add \0 */
6352 		entry->buf[cnt] = '\0';
6353 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6354 	}
6355 
6356 	if (entry->buf[cnt - 1] != '\n') {
6357 		entry->buf[cnt] = '\n';
6358 		entry->buf[cnt + 1] = '\0';
6359 	} else
6360 		entry->buf[cnt] = '\0';
6361 
6362 	__buffer_unlock_commit(buffer, event);
6363 
6364 	if (tt)
6365 		event_triggers_post_call(tr->trace_marker_file, tt);
6366 
6367 	if (written > 0)
6368 		*fpos += written;
6369 
6370 	return written;
6371 }
6372 
6373 /* Limit it for now to 3K (including tag) */
6374 #define RAW_DATA_MAX_SIZE (1024*3)
6375 
6376 static ssize_t
6377 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6378 					size_t cnt, loff_t *fpos)
6379 {
6380 	struct trace_array *tr = filp->private_data;
6381 	struct ring_buffer_event *event;
6382 	struct ring_buffer *buffer;
6383 	struct raw_data_entry *entry;
6384 	const char faulted[] = "<faulted>";
6385 	unsigned long irq_flags;
6386 	ssize_t written;
6387 	int size;
6388 	int len;
6389 
6390 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6391 
6392 	if (tracing_disabled)
6393 		return -EINVAL;
6394 
6395 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6396 		return -EINVAL;
6397 
6398 	/* The marker must at least have a tag id */
6399 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6400 		return -EINVAL;
6401 
6402 	if (cnt > TRACE_BUF_SIZE)
6403 		cnt = TRACE_BUF_SIZE;
6404 
6405 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6406 
6407 	local_save_flags(irq_flags);
6408 	size = sizeof(*entry) + cnt;
6409 	if (cnt < FAULT_SIZE_ID)
6410 		size += FAULT_SIZE_ID - cnt;
6411 
6412 	buffer = tr->trace_buffer.buffer;
6413 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6414 					    irq_flags, preempt_count());
6415 	if (!event)
6416 		/* Ring buffer disabled, return as if not open for write */
6417 		return -EBADF;
6418 
6419 	entry = ring_buffer_event_data(event);
6420 
6421 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6422 	if (len) {
6423 		entry->id = -1;
6424 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6425 		written = -EFAULT;
6426 	} else
6427 		written = cnt;
6428 
6429 	__buffer_unlock_commit(buffer, event);
6430 
6431 	if (written > 0)
6432 		*fpos += written;
6433 
6434 	return written;
6435 }
6436 
6437 static int tracing_clock_show(struct seq_file *m, void *v)
6438 {
6439 	struct trace_array *tr = m->private;
6440 	int i;
6441 
6442 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6443 		seq_printf(m,
6444 			"%s%s%s%s", i ? " " : "",
6445 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6446 			i == tr->clock_id ? "]" : "");
6447 	seq_putc(m, '\n');
6448 
6449 	return 0;
6450 }
6451 
6452 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6453 {
6454 	int i;
6455 
6456 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6457 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6458 			break;
6459 	}
6460 	if (i == ARRAY_SIZE(trace_clocks))
6461 		return -EINVAL;
6462 
6463 	mutex_lock(&trace_types_lock);
6464 
6465 	tr->clock_id = i;
6466 
6467 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6468 
6469 	/*
6470 	 * New clock may not be consistent with the previous clock.
6471 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6472 	 */
6473 	tracing_reset_online_cpus(&tr->trace_buffer);
6474 
6475 #ifdef CONFIG_TRACER_MAX_TRACE
6476 	if (tr->max_buffer.buffer)
6477 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6478 	tracing_reset_online_cpus(&tr->max_buffer);
6479 #endif
6480 
6481 	mutex_unlock(&trace_types_lock);
6482 
6483 	return 0;
6484 }
6485 
6486 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6487 				   size_t cnt, loff_t *fpos)
6488 {
6489 	struct seq_file *m = filp->private_data;
6490 	struct trace_array *tr = m->private;
6491 	char buf[64];
6492 	const char *clockstr;
6493 	int ret;
6494 
6495 	if (cnt >= sizeof(buf))
6496 		return -EINVAL;
6497 
6498 	if (copy_from_user(buf, ubuf, cnt))
6499 		return -EFAULT;
6500 
6501 	buf[cnt] = 0;
6502 
6503 	clockstr = strstrip(buf);
6504 
6505 	ret = tracing_set_clock(tr, clockstr);
6506 	if (ret)
6507 		return ret;
6508 
6509 	*fpos += cnt;
6510 
6511 	return cnt;
6512 }
6513 
6514 static int tracing_clock_open(struct inode *inode, struct file *file)
6515 {
6516 	struct trace_array *tr = inode->i_private;
6517 	int ret;
6518 
6519 	if (tracing_disabled)
6520 		return -ENODEV;
6521 
6522 	if (trace_array_get(tr))
6523 		return -ENODEV;
6524 
6525 	ret = single_open(file, tracing_clock_show, inode->i_private);
6526 	if (ret < 0)
6527 		trace_array_put(tr);
6528 
6529 	return ret;
6530 }
6531 
6532 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6533 {
6534 	struct trace_array *tr = m->private;
6535 
6536 	mutex_lock(&trace_types_lock);
6537 
6538 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6539 		seq_puts(m, "delta [absolute]\n");
6540 	else
6541 		seq_puts(m, "[delta] absolute\n");
6542 
6543 	mutex_unlock(&trace_types_lock);
6544 
6545 	return 0;
6546 }
6547 
6548 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6549 {
6550 	struct trace_array *tr = inode->i_private;
6551 	int ret;
6552 
6553 	if (tracing_disabled)
6554 		return -ENODEV;
6555 
6556 	if (trace_array_get(tr))
6557 		return -ENODEV;
6558 
6559 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6560 	if (ret < 0)
6561 		trace_array_put(tr);
6562 
6563 	return ret;
6564 }
6565 
6566 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6567 {
6568 	int ret = 0;
6569 
6570 	mutex_lock(&trace_types_lock);
6571 
6572 	if (abs && tr->time_stamp_abs_ref++)
6573 		goto out;
6574 
6575 	if (!abs) {
6576 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6577 			ret = -EINVAL;
6578 			goto out;
6579 		}
6580 
6581 		if (--tr->time_stamp_abs_ref)
6582 			goto out;
6583 	}
6584 
6585 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6586 
6587 #ifdef CONFIG_TRACER_MAX_TRACE
6588 	if (tr->max_buffer.buffer)
6589 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6590 #endif
6591  out:
6592 	mutex_unlock(&trace_types_lock);
6593 
6594 	return ret;
6595 }
6596 
6597 struct ftrace_buffer_info {
6598 	struct trace_iterator	iter;
6599 	void			*spare;
6600 	unsigned int		spare_cpu;
6601 	unsigned int		read;
6602 };
6603 
6604 #ifdef CONFIG_TRACER_SNAPSHOT
6605 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6606 {
6607 	struct trace_array *tr = inode->i_private;
6608 	struct trace_iterator *iter;
6609 	struct seq_file *m;
6610 	int ret = 0;
6611 
6612 	if (trace_array_get(tr) < 0)
6613 		return -ENODEV;
6614 
6615 	if (file->f_mode & FMODE_READ) {
6616 		iter = __tracing_open(inode, file, true);
6617 		if (IS_ERR(iter))
6618 			ret = PTR_ERR(iter);
6619 	} else {
6620 		/* Writes still need the seq_file to hold the private data */
6621 		ret = -ENOMEM;
6622 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6623 		if (!m)
6624 			goto out;
6625 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6626 		if (!iter) {
6627 			kfree(m);
6628 			goto out;
6629 		}
6630 		ret = 0;
6631 
6632 		iter->tr = tr;
6633 		iter->trace_buffer = &tr->max_buffer;
6634 		iter->cpu_file = tracing_get_cpu(inode);
6635 		m->private = iter;
6636 		file->private_data = m;
6637 	}
6638 out:
6639 	if (ret < 0)
6640 		trace_array_put(tr);
6641 
6642 	return ret;
6643 }
6644 
6645 static ssize_t
6646 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6647 		       loff_t *ppos)
6648 {
6649 	struct seq_file *m = filp->private_data;
6650 	struct trace_iterator *iter = m->private;
6651 	struct trace_array *tr = iter->tr;
6652 	unsigned long val;
6653 	int ret;
6654 
6655 	ret = tracing_update_buffers();
6656 	if (ret < 0)
6657 		return ret;
6658 
6659 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6660 	if (ret)
6661 		return ret;
6662 
6663 	mutex_lock(&trace_types_lock);
6664 
6665 	if (tr->current_trace->use_max_tr) {
6666 		ret = -EBUSY;
6667 		goto out;
6668 	}
6669 
6670 	arch_spin_lock(&tr->max_lock);
6671 	if (tr->cond_snapshot)
6672 		ret = -EBUSY;
6673 	arch_spin_unlock(&tr->max_lock);
6674 	if (ret)
6675 		goto out;
6676 
6677 	switch (val) {
6678 	case 0:
6679 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6680 			ret = -EINVAL;
6681 			break;
6682 		}
6683 		if (tr->allocated_snapshot)
6684 			free_snapshot(tr);
6685 		break;
6686 	case 1:
6687 /* Only allow per-cpu swap if the ring buffer supports it */
6688 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6689 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6690 			ret = -EINVAL;
6691 			break;
6692 		}
6693 #endif
6694 		if (!tr->allocated_snapshot) {
6695 			ret = tracing_alloc_snapshot_instance(tr);
6696 			if (ret < 0)
6697 				break;
6698 		}
6699 		local_irq_disable();
6700 		/* Now, we're going to swap */
6701 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6702 			update_max_tr(tr, current, smp_processor_id(), NULL);
6703 		else
6704 			update_max_tr_single(tr, current, iter->cpu_file);
6705 		local_irq_enable();
6706 		break;
6707 	default:
6708 		if (tr->allocated_snapshot) {
6709 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6710 				tracing_reset_online_cpus(&tr->max_buffer);
6711 			else
6712 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6713 		}
6714 		break;
6715 	}
6716 
6717 	if (ret >= 0) {
6718 		*ppos += cnt;
6719 		ret = cnt;
6720 	}
6721 out:
6722 	mutex_unlock(&trace_types_lock);
6723 	return ret;
6724 }
6725 
6726 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6727 {
6728 	struct seq_file *m = file->private_data;
6729 	int ret;
6730 
6731 	ret = tracing_release(inode, file);
6732 
6733 	if (file->f_mode & FMODE_READ)
6734 		return ret;
6735 
6736 	/* If write only, the seq_file is just a stub */
6737 	if (m)
6738 		kfree(m->private);
6739 	kfree(m);
6740 
6741 	return 0;
6742 }
6743 
6744 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6745 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6746 				    size_t count, loff_t *ppos);
6747 static int tracing_buffers_release(struct inode *inode, struct file *file);
6748 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6749 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6750 
6751 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6752 {
6753 	struct ftrace_buffer_info *info;
6754 	int ret;
6755 
6756 	ret = tracing_buffers_open(inode, filp);
6757 	if (ret < 0)
6758 		return ret;
6759 
6760 	info = filp->private_data;
6761 
6762 	if (info->iter.trace->use_max_tr) {
6763 		tracing_buffers_release(inode, filp);
6764 		return -EBUSY;
6765 	}
6766 
6767 	info->iter.snapshot = true;
6768 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6769 
6770 	return ret;
6771 }
6772 
6773 #endif /* CONFIG_TRACER_SNAPSHOT */
6774 
6775 
6776 static const struct file_operations tracing_thresh_fops = {
6777 	.open		= tracing_open_generic,
6778 	.read		= tracing_thresh_read,
6779 	.write		= tracing_thresh_write,
6780 	.llseek		= generic_file_llseek,
6781 };
6782 
6783 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6784 static const struct file_operations tracing_max_lat_fops = {
6785 	.open		= tracing_open_generic,
6786 	.read		= tracing_max_lat_read,
6787 	.write		= tracing_max_lat_write,
6788 	.llseek		= generic_file_llseek,
6789 };
6790 #endif
6791 
6792 static const struct file_operations set_tracer_fops = {
6793 	.open		= tracing_open_generic,
6794 	.read		= tracing_set_trace_read,
6795 	.write		= tracing_set_trace_write,
6796 	.llseek		= generic_file_llseek,
6797 };
6798 
6799 static const struct file_operations tracing_pipe_fops = {
6800 	.open		= tracing_open_pipe,
6801 	.poll		= tracing_poll_pipe,
6802 	.read		= tracing_read_pipe,
6803 	.splice_read	= tracing_splice_read_pipe,
6804 	.release	= tracing_release_pipe,
6805 	.llseek		= no_llseek,
6806 };
6807 
6808 static const struct file_operations tracing_entries_fops = {
6809 	.open		= tracing_open_generic_tr,
6810 	.read		= tracing_entries_read,
6811 	.write		= tracing_entries_write,
6812 	.llseek		= generic_file_llseek,
6813 	.release	= tracing_release_generic_tr,
6814 };
6815 
6816 static const struct file_operations tracing_total_entries_fops = {
6817 	.open		= tracing_open_generic_tr,
6818 	.read		= tracing_total_entries_read,
6819 	.llseek		= generic_file_llseek,
6820 	.release	= tracing_release_generic_tr,
6821 };
6822 
6823 static const struct file_operations tracing_free_buffer_fops = {
6824 	.open		= tracing_open_generic_tr,
6825 	.write		= tracing_free_buffer_write,
6826 	.release	= tracing_free_buffer_release,
6827 };
6828 
6829 static const struct file_operations tracing_mark_fops = {
6830 	.open		= tracing_open_generic_tr,
6831 	.write		= tracing_mark_write,
6832 	.llseek		= generic_file_llseek,
6833 	.release	= tracing_release_generic_tr,
6834 };
6835 
6836 static const struct file_operations tracing_mark_raw_fops = {
6837 	.open		= tracing_open_generic_tr,
6838 	.write		= tracing_mark_raw_write,
6839 	.llseek		= generic_file_llseek,
6840 	.release	= tracing_release_generic_tr,
6841 };
6842 
6843 static const struct file_operations trace_clock_fops = {
6844 	.open		= tracing_clock_open,
6845 	.read		= seq_read,
6846 	.llseek		= seq_lseek,
6847 	.release	= tracing_single_release_tr,
6848 	.write		= tracing_clock_write,
6849 };
6850 
6851 static const struct file_operations trace_time_stamp_mode_fops = {
6852 	.open		= tracing_time_stamp_mode_open,
6853 	.read		= seq_read,
6854 	.llseek		= seq_lseek,
6855 	.release	= tracing_single_release_tr,
6856 };
6857 
6858 #ifdef CONFIG_TRACER_SNAPSHOT
6859 static const struct file_operations snapshot_fops = {
6860 	.open		= tracing_snapshot_open,
6861 	.read		= seq_read,
6862 	.write		= tracing_snapshot_write,
6863 	.llseek		= tracing_lseek,
6864 	.release	= tracing_snapshot_release,
6865 };
6866 
6867 static const struct file_operations snapshot_raw_fops = {
6868 	.open		= snapshot_raw_open,
6869 	.read		= tracing_buffers_read,
6870 	.release	= tracing_buffers_release,
6871 	.splice_read	= tracing_buffers_splice_read,
6872 	.llseek		= no_llseek,
6873 };
6874 
6875 #endif /* CONFIG_TRACER_SNAPSHOT */
6876 
6877 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6878 {
6879 	struct trace_array *tr = inode->i_private;
6880 	struct ftrace_buffer_info *info;
6881 	int ret;
6882 
6883 	if (tracing_disabled)
6884 		return -ENODEV;
6885 
6886 	if (trace_array_get(tr) < 0)
6887 		return -ENODEV;
6888 
6889 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6890 	if (!info) {
6891 		trace_array_put(tr);
6892 		return -ENOMEM;
6893 	}
6894 
6895 	mutex_lock(&trace_types_lock);
6896 
6897 	info->iter.tr		= tr;
6898 	info->iter.cpu_file	= tracing_get_cpu(inode);
6899 	info->iter.trace	= tr->current_trace;
6900 	info->iter.trace_buffer = &tr->trace_buffer;
6901 	info->spare		= NULL;
6902 	/* Force reading ring buffer for first read */
6903 	info->read		= (unsigned int)-1;
6904 
6905 	filp->private_data = info;
6906 
6907 	tr->current_trace->ref++;
6908 
6909 	mutex_unlock(&trace_types_lock);
6910 
6911 	ret = nonseekable_open(inode, filp);
6912 	if (ret < 0)
6913 		trace_array_put(tr);
6914 
6915 	return ret;
6916 }
6917 
6918 static __poll_t
6919 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6920 {
6921 	struct ftrace_buffer_info *info = filp->private_data;
6922 	struct trace_iterator *iter = &info->iter;
6923 
6924 	return trace_poll(iter, filp, poll_table);
6925 }
6926 
6927 static ssize_t
6928 tracing_buffers_read(struct file *filp, char __user *ubuf,
6929 		     size_t count, loff_t *ppos)
6930 {
6931 	struct ftrace_buffer_info *info = filp->private_data;
6932 	struct trace_iterator *iter = &info->iter;
6933 	ssize_t ret = 0;
6934 	ssize_t size;
6935 
6936 	if (!count)
6937 		return 0;
6938 
6939 #ifdef CONFIG_TRACER_MAX_TRACE
6940 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6941 		return -EBUSY;
6942 #endif
6943 
6944 	if (!info->spare) {
6945 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6946 							  iter->cpu_file);
6947 		if (IS_ERR(info->spare)) {
6948 			ret = PTR_ERR(info->spare);
6949 			info->spare = NULL;
6950 		} else {
6951 			info->spare_cpu = iter->cpu_file;
6952 		}
6953 	}
6954 	if (!info->spare)
6955 		return ret;
6956 
6957 	/* Do we have previous read data to read? */
6958 	if (info->read < PAGE_SIZE)
6959 		goto read;
6960 
6961  again:
6962 	trace_access_lock(iter->cpu_file);
6963 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6964 				    &info->spare,
6965 				    count,
6966 				    iter->cpu_file, 0);
6967 	trace_access_unlock(iter->cpu_file);
6968 
6969 	if (ret < 0) {
6970 		if (trace_empty(iter)) {
6971 			if ((filp->f_flags & O_NONBLOCK))
6972 				return -EAGAIN;
6973 
6974 			ret = wait_on_pipe(iter, 0);
6975 			if (ret)
6976 				return ret;
6977 
6978 			goto again;
6979 		}
6980 		return 0;
6981 	}
6982 
6983 	info->read = 0;
6984  read:
6985 	size = PAGE_SIZE - info->read;
6986 	if (size > count)
6987 		size = count;
6988 
6989 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6990 	if (ret == size)
6991 		return -EFAULT;
6992 
6993 	size -= ret;
6994 
6995 	*ppos += size;
6996 	info->read += size;
6997 
6998 	return size;
6999 }
7000 
7001 static int tracing_buffers_release(struct inode *inode, struct file *file)
7002 {
7003 	struct ftrace_buffer_info *info = file->private_data;
7004 	struct trace_iterator *iter = &info->iter;
7005 
7006 	mutex_lock(&trace_types_lock);
7007 
7008 	iter->tr->current_trace->ref--;
7009 
7010 	__trace_array_put(iter->tr);
7011 
7012 	if (info->spare)
7013 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
7014 					   info->spare_cpu, info->spare);
7015 	kfree(info);
7016 
7017 	mutex_unlock(&trace_types_lock);
7018 
7019 	return 0;
7020 }
7021 
7022 struct buffer_ref {
7023 	struct ring_buffer	*buffer;
7024 	void			*page;
7025 	int			cpu;
7026 	int			ref;
7027 };
7028 
7029 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7030 				    struct pipe_buffer *buf)
7031 {
7032 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7033 
7034 	if (--ref->ref)
7035 		return;
7036 
7037 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7038 	kfree(ref);
7039 	buf->private = 0;
7040 }
7041 
7042 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7043 				struct pipe_buffer *buf)
7044 {
7045 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7046 
7047 	ref->ref++;
7048 }
7049 
7050 /* Pipe buffer operations for a buffer. */
7051 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7052 	.confirm		= generic_pipe_buf_confirm,
7053 	.release		= buffer_pipe_buf_release,
7054 	.steal			= generic_pipe_buf_steal,
7055 	.get			= buffer_pipe_buf_get,
7056 };
7057 
7058 /*
7059  * Callback from splice_to_pipe(), if we need to release some pages
7060  * at the end of the spd in case we error'ed out in filling the pipe.
7061  */
7062 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7063 {
7064 	struct buffer_ref *ref =
7065 		(struct buffer_ref *)spd->partial[i].private;
7066 
7067 	if (--ref->ref)
7068 		return;
7069 
7070 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7071 	kfree(ref);
7072 	spd->partial[i].private = 0;
7073 }
7074 
7075 static ssize_t
7076 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7077 			    struct pipe_inode_info *pipe, size_t len,
7078 			    unsigned int flags)
7079 {
7080 	struct ftrace_buffer_info *info = file->private_data;
7081 	struct trace_iterator *iter = &info->iter;
7082 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7083 	struct page *pages_def[PIPE_DEF_BUFFERS];
7084 	struct splice_pipe_desc spd = {
7085 		.pages		= pages_def,
7086 		.partial	= partial_def,
7087 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7088 		.ops		= &buffer_pipe_buf_ops,
7089 		.spd_release	= buffer_spd_release,
7090 	};
7091 	struct buffer_ref *ref;
7092 	int entries, i;
7093 	ssize_t ret = 0;
7094 
7095 #ifdef CONFIG_TRACER_MAX_TRACE
7096 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7097 		return -EBUSY;
7098 #endif
7099 
7100 	if (*ppos & (PAGE_SIZE - 1))
7101 		return -EINVAL;
7102 
7103 	if (len & (PAGE_SIZE - 1)) {
7104 		if (len < PAGE_SIZE)
7105 			return -EINVAL;
7106 		len &= PAGE_MASK;
7107 	}
7108 
7109 	if (splice_grow_spd(pipe, &spd))
7110 		return -ENOMEM;
7111 
7112  again:
7113 	trace_access_lock(iter->cpu_file);
7114 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7115 
7116 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7117 		struct page *page;
7118 		int r;
7119 
7120 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7121 		if (!ref) {
7122 			ret = -ENOMEM;
7123 			break;
7124 		}
7125 
7126 		ref->ref = 1;
7127 		ref->buffer = iter->trace_buffer->buffer;
7128 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7129 		if (IS_ERR(ref->page)) {
7130 			ret = PTR_ERR(ref->page);
7131 			ref->page = NULL;
7132 			kfree(ref);
7133 			break;
7134 		}
7135 		ref->cpu = iter->cpu_file;
7136 
7137 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7138 					  len, iter->cpu_file, 1);
7139 		if (r < 0) {
7140 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7141 						   ref->page);
7142 			kfree(ref);
7143 			break;
7144 		}
7145 
7146 		page = virt_to_page(ref->page);
7147 
7148 		spd.pages[i] = page;
7149 		spd.partial[i].len = PAGE_SIZE;
7150 		spd.partial[i].offset = 0;
7151 		spd.partial[i].private = (unsigned long)ref;
7152 		spd.nr_pages++;
7153 		*ppos += PAGE_SIZE;
7154 
7155 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7156 	}
7157 
7158 	trace_access_unlock(iter->cpu_file);
7159 	spd.nr_pages = i;
7160 
7161 	/* did we read anything? */
7162 	if (!spd.nr_pages) {
7163 		if (ret)
7164 			goto out;
7165 
7166 		ret = -EAGAIN;
7167 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7168 			goto out;
7169 
7170 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7171 		if (ret)
7172 			goto out;
7173 
7174 		goto again;
7175 	}
7176 
7177 	ret = splice_to_pipe(pipe, &spd);
7178 out:
7179 	splice_shrink_spd(&spd);
7180 
7181 	return ret;
7182 }
7183 
7184 static const struct file_operations tracing_buffers_fops = {
7185 	.open		= tracing_buffers_open,
7186 	.read		= tracing_buffers_read,
7187 	.poll		= tracing_buffers_poll,
7188 	.release	= tracing_buffers_release,
7189 	.splice_read	= tracing_buffers_splice_read,
7190 	.llseek		= no_llseek,
7191 };
7192 
7193 static ssize_t
7194 tracing_stats_read(struct file *filp, char __user *ubuf,
7195 		   size_t count, loff_t *ppos)
7196 {
7197 	struct inode *inode = file_inode(filp);
7198 	struct trace_array *tr = inode->i_private;
7199 	struct trace_buffer *trace_buf = &tr->trace_buffer;
7200 	int cpu = tracing_get_cpu(inode);
7201 	struct trace_seq *s;
7202 	unsigned long cnt;
7203 	unsigned long long t;
7204 	unsigned long usec_rem;
7205 
7206 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7207 	if (!s)
7208 		return -ENOMEM;
7209 
7210 	trace_seq_init(s);
7211 
7212 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7213 	trace_seq_printf(s, "entries: %ld\n", cnt);
7214 
7215 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7216 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7217 
7218 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7219 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7220 
7221 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7222 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7223 
7224 	if (trace_clocks[tr->clock_id].in_ns) {
7225 		/* local or global for trace_clock */
7226 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7227 		usec_rem = do_div(t, USEC_PER_SEC);
7228 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7229 								t, usec_rem);
7230 
7231 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7232 		usec_rem = do_div(t, USEC_PER_SEC);
7233 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7234 	} else {
7235 		/* counter or tsc mode for trace_clock */
7236 		trace_seq_printf(s, "oldest event ts: %llu\n",
7237 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7238 
7239 		trace_seq_printf(s, "now ts: %llu\n",
7240 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7241 	}
7242 
7243 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7244 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7245 
7246 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7247 	trace_seq_printf(s, "read events: %ld\n", cnt);
7248 
7249 	count = simple_read_from_buffer(ubuf, count, ppos,
7250 					s->buffer, trace_seq_used(s));
7251 
7252 	kfree(s);
7253 
7254 	return count;
7255 }
7256 
7257 static const struct file_operations tracing_stats_fops = {
7258 	.open		= tracing_open_generic_tr,
7259 	.read		= tracing_stats_read,
7260 	.llseek		= generic_file_llseek,
7261 	.release	= tracing_release_generic_tr,
7262 };
7263 
7264 #ifdef CONFIG_DYNAMIC_FTRACE
7265 
7266 static ssize_t
7267 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7268 		  size_t cnt, loff_t *ppos)
7269 {
7270 	unsigned long *p = filp->private_data;
7271 	char buf[64]; /* Not too big for a shallow stack */
7272 	int r;
7273 
7274 	r = scnprintf(buf, 63, "%ld", *p);
7275 	buf[r++] = '\n';
7276 
7277 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7278 }
7279 
7280 static const struct file_operations tracing_dyn_info_fops = {
7281 	.open		= tracing_open_generic,
7282 	.read		= tracing_read_dyn_info,
7283 	.llseek		= generic_file_llseek,
7284 };
7285 #endif /* CONFIG_DYNAMIC_FTRACE */
7286 
7287 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7288 static void
7289 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7290 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7291 		void *data)
7292 {
7293 	tracing_snapshot_instance(tr);
7294 }
7295 
7296 static void
7297 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7298 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7299 		      void *data)
7300 {
7301 	struct ftrace_func_mapper *mapper = data;
7302 	long *count = NULL;
7303 
7304 	if (mapper)
7305 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7306 
7307 	if (count) {
7308 
7309 		if (*count <= 0)
7310 			return;
7311 
7312 		(*count)--;
7313 	}
7314 
7315 	tracing_snapshot_instance(tr);
7316 }
7317 
7318 static int
7319 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7320 		      struct ftrace_probe_ops *ops, void *data)
7321 {
7322 	struct ftrace_func_mapper *mapper = data;
7323 	long *count = NULL;
7324 
7325 	seq_printf(m, "%ps:", (void *)ip);
7326 
7327 	seq_puts(m, "snapshot");
7328 
7329 	if (mapper)
7330 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7331 
7332 	if (count)
7333 		seq_printf(m, ":count=%ld\n", *count);
7334 	else
7335 		seq_puts(m, ":unlimited\n");
7336 
7337 	return 0;
7338 }
7339 
7340 static int
7341 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7342 		     unsigned long ip, void *init_data, void **data)
7343 {
7344 	struct ftrace_func_mapper *mapper = *data;
7345 
7346 	if (!mapper) {
7347 		mapper = allocate_ftrace_func_mapper();
7348 		if (!mapper)
7349 			return -ENOMEM;
7350 		*data = mapper;
7351 	}
7352 
7353 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7354 }
7355 
7356 static void
7357 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7358 		     unsigned long ip, void *data)
7359 {
7360 	struct ftrace_func_mapper *mapper = data;
7361 
7362 	if (!ip) {
7363 		if (!mapper)
7364 			return;
7365 		free_ftrace_func_mapper(mapper, NULL);
7366 		return;
7367 	}
7368 
7369 	ftrace_func_mapper_remove_ip(mapper, ip);
7370 }
7371 
7372 static struct ftrace_probe_ops snapshot_probe_ops = {
7373 	.func			= ftrace_snapshot,
7374 	.print			= ftrace_snapshot_print,
7375 };
7376 
7377 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7378 	.func			= ftrace_count_snapshot,
7379 	.print			= ftrace_snapshot_print,
7380 	.init			= ftrace_snapshot_init,
7381 	.free			= ftrace_snapshot_free,
7382 };
7383 
7384 static int
7385 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7386 			       char *glob, char *cmd, char *param, int enable)
7387 {
7388 	struct ftrace_probe_ops *ops;
7389 	void *count = (void *)-1;
7390 	char *number;
7391 	int ret;
7392 
7393 	if (!tr)
7394 		return -ENODEV;
7395 
7396 	/* hash funcs only work with set_ftrace_filter */
7397 	if (!enable)
7398 		return -EINVAL;
7399 
7400 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7401 
7402 	if (glob[0] == '!')
7403 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7404 
7405 	if (!param)
7406 		goto out_reg;
7407 
7408 	number = strsep(&param, ":");
7409 
7410 	if (!strlen(number))
7411 		goto out_reg;
7412 
7413 	/*
7414 	 * We use the callback data field (which is a pointer)
7415 	 * as our counter.
7416 	 */
7417 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7418 	if (ret)
7419 		return ret;
7420 
7421  out_reg:
7422 	ret = tracing_alloc_snapshot_instance(tr);
7423 	if (ret < 0)
7424 		goto out;
7425 
7426 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7427 
7428  out:
7429 	return ret < 0 ? ret : 0;
7430 }
7431 
7432 static struct ftrace_func_command ftrace_snapshot_cmd = {
7433 	.name			= "snapshot",
7434 	.func			= ftrace_trace_snapshot_callback,
7435 };
7436 
7437 static __init int register_snapshot_cmd(void)
7438 {
7439 	return register_ftrace_command(&ftrace_snapshot_cmd);
7440 }
7441 #else
7442 static inline __init int register_snapshot_cmd(void) { return 0; }
7443 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7444 
7445 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7446 {
7447 	if (WARN_ON(!tr->dir))
7448 		return ERR_PTR(-ENODEV);
7449 
7450 	/* Top directory uses NULL as the parent */
7451 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7452 		return NULL;
7453 
7454 	/* All sub buffers have a descriptor */
7455 	return tr->dir;
7456 }
7457 
7458 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7459 {
7460 	struct dentry *d_tracer;
7461 
7462 	if (tr->percpu_dir)
7463 		return tr->percpu_dir;
7464 
7465 	d_tracer = tracing_get_dentry(tr);
7466 	if (IS_ERR(d_tracer))
7467 		return NULL;
7468 
7469 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7470 
7471 	WARN_ONCE(!tr->percpu_dir,
7472 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7473 
7474 	return tr->percpu_dir;
7475 }
7476 
7477 static struct dentry *
7478 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7479 		      void *data, long cpu, const struct file_operations *fops)
7480 {
7481 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7482 
7483 	if (ret) /* See tracing_get_cpu() */
7484 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7485 	return ret;
7486 }
7487 
7488 static void
7489 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7490 {
7491 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7492 	struct dentry *d_cpu;
7493 	char cpu_dir[30]; /* 30 characters should be more than enough */
7494 
7495 	if (!d_percpu)
7496 		return;
7497 
7498 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7499 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7500 	if (!d_cpu) {
7501 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7502 		return;
7503 	}
7504 
7505 	/* per cpu trace_pipe */
7506 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7507 				tr, cpu, &tracing_pipe_fops);
7508 
7509 	/* per cpu trace */
7510 	trace_create_cpu_file("trace", 0644, d_cpu,
7511 				tr, cpu, &tracing_fops);
7512 
7513 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7514 				tr, cpu, &tracing_buffers_fops);
7515 
7516 	trace_create_cpu_file("stats", 0444, d_cpu,
7517 				tr, cpu, &tracing_stats_fops);
7518 
7519 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7520 				tr, cpu, &tracing_entries_fops);
7521 
7522 #ifdef CONFIG_TRACER_SNAPSHOT
7523 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7524 				tr, cpu, &snapshot_fops);
7525 
7526 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7527 				tr, cpu, &snapshot_raw_fops);
7528 #endif
7529 }
7530 
7531 #ifdef CONFIG_FTRACE_SELFTEST
7532 /* Let selftest have access to static functions in this file */
7533 #include "trace_selftest.c"
7534 #endif
7535 
7536 static ssize_t
7537 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7538 			loff_t *ppos)
7539 {
7540 	struct trace_option_dentry *topt = filp->private_data;
7541 	char *buf;
7542 
7543 	if (topt->flags->val & topt->opt->bit)
7544 		buf = "1\n";
7545 	else
7546 		buf = "0\n";
7547 
7548 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7549 }
7550 
7551 static ssize_t
7552 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7553 			 loff_t *ppos)
7554 {
7555 	struct trace_option_dentry *topt = filp->private_data;
7556 	unsigned long val;
7557 	int ret;
7558 
7559 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7560 	if (ret)
7561 		return ret;
7562 
7563 	if (val != 0 && val != 1)
7564 		return -EINVAL;
7565 
7566 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7567 		mutex_lock(&trace_types_lock);
7568 		ret = __set_tracer_option(topt->tr, topt->flags,
7569 					  topt->opt, !val);
7570 		mutex_unlock(&trace_types_lock);
7571 		if (ret)
7572 			return ret;
7573 	}
7574 
7575 	*ppos += cnt;
7576 
7577 	return cnt;
7578 }
7579 
7580 
7581 static const struct file_operations trace_options_fops = {
7582 	.open = tracing_open_generic,
7583 	.read = trace_options_read,
7584 	.write = trace_options_write,
7585 	.llseek	= generic_file_llseek,
7586 };
7587 
7588 /*
7589  * In order to pass in both the trace_array descriptor as well as the index
7590  * to the flag that the trace option file represents, the trace_array
7591  * has a character array of trace_flags_index[], which holds the index
7592  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7593  * The address of this character array is passed to the flag option file
7594  * read/write callbacks.
7595  *
7596  * In order to extract both the index and the trace_array descriptor,
7597  * get_tr_index() uses the following algorithm.
7598  *
7599  *   idx = *ptr;
7600  *
7601  * As the pointer itself contains the address of the index (remember
7602  * index[1] == 1).
7603  *
7604  * Then to get the trace_array descriptor, by subtracting that index
7605  * from the ptr, we get to the start of the index itself.
7606  *
7607  *   ptr - idx == &index[0]
7608  *
7609  * Then a simple container_of() from that pointer gets us to the
7610  * trace_array descriptor.
7611  */
7612 static void get_tr_index(void *data, struct trace_array **ptr,
7613 			 unsigned int *pindex)
7614 {
7615 	*pindex = *(unsigned char *)data;
7616 
7617 	*ptr = container_of(data - *pindex, struct trace_array,
7618 			    trace_flags_index);
7619 }
7620 
7621 static ssize_t
7622 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7623 			loff_t *ppos)
7624 {
7625 	void *tr_index = filp->private_data;
7626 	struct trace_array *tr;
7627 	unsigned int index;
7628 	char *buf;
7629 
7630 	get_tr_index(tr_index, &tr, &index);
7631 
7632 	if (tr->trace_flags & (1 << index))
7633 		buf = "1\n";
7634 	else
7635 		buf = "0\n";
7636 
7637 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7638 }
7639 
7640 static ssize_t
7641 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7642 			 loff_t *ppos)
7643 {
7644 	void *tr_index = filp->private_data;
7645 	struct trace_array *tr;
7646 	unsigned int index;
7647 	unsigned long val;
7648 	int ret;
7649 
7650 	get_tr_index(tr_index, &tr, &index);
7651 
7652 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7653 	if (ret)
7654 		return ret;
7655 
7656 	if (val != 0 && val != 1)
7657 		return -EINVAL;
7658 
7659 	mutex_lock(&trace_types_lock);
7660 	ret = set_tracer_flag(tr, 1 << index, val);
7661 	mutex_unlock(&trace_types_lock);
7662 
7663 	if (ret < 0)
7664 		return ret;
7665 
7666 	*ppos += cnt;
7667 
7668 	return cnt;
7669 }
7670 
7671 static const struct file_operations trace_options_core_fops = {
7672 	.open = tracing_open_generic,
7673 	.read = trace_options_core_read,
7674 	.write = trace_options_core_write,
7675 	.llseek = generic_file_llseek,
7676 };
7677 
7678 struct dentry *trace_create_file(const char *name,
7679 				 umode_t mode,
7680 				 struct dentry *parent,
7681 				 void *data,
7682 				 const struct file_operations *fops)
7683 {
7684 	struct dentry *ret;
7685 
7686 	ret = tracefs_create_file(name, mode, parent, data, fops);
7687 	if (!ret)
7688 		pr_warn("Could not create tracefs '%s' entry\n", name);
7689 
7690 	return ret;
7691 }
7692 
7693 
7694 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7695 {
7696 	struct dentry *d_tracer;
7697 
7698 	if (tr->options)
7699 		return tr->options;
7700 
7701 	d_tracer = tracing_get_dentry(tr);
7702 	if (IS_ERR(d_tracer))
7703 		return NULL;
7704 
7705 	tr->options = tracefs_create_dir("options", d_tracer);
7706 	if (!tr->options) {
7707 		pr_warn("Could not create tracefs directory 'options'\n");
7708 		return NULL;
7709 	}
7710 
7711 	return tr->options;
7712 }
7713 
7714 static void
7715 create_trace_option_file(struct trace_array *tr,
7716 			 struct trace_option_dentry *topt,
7717 			 struct tracer_flags *flags,
7718 			 struct tracer_opt *opt)
7719 {
7720 	struct dentry *t_options;
7721 
7722 	t_options = trace_options_init_dentry(tr);
7723 	if (!t_options)
7724 		return;
7725 
7726 	topt->flags = flags;
7727 	topt->opt = opt;
7728 	topt->tr = tr;
7729 
7730 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7731 				    &trace_options_fops);
7732 
7733 }
7734 
7735 static void
7736 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7737 {
7738 	struct trace_option_dentry *topts;
7739 	struct trace_options *tr_topts;
7740 	struct tracer_flags *flags;
7741 	struct tracer_opt *opts;
7742 	int cnt;
7743 	int i;
7744 
7745 	if (!tracer)
7746 		return;
7747 
7748 	flags = tracer->flags;
7749 
7750 	if (!flags || !flags->opts)
7751 		return;
7752 
7753 	/*
7754 	 * If this is an instance, only create flags for tracers
7755 	 * the instance may have.
7756 	 */
7757 	if (!trace_ok_for_array(tracer, tr))
7758 		return;
7759 
7760 	for (i = 0; i < tr->nr_topts; i++) {
7761 		/* Make sure there's no duplicate flags. */
7762 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7763 			return;
7764 	}
7765 
7766 	opts = flags->opts;
7767 
7768 	for (cnt = 0; opts[cnt].name; cnt++)
7769 		;
7770 
7771 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7772 	if (!topts)
7773 		return;
7774 
7775 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7776 			    GFP_KERNEL);
7777 	if (!tr_topts) {
7778 		kfree(topts);
7779 		return;
7780 	}
7781 
7782 	tr->topts = tr_topts;
7783 	tr->topts[tr->nr_topts].tracer = tracer;
7784 	tr->topts[tr->nr_topts].topts = topts;
7785 	tr->nr_topts++;
7786 
7787 	for (cnt = 0; opts[cnt].name; cnt++) {
7788 		create_trace_option_file(tr, &topts[cnt], flags,
7789 					 &opts[cnt]);
7790 		WARN_ONCE(topts[cnt].entry == NULL,
7791 			  "Failed to create trace option: %s",
7792 			  opts[cnt].name);
7793 	}
7794 }
7795 
7796 static struct dentry *
7797 create_trace_option_core_file(struct trace_array *tr,
7798 			      const char *option, long index)
7799 {
7800 	struct dentry *t_options;
7801 
7802 	t_options = trace_options_init_dentry(tr);
7803 	if (!t_options)
7804 		return NULL;
7805 
7806 	return trace_create_file(option, 0644, t_options,
7807 				 (void *)&tr->trace_flags_index[index],
7808 				 &trace_options_core_fops);
7809 }
7810 
7811 static void create_trace_options_dir(struct trace_array *tr)
7812 {
7813 	struct dentry *t_options;
7814 	bool top_level = tr == &global_trace;
7815 	int i;
7816 
7817 	t_options = trace_options_init_dentry(tr);
7818 	if (!t_options)
7819 		return;
7820 
7821 	for (i = 0; trace_options[i]; i++) {
7822 		if (top_level ||
7823 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7824 			create_trace_option_core_file(tr, trace_options[i], i);
7825 	}
7826 }
7827 
7828 static ssize_t
7829 rb_simple_read(struct file *filp, char __user *ubuf,
7830 	       size_t cnt, loff_t *ppos)
7831 {
7832 	struct trace_array *tr = filp->private_data;
7833 	char buf[64];
7834 	int r;
7835 
7836 	r = tracer_tracing_is_on(tr);
7837 	r = sprintf(buf, "%d\n", r);
7838 
7839 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7840 }
7841 
7842 static ssize_t
7843 rb_simple_write(struct file *filp, const char __user *ubuf,
7844 		size_t cnt, loff_t *ppos)
7845 {
7846 	struct trace_array *tr = filp->private_data;
7847 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7848 	unsigned long val;
7849 	int ret;
7850 
7851 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7852 	if (ret)
7853 		return ret;
7854 
7855 	if (buffer) {
7856 		mutex_lock(&trace_types_lock);
7857 		if (!!val == tracer_tracing_is_on(tr)) {
7858 			val = 0; /* do nothing */
7859 		} else if (val) {
7860 			tracer_tracing_on(tr);
7861 			if (tr->current_trace->start)
7862 				tr->current_trace->start(tr);
7863 		} else {
7864 			tracer_tracing_off(tr);
7865 			if (tr->current_trace->stop)
7866 				tr->current_trace->stop(tr);
7867 		}
7868 		mutex_unlock(&trace_types_lock);
7869 	}
7870 
7871 	(*ppos)++;
7872 
7873 	return cnt;
7874 }
7875 
7876 static const struct file_operations rb_simple_fops = {
7877 	.open		= tracing_open_generic_tr,
7878 	.read		= rb_simple_read,
7879 	.write		= rb_simple_write,
7880 	.release	= tracing_release_generic_tr,
7881 	.llseek		= default_llseek,
7882 };
7883 
7884 static ssize_t
7885 buffer_percent_read(struct file *filp, char __user *ubuf,
7886 		    size_t cnt, loff_t *ppos)
7887 {
7888 	struct trace_array *tr = filp->private_data;
7889 	char buf[64];
7890 	int r;
7891 
7892 	r = tr->buffer_percent;
7893 	r = sprintf(buf, "%d\n", r);
7894 
7895 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7896 }
7897 
7898 static ssize_t
7899 buffer_percent_write(struct file *filp, const char __user *ubuf,
7900 		     size_t cnt, loff_t *ppos)
7901 {
7902 	struct trace_array *tr = filp->private_data;
7903 	unsigned long val;
7904 	int ret;
7905 
7906 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7907 	if (ret)
7908 		return ret;
7909 
7910 	if (val > 100)
7911 		return -EINVAL;
7912 
7913 	if (!val)
7914 		val = 1;
7915 
7916 	tr->buffer_percent = val;
7917 
7918 	(*ppos)++;
7919 
7920 	return cnt;
7921 }
7922 
7923 static const struct file_operations buffer_percent_fops = {
7924 	.open		= tracing_open_generic_tr,
7925 	.read		= buffer_percent_read,
7926 	.write		= buffer_percent_write,
7927 	.release	= tracing_release_generic_tr,
7928 	.llseek		= default_llseek,
7929 };
7930 
7931 struct dentry *trace_instance_dir;
7932 
7933 static void
7934 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7935 
7936 static int
7937 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7938 {
7939 	enum ring_buffer_flags rb_flags;
7940 
7941 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7942 
7943 	buf->tr = tr;
7944 
7945 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7946 	if (!buf->buffer)
7947 		return -ENOMEM;
7948 
7949 	buf->data = alloc_percpu(struct trace_array_cpu);
7950 	if (!buf->data) {
7951 		ring_buffer_free(buf->buffer);
7952 		buf->buffer = NULL;
7953 		return -ENOMEM;
7954 	}
7955 
7956 	/* Allocate the first page for all buffers */
7957 	set_buffer_entries(&tr->trace_buffer,
7958 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7959 
7960 	return 0;
7961 }
7962 
7963 static int allocate_trace_buffers(struct trace_array *tr, int size)
7964 {
7965 	int ret;
7966 
7967 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7968 	if (ret)
7969 		return ret;
7970 
7971 #ifdef CONFIG_TRACER_MAX_TRACE
7972 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7973 				    allocate_snapshot ? size : 1);
7974 	if (WARN_ON(ret)) {
7975 		ring_buffer_free(tr->trace_buffer.buffer);
7976 		tr->trace_buffer.buffer = NULL;
7977 		free_percpu(tr->trace_buffer.data);
7978 		tr->trace_buffer.data = NULL;
7979 		return -ENOMEM;
7980 	}
7981 	tr->allocated_snapshot = allocate_snapshot;
7982 
7983 	/*
7984 	 * Only the top level trace array gets its snapshot allocated
7985 	 * from the kernel command line.
7986 	 */
7987 	allocate_snapshot = false;
7988 #endif
7989 	return 0;
7990 }
7991 
7992 static void free_trace_buffer(struct trace_buffer *buf)
7993 {
7994 	if (buf->buffer) {
7995 		ring_buffer_free(buf->buffer);
7996 		buf->buffer = NULL;
7997 		free_percpu(buf->data);
7998 		buf->data = NULL;
7999 	}
8000 }
8001 
8002 static void free_trace_buffers(struct trace_array *tr)
8003 {
8004 	if (!tr)
8005 		return;
8006 
8007 	free_trace_buffer(&tr->trace_buffer);
8008 
8009 #ifdef CONFIG_TRACER_MAX_TRACE
8010 	free_trace_buffer(&tr->max_buffer);
8011 #endif
8012 }
8013 
8014 static void init_trace_flags_index(struct trace_array *tr)
8015 {
8016 	int i;
8017 
8018 	/* Used by the trace options files */
8019 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8020 		tr->trace_flags_index[i] = i;
8021 }
8022 
8023 static void __update_tracer_options(struct trace_array *tr)
8024 {
8025 	struct tracer *t;
8026 
8027 	for (t = trace_types; t; t = t->next)
8028 		add_tracer_options(tr, t);
8029 }
8030 
8031 static void update_tracer_options(struct trace_array *tr)
8032 {
8033 	mutex_lock(&trace_types_lock);
8034 	__update_tracer_options(tr);
8035 	mutex_unlock(&trace_types_lock);
8036 }
8037 
8038 static int instance_mkdir(const char *name)
8039 {
8040 	struct trace_array *tr;
8041 	int ret;
8042 
8043 	mutex_lock(&event_mutex);
8044 	mutex_lock(&trace_types_lock);
8045 
8046 	ret = -EEXIST;
8047 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8048 		if (tr->name && strcmp(tr->name, name) == 0)
8049 			goto out_unlock;
8050 	}
8051 
8052 	ret = -ENOMEM;
8053 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8054 	if (!tr)
8055 		goto out_unlock;
8056 
8057 	tr->name = kstrdup(name, GFP_KERNEL);
8058 	if (!tr->name)
8059 		goto out_free_tr;
8060 
8061 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8062 		goto out_free_tr;
8063 
8064 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8065 
8066 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8067 
8068 	raw_spin_lock_init(&tr->start_lock);
8069 
8070 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8071 
8072 	tr->current_trace = &nop_trace;
8073 
8074 	INIT_LIST_HEAD(&tr->systems);
8075 	INIT_LIST_HEAD(&tr->events);
8076 	INIT_LIST_HEAD(&tr->hist_vars);
8077 
8078 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8079 		goto out_free_tr;
8080 
8081 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
8082 	if (!tr->dir)
8083 		goto out_free_tr;
8084 
8085 	ret = event_trace_add_tracer(tr->dir, tr);
8086 	if (ret) {
8087 		tracefs_remove_recursive(tr->dir);
8088 		goto out_free_tr;
8089 	}
8090 
8091 	ftrace_init_trace_array(tr);
8092 
8093 	init_tracer_tracefs(tr, tr->dir);
8094 	init_trace_flags_index(tr);
8095 	__update_tracer_options(tr);
8096 
8097 	list_add(&tr->list, &ftrace_trace_arrays);
8098 
8099 	mutex_unlock(&trace_types_lock);
8100 	mutex_unlock(&event_mutex);
8101 
8102 	return 0;
8103 
8104  out_free_tr:
8105 	free_trace_buffers(tr);
8106 	free_cpumask_var(tr->tracing_cpumask);
8107 	kfree(tr->name);
8108 	kfree(tr);
8109 
8110  out_unlock:
8111 	mutex_unlock(&trace_types_lock);
8112 	mutex_unlock(&event_mutex);
8113 
8114 	return ret;
8115 
8116 }
8117 
8118 static int instance_rmdir(const char *name)
8119 {
8120 	struct trace_array *tr;
8121 	int found = 0;
8122 	int ret;
8123 	int i;
8124 
8125 	mutex_lock(&event_mutex);
8126 	mutex_lock(&trace_types_lock);
8127 
8128 	ret = -ENODEV;
8129 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8130 		if (tr->name && strcmp(tr->name, name) == 0) {
8131 			found = 1;
8132 			break;
8133 		}
8134 	}
8135 	if (!found)
8136 		goto out_unlock;
8137 
8138 	ret = -EBUSY;
8139 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8140 		goto out_unlock;
8141 
8142 	list_del(&tr->list);
8143 
8144 	/* Disable all the flags that were enabled coming in */
8145 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8146 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8147 			set_tracer_flag(tr, 1 << i, 0);
8148 	}
8149 
8150 	tracing_set_nop(tr);
8151 	clear_ftrace_function_probes(tr);
8152 	event_trace_del_tracer(tr);
8153 	ftrace_clear_pids(tr);
8154 	ftrace_destroy_function_files(tr);
8155 	tracefs_remove_recursive(tr->dir);
8156 	free_trace_buffers(tr);
8157 
8158 	for (i = 0; i < tr->nr_topts; i++) {
8159 		kfree(tr->topts[i].topts);
8160 	}
8161 	kfree(tr->topts);
8162 
8163 	free_cpumask_var(tr->tracing_cpumask);
8164 	kfree(tr->name);
8165 	kfree(tr);
8166 
8167 	ret = 0;
8168 
8169  out_unlock:
8170 	mutex_unlock(&trace_types_lock);
8171 	mutex_unlock(&event_mutex);
8172 
8173 	return ret;
8174 }
8175 
8176 static __init void create_trace_instances(struct dentry *d_tracer)
8177 {
8178 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8179 							 instance_mkdir,
8180 							 instance_rmdir);
8181 	if (WARN_ON(!trace_instance_dir))
8182 		return;
8183 }
8184 
8185 static void
8186 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8187 {
8188 	struct trace_event_file *file;
8189 	int cpu;
8190 
8191 	trace_create_file("available_tracers", 0444, d_tracer,
8192 			tr, &show_traces_fops);
8193 
8194 	trace_create_file("current_tracer", 0644, d_tracer,
8195 			tr, &set_tracer_fops);
8196 
8197 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8198 			  tr, &tracing_cpumask_fops);
8199 
8200 	trace_create_file("trace_options", 0644, d_tracer,
8201 			  tr, &tracing_iter_fops);
8202 
8203 	trace_create_file("trace", 0644, d_tracer,
8204 			  tr, &tracing_fops);
8205 
8206 	trace_create_file("trace_pipe", 0444, d_tracer,
8207 			  tr, &tracing_pipe_fops);
8208 
8209 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8210 			  tr, &tracing_entries_fops);
8211 
8212 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8213 			  tr, &tracing_total_entries_fops);
8214 
8215 	trace_create_file("free_buffer", 0200, d_tracer,
8216 			  tr, &tracing_free_buffer_fops);
8217 
8218 	trace_create_file("trace_marker", 0220, d_tracer,
8219 			  tr, &tracing_mark_fops);
8220 
8221 	file = __find_event_file(tr, "ftrace", "print");
8222 	if (file && file->dir)
8223 		trace_create_file("trigger", 0644, file->dir, file,
8224 				  &event_trigger_fops);
8225 	tr->trace_marker_file = file;
8226 
8227 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8228 			  tr, &tracing_mark_raw_fops);
8229 
8230 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8231 			  &trace_clock_fops);
8232 
8233 	trace_create_file("tracing_on", 0644, d_tracer,
8234 			  tr, &rb_simple_fops);
8235 
8236 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8237 			  &trace_time_stamp_mode_fops);
8238 
8239 	tr->buffer_percent = 50;
8240 
8241 	trace_create_file("buffer_percent", 0444, d_tracer,
8242 			tr, &buffer_percent_fops);
8243 
8244 	create_trace_options_dir(tr);
8245 
8246 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8247 	trace_create_file("tracing_max_latency", 0644, d_tracer,
8248 			&tr->max_latency, &tracing_max_lat_fops);
8249 #endif
8250 
8251 	if (ftrace_create_function_files(tr, d_tracer))
8252 		WARN(1, "Could not allocate function filter files");
8253 
8254 #ifdef CONFIG_TRACER_SNAPSHOT
8255 	trace_create_file("snapshot", 0644, d_tracer,
8256 			  tr, &snapshot_fops);
8257 #endif
8258 
8259 	for_each_tracing_cpu(cpu)
8260 		tracing_init_tracefs_percpu(tr, cpu);
8261 
8262 	ftrace_init_tracefs(tr, d_tracer);
8263 }
8264 
8265 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8266 {
8267 	struct vfsmount *mnt;
8268 	struct file_system_type *type;
8269 
8270 	/*
8271 	 * To maintain backward compatibility for tools that mount
8272 	 * debugfs to get to the tracing facility, tracefs is automatically
8273 	 * mounted to the debugfs/tracing directory.
8274 	 */
8275 	type = get_fs_type("tracefs");
8276 	if (!type)
8277 		return NULL;
8278 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8279 	put_filesystem(type);
8280 	if (IS_ERR(mnt))
8281 		return NULL;
8282 	mntget(mnt);
8283 
8284 	return mnt;
8285 }
8286 
8287 /**
8288  * tracing_init_dentry - initialize top level trace array
8289  *
8290  * This is called when creating files or directories in the tracing
8291  * directory. It is called via fs_initcall() by any of the boot up code
8292  * and expects to return the dentry of the top level tracing directory.
8293  */
8294 struct dentry *tracing_init_dentry(void)
8295 {
8296 	struct trace_array *tr = &global_trace;
8297 
8298 	/* The top level trace array uses  NULL as parent */
8299 	if (tr->dir)
8300 		return NULL;
8301 
8302 	if (WARN_ON(!tracefs_initialized()) ||
8303 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8304 		 WARN_ON(!debugfs_initialized())))
8305 		return ERR_PTR(-ENODEV);
8306 
8307 	/*
8308 	 * As there may still be users that expect the tracing
8309 	 * files to exist in debugfs/tracing, we must automount
8310 	 * the tracefs file system there, so older tools still
8311 	 * work with the newer kerenl.
8312 	 */
8313 	tr->dir = debugfs_create_automount("tracing", NULL,
8314 					   trace_automount, NULL);
8315 	if (!tr->dir) {
8316 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8317 		return ERR_PTR(-ENOMEM);
8318 	}
8319 
8320 	return NULL;
8321 }
8322 
8323 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8324 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8325 
8326 static void __init trace_eval_init(void)
8327 {
8328 	int len;
8329 
8330 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8331 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8332 }
8333 
8334 #ifdef CONFIG_MODULES
8335 static void trace_module_add_evals(struct module *mod)
8336 {
8337 	if (!mod->num_trace_evals)
8338 		return;
8339 
8340 	/*
8341 	 * Modules with bad taint do not have events created, do
8342 	 * not bother with enums either.
8343 	 */
8344 	if (trace_module_has_bad_taint(mod))
8345 		return;
8346 
8347 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8348 }
8349 
8350 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8351 static void trace_module_remove_evals(struct module *mod)
8352 {
8353 	union trace_eval_map_item *map;
8354 	union trace_eval_map_item **last = &trace_eval_maps;
8355 
8356 	if (!mod->num_trace_evals)
8357 		return;
8358 
8359 	mutex_lock(&trace_eval_mutex);
8360 
8361 	map = trace_eval_maps;
8362 
8363 	while (map) {
8364 		if (map->head.mod == mod)
8365 			break;
8366 		map = trace_eval_jmp_to_tail(map);
8367 		last = &map->tail.next;
8368 		map = map->tail.next;
8369 	}
8370 	if (!map)
8371 		goto out;
8372 
8373 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8374 	kfree(map);
8375  out:
8376 	mutex_unlock(&trace_eval_mutex);
8377 }
8378 #else
8379 static inline void trace_module_remove_evals(struct module *mod) { }
8380 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8381 
8382 static int trace_module_notify(struct notifier_block *self,
8383 			       unsigned long val, void *data)
8384 {
8385 	struct module *mod = data;
8386 
8387 	switch (val) {
8388 	case MODULE_STATE_COMING:
8389 		trace_module_add_evals(mod);
8390 		break;
8391 	case MODULE_STATE_GOING:
8392 		trace_module_remove_evals(mod);
8393 		break;
8394 	}
8395 
8396 	return 0;
8397 }
8398 
8399 static struct notifier_block trace_module_nb = {
8400 	.notifier_call = trace_module_notify,
8401 	.priority = 0,
8402 };
8403 #endif /* CONFIG_MODULES */
8404 
8405 static __init int tracer_init_tracefs(void)
8406 {
8407 	struct dentry *d_tracer;
8408 
8409 	trace_access_lock_init();
8410 
8411 	d_tracer = tracing_init_dentry();
8412 	if (IS_ERR(d_tracer))
8413 		return 0;
8414 
8415 	event_trace_init();
8416 
8417 	init_tracer_tracefs(&global_trace, d_tracer);
8418 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8419 
8420 	trace_create_file("tracing_thresh", 0644, d_tracer,
8421 			&global_trace, &tracing_thresh_fops);
8422 
8423 	trace_create_file("README", 0444, d_tracer,
8424 			NULL, &tracing_readme_fops);
8425 
8426 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8427 			NULL, &tracing_saved_cmdlines_fops);
8428 
8429 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8430 			  NULL, &tracing_saved_cmdlines_size_fops);
8431 
8432 	trace_create_file("saved_tgids", 0444, d_tracer,
8433 			NULL, &tracing_saved_tgids_fops);
8434 
8435 	trace_eval_init();
8436 
8437 	trace_create_eval_file(d_tracer);
8438 
8439 #ifdef CONFIG_MODULES
8440 	register_module_notifier(&trace_module_nb);
8441 #endif
8442 
8443 #ifdef CONFIG_DYNAMIC_FTRACE
8444 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8445 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8446 #endif
8447 
8448 	create_trace_instances(d_tracer);
8449 
8450 	update_tracer_options(&global_trace);
8451 
8452 	return 0;
8453 }
8454 
8455 static int trace_panic_handler(struct notifier_block *this,
8456 			       unsigned long event, void *unused)
8457 {
8458 	if (ftrace_dump_on_oops)
8459 		ftrace_dump(ftrace_dump_on_oops);
8460 	return NOTIFY_OK;
8461 }
8462 
8463 static struct notifier_block trace_panic_notifier = {
8464 	.notifier_call  = trace_panic_handler,
8465 	.next           = NULL,
8466 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8467 };
8468 
8469 static int trace_die_handler(struct notifier_block *self,
8470 			     unsigned long val,
8471 			     void *data)
8472 {
8473 	switch (val) {
8474 	case DIE_OOPS:
8475 		if (ftrace_dump_on_oops)
8476 			ftrace_dump(ftrace_dump_on_oops);
8477 		break;
8478 	default:
8479 		break;
8480 	}
8481 	return NOTIFY_OK;
8482 }
8483 
8484 static struct notifier_block trace_die_notifier = {
8485 	.notifier_call = trace_die_handler,
8486 	.priority = 200
8487 };
8488 
8489 /*
8490  * printk is set to max of 1024, we really don't need it that big.
8491  * Nothing should be printing 1000 characters anyway.
8492  */
8493 #define TRACE_MAX_PRINT		1000
8494 
8495 /*
8496  * Define here KERN_TRACE so that we have one place to modify
8497  * it if we decide to change what log level the ftrace dump
8498  * should be at.
8499  */
8500 #define KERN_TRACE		KERN_EMERG
8501 
8502 void
8503 trace_printk_seq(struct trace_seq *s)
8504 {
8505 	/* Probably should print a warning here. */
8506 	if (s->seq.len >= TRACE_MAX_PRINT)
8507 		s->seq.len = TRACE_MAX_PRINT;
8508 
8509 	/*
8510 	 * More paranoid code. Although the buffer size is set to
8511 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8512 	 * an extra layer of protection.
8513 	 */
8514 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8515 		s->seq.len = s->seq.size - 1;
8516 
8517 	/* should be zero ended, but we are paranoid. */
8518 	s->buffer[s->seq.len] = 0;
8519 
8520 	printk(KERN_TRACE "%s", s->buffer);
8521 
8522 	trace_seq_init(s);
8523 }
8524 
8525 void trace_init_global_iter(struct trace_iterator *iter)
8526 {
8527 	iter->tr = &global_trace;
8528 	iter->trace = iter->tr->current_trace;
8529 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8530 	iter->trace_buffer = &global_trace.trace_buffer;
8531 
8532 	if (iter->trace && iter->trace->open)
8533 		iter->trace->open(iter);
8534 
8535 	/* Annotate start of buffers if we had overruns */
8536 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8537 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8538 
8539 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8540 	if (trace_clocks[iter->tr->clock_id].in_ns)
8541 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8542 }
8543 
8544 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8545 {
8546 	/* use static because iter can be a bit big for the stack */
8547 	static struct trace_iterator iter;
8548 	static atomic_t dump_running;
8549 	struct trace_array *tr = &global_trace;
8550 	unsigned int old_userobj;
8551 	unsigned long flags;
8552 	int cnt = 0, cpu;
8553 
8554 	/* Only allow one dump user at a time. */
8555 	if (atomic_inc_return(&dump_running) != 1) {
8556 		atomic_dec(&dump_running);
8557 		return;
8558 	}
8559 
8560 	/*
8561 	 * Always turn off tracing when we dump.
8562 	 * We don't need to show trace output of what happens
8563 	 * between multiple crashes.
8564 	 *
8565 	 * If the user does a sysrq-z, then they can re-enable
8566 	 * tracing with echo 1 > tracing_on.
8567 	 */
8568 	tracing_off();
8569 
8570 	local_irq_save(flags);
8571 	printk_nmi_direct_enter();
8572 
8573 	/* Simulate the iterator */
8574 	trace_init_global_iter(&iter);
8575 
8576 	for_each_tracing_cpu(cpu) {
8577 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8578 	}
8579 
8580 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8581 
8582 	/* don't look at user memory in panic mode */
8583 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8584 
8585 	switch (oops_dump_mode) {
8586 	case DUMP_ALL:
8587 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8588 		break;
8589 	case DUMP_ORIG:
8590 		iter.cpu_file = raw_smp_processor_id();
8591 		break;
8592 	case DUMP_NONE:
8593 		goto out_enable;
8594 	default:
8595 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8596 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8597 	}
8598 
8599 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8600 
8601 	/* Did function tracer already get disabled? */
8602 	if (ftrace_is_dead()) {
8603 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8604 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8605 	}
8606 
8607 	/*
8608 	 * We need to stop all tracing on all CPUS to read the
8609 	 * the next buffer. This is a bit expensive, but is
8610 	 * not done often. We fill all what we can read,
8611 	 * and then release the locks again.
8612 	 */
8613 
8614 	while (!trace_empty(&iter)) {
8615 
8616 		if (!cnt)
8617 			printk(KERN_TRACE "---------------------------------\n");
8618 
8619 		cnt++;
8620 
8621 		/* reset all but tr, trace, and overruns */
8622 		memset(&iter.seq, 0,
8623 		       sizeof(struct trace_iterator) -
8624 		       offsetof(struct trace_iterator, seq));
8625 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8626 		iter.pos = -1;
8627 
8628 		if (trace_find_next_entry_inc(&iter) != NULL) {
8629 			int ret;
8630 
8631 			ret = print_trace_line(&iter);
8632 			if (ret != TRACE_TYPE_NO_CONSUME)
8633 				trace_consume(&iter);
8634 		}
8635 		touch_nmi_watchdog();
8636 
8637 		trace_printk_seq(&iter.seq);
8638 	}
8639 
8640 	if (!cnt)
8641 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8642 	else
8643 		printk(KERN_TRACE "---------------------------------\n");
8644 
8645  out_enable:
8646 	tr->trace_flags |= old_userobj;
8647 
8648 	for_each_tracing_cpu(cpu) {
8649 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8650 	}
8651 	atomic_dec(&dump_running);
8652 	printk_nmi_direct_exit();
8653 	local_irq_restore(flags);
8654 }
8655 EXPORT_SYMBOL_GPL(ftrace_dump);
8656 
8657 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8658 {
8659 	char **argv;
8660 	int argc, ret;
8661 
8662 	argc = 0;
8663 	ret = 0;
8664 	argv = argv_split(GFP_KERNEL, buf, &argc);
8665 	if (!argv)
8666 		return -ENOMEM;
8667 
8668 	if (argc)
8669 		ret = createfn(argc, argv);
8670 
8671 	argv_free(argv);
8672 
8673 	return ret;
8674 }
8675 
8676 #define WRITE_BUFSIZE  4096
8677 
8678 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8679 				size_t count, loff_t *ppos,
8680 				int (*createfn)(int, char **))
8681 {
8682 	char *kbuf, *buf, *tmp;
8683 	int ret = 0;
8684 	size_t done = 0;
8685 	size_t size;
8686 
8687 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8688 	if (!kbuf)
8689 		return -ENOMEM;
8690 
8691 	while (done < count) {
8692 		size = count - done;
8693 
8694 		if (size >= WRITE_BUFSIZE)
8695 			size = WRITE_BUFSIZE - 1;
8696 
8697 		if (copy_from_user(kbuf, buffer + done, size)) {
8698 			ret = -EFAULT;
8699 			goto out;
8700 		}
8701 		kbuf[size] = '\0';
8702 		buf = kbuf;
8703 		do {
8704 			tmp = strchr(buf, '\n');
8705 			if (tmp) {
8706 				*tmp = '\0';
8707 				size = tmp - buf + 1;
8708 			} else {
8709 				size = strlen(buf);
8710 				if (done + size < count) {
8711 					if (buf != kbuf)
8712 						break;
8713 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8714 					pr_warn("Line length is too long: Should be less than %d\n",
8715 						WRITE_BUFSIZE - 2);
8716 					ret = -EINVAL;
8717 					goto out;
8718 				}
8719 			}
8720 			done += size;
8721 
8722 			/* Remove comments */
8723 			tmp = strchr(buf, '#');
8724 
8725 			if (tmp)
8726 				*tmp = '\0';
8727 
8728 			ret = trace_run_command(buf, createfn);
8729 			if (ret)
8730 				goto out;
8731 			buf += size;
8732 
8733 		} while (done < count);
8734 	}
8735 	ret = done;
8736 
8737 out:
8738 	kfree(kbuf);
8739 
8740 	return ret;
8741 }
8742 
8743 __init static int tracer_alloc_buffers(void)
8744 {
8745 	int ring_buf_size;
8746 	int ret = -ENOMEM;
8747 
8748 	/*
8749 	 * Make sure we don't accidently add more trace options
8750 	 * than we have bits for.
8751 	 */
8752 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8753 
8754 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8755 		goto out;
8756 
8757 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8758 		goto out_free_buffer_mask;
8759 
8760 	/* Only allocate trace_printk buffers if a trace_printk exists */
8761 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8762 		/* Must be called before global_trace.buffer is allocated */
8763 		trace_printk_init_buffers();
8764 
8765 	/* To save memory, keep the ring buffer size to its minimum */
8766 	if (ring_buffer_expanded)
8767 		ring_buf_size = trace_buf_size;
8768 	else
8769 		ring_buf_size = 1;
8770 
8771 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8772 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8773 
8774 	raw_spin_lock_init(&global_trace.start_lock);
8775 
8776 	/*
8777 	 * The prepare callbacks allocates some memory for the ring buffer. We
8778 	 * don't free the buffer if the if the CPU goes down. If we were to free
8779 	 * the buffer, then the user would lose any trace that was in the
8780 	 * buffer. The memory will be removed once the "instance" is removed.
8781 	 */
8782 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8783 				      "trace/RB:preapre", trace_rb_cpu_prepare,
8784 				      NULL);
8785 	if (ret < 0)
8786 		goto out_free_cpumask;
8787 	/* Used for event triggers */
8788 	ret = -ENOMEM;
8789 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8790 	if (!temp_buffer)
8791 		goto out_rm_hp_state;
8792 
8793 	if (trace_create_savedcmd() < 0)
8794 		goto out_free_temp_buffer;
8795 
8796 	/* TODO: make the number of buffers hot pluggable with CPUS */
8797 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8798 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8799 		WARN_ON(1);
8800 		goto out_free_savedcmd;
8801 	}
8802 
8803 	if (global_trace.buffer_disabled)
8804 		tracing_off();
8805 
8806 	if (trace_boot_clock) {
8807 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8808 		if (ret < 0)
8809 			pr_warn("Trace clock %s not defined, going back to default\n",
8810 				trace_boot_clock);
8811 	}
8812 
8813 	/*
8814 	 * register_tracer() might reference current_trace, so it
8815 	 * needs to be set before we register anything. This is
8816 	 * just a bootstrap of current_trace anyway.
8817 	 */
8818 	global_trace.current_trace = &nop_trace;
8819 
8820 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8821 
8822 	ftrace_init_global_array_ops(&global_trace);
8823 
8824 	init_trace_flags_index(&global_trace);
8825 
8826 	register_tracer(&nop_trace);
8827 
8828 	/* Function tracing may start here (via kernel command line) */
8829 	init_function_trace();
8830 
8831 	/* All seems OK, enable tracing */
8832 	tracing_disabled = 0;
8833 
8834 	atomic_notifier_chain_register(&panic_notifier_list,
8835 				       &trace_panic_notifier);
8836 
8837 	register_die_notifier(&trace_die_notifier);
8838 
8839 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8840 
8841 	INIT_LIST_HEAD(&global_trace.systems);
8842 	INIT_LIST_HEAD(&global_trace.events);
8843 	INIT_LIST_HEAD(&global_trace.hist_vars);
8844 	list_add(&global_trace.list, &ftrace_trace_arrays);
8845 
8846 	apply_trace_boot_options();
8847 
8848 	register_snapshot_cmd();
8849 
8850 	return 0;
8851 
8852 out_free_savedcmd:
8853 	free_saved_cmdlines_buffer(savedcmd);
8854 out_free_temp_buffer:
8855 	ring_buffer_free(temp_buffer);
8856 out_rm_hp_state:
8857 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8858 out_free_cpumask:
8859 	free_cpumask_var(global_trace.tracing_cpumask);
8860 out_free_buffer_mask:
8861 	free_cpumask_var(tracing_buffer_mask);
8862 out:
8863 	return ret;
8864 }
8865 
8866 void __init early_trace_init(void)
8867 {
8868 	if (tracepoint_printk) {
8869 		tracepoint_print_iter =
8870 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8871 		if (WARN_ON(!tracepoint_print_iter))
8872 			tracepoint_printk = 0;
8873 		else
8874 			static_key_enable(&tracepoint_printk_key.key);
8875 	}
8876 	tracer_alloc_buffers();
8877 }
8878 
8879 void __init trace_init(void)
8880 {
8881 	trace_event_init();
8882 }
8883 
8884 __init static int clear_boot_tracer(void)
8885 {
8886 	/*
8887 	 * The default tracer at boot buffer is an init section.
8888 	 * This function is called in lateinit. If we did not
8889 	 * find the boot tracer, then clear it out, to prevent
8890 	 * later registration from accessing the buffer that is
8891 	 * about to be freed.
8892 	 */
8893 	if (!default_bootup_tracer)
8894 		return 0;
8895 
8896 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8897 	       default_bootup_tracer);
8898 	default_bootup_tracer = NULL;
8899 
8900 	return 0;
8901 }
8902 
8903 fs_initcall(tracer_init_tracefs);
8904 late_initcall_sync(clear_boot_tracer);
8905 
8906 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8907 __init static int tracing_set_default_clock(void)
8908 {
8909 	/* sched_clock_stable() is determined in late_initcall */
8910 	if (!trace_boot_clock && !sched_clock_stable()) {
8911 		printk(KERN_WARNING
8912 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8913 		       "If you want to keep using the local clock, then add:\n"
8914 		       "  \"trace_clock=local\"\n"
8915 		       "on the kernel command line\n");
8916 		tracing_set_clock(&global_trace, "global");
8917 	}
8918 
8919 	return 0;
8920 }
8921 late_initcall_sync(tracing_set_default_clock);
8922 #endif
8923