xref: /linux-6.15/kernel/trace/trace.c (revision 6da2ec56)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
5  * Copyright (C) 2008 Ingo Molnar <[email protected]>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <[email protected]>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46 
47 #include "trace.h"
48 #include "trace_output.h"
49 
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55 
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will look into the ring buffer to count the
59  * entries inserted during the selftest, although some concurrent
60  * insertions into the ring buffer, such as trace_printk(), could occur
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64 
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69 
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74 
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77 	{ }
78 };
79 
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83 	return 0;
84 }
85 
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92 
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 but will turn to zero if the initialization
96  * of the tracer is successful. But that is the only place that sets
97  * this back to zero.
98  */
99 static int tracing_disabled = 1;
100 
101 cpumask_var_t __read_mostly	tracing_buffer_mask;
102 
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputting them to a
110  * serial console.
111  *
112  * It is off by default, but you can enable it either by specifying
113  * "ftrace_dump_on_oops" on the kernel command line, or by setting
114  * /proc/sys/kernel/ftrace_dump_on_oops.
115  * Set it to 1 to dump the buffers of all CPUs, or to 2 to dump only
116  * the buffer of the CPU that triggered the oops.
117  */
118 
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120 
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123 
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127 	struct module			*mod;
128 	unsigned long			length;
129 };
130 
131 union trace_eval_map_item;
132 
133 struct trace_eval_map_tail {
134 	/*
135 	 * "end" is first and points to NULL as it must be different
136 	 * than "mod" or "eval_string"
137 	 */
138 	union trace_eval_map_item	*next;
139 	const char			*end;	/* points to NULL */
140 };
141 
142 static DEFINE_MUTEX(trace_eval_mutex);
143 
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152 	struct trace_eval_map		map;
153 	struct trace_eval_map_head	head;
154 	struct trace_eval_map_tail	tail;
155 };
156 
157 static union trace_eval_map_item *trace_eval_maps;
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159 
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161 
162 #define MAX_TRACER_SIZE		100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165 
166 static bool allocate_snapshot;
167 
168 static int __init set_cmdline_ftrace(char *str)
169 {
170 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171 	default_bootup_tracer = bootup_tracer_buf;
172 	/* We are using ftrace early, expand it */
173 	ring_buffer_expanded = true;
174 	return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
177 
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180 	if (*str++ != '=' || !*str) {
181 		ftrace_dump_on_oops = DUMP_ALL;
182 		return 1;
183 	}
184 
185 	if (!strcmp("orig_cpu", str)) {
186 		ftrace_dump_on_oops = DUMP_ORIG;
187 		return 1;
188 	}
189 
190 	return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
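/*
 * Illustrative command-line usage for the boot parameters above (an
 * editor-added sketch; the parameter names are real, the combination is
 * hypothetical):
 *
 *	ftrace=function ftrace_dump_on_oops=orig_cpu
 *
 * "ftrace=function" selects the bootup tracer via set_cmdline_ftrace(),
 * a bare "ftrace_dump_on_oops" dumps every CPU buffer on an oops, and
 * "=orig_cpu" restricts the dump to the CPU that triggered it.
 */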
193 
194 static int __init stop_trace_on_warning(char *str)
195 {
196 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197 		__disable_trace_on_warning = 1;
198 	return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201 
202 static int __init boot_alloc_snapshot(char *str)
203 {
204 	allocate_snapshot = true;
205 	/* We also need the main ring buffer expanded */
206 	ring_buffer_expanded = true;
207 	return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
210 
211 
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213 
214 static int __init set_trace_boot_options(char *str)
215 {
216 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217 	return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220 
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223 
224 static int __init set_trace_boot_clock(char *str)
225 {
226 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227 	trace_boot_clock = trace_boot_clock_buf;
228 	return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
231 
232 static int __init set_tracepoint_printk(char *str)
233 {
234 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235 		tracepoint_printk = 1;
236 	return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239 
240 unsigned long long ns2usecs(u64 nsec)
241 {
242 	nsec += 500;
243 	do_div(nsec, 1000);
244 	return nsec;
245 }
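/*
 * Worked example of the rounding above: 1499 ns becomes
 * (1499 + 500) / 1000 = 1 us, while 1500 ns becomes 2 us, i.e. values
 * are rounded to the nearest microsecond rather than truncated.
 */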
246 
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS						\
249 	(FUNCTION_DEFAULT_FLAGS |					\
250 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
251 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
252 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
253 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254 
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
257 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258 
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262 
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268 	.trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270 
271 LIST_HEAD(ftrace_trace_arrays);
272 
273 int trace_array_get(struct trace_array *this_tr)
274 {
275 	struct trace_array *tr;
276 	int ret = -ENODEV;
277 
278 	mutex_lock(&trace_types_lock);
279 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280 		if (tr == this_tr) {
281 			tr->ref++;
282 			ret = 0;
283 			break;
284 		}
285 	}
286 	mutex_unlock(&trace_types_lock);
287 
288 	return ret;
289 }
290 
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293 	WARN_ON(!this_tr->ref);
294 	this_tr->ref--;
295 }
296 
297 void trace_array_put(struct trace_array *this_tr)
298 {
299 	mutex_lock(&trace_types_lock);
300 	__trace_array_put(this_tr);
301 	mutex_unlock(&trace_types_lock);
302 }
303 
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305 			      struct ring_buffer *buffer,
306 			      struct ring_buffer_event *event)
307 {
308 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309 	    !filter_match_preds(call->filter, rec)) {
310 		__trace_event_discard_commit(buffer, event);
311 		return 1;
312 	}
313 
314 	return 0;
315 }
316 
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319 	vfree(pid_list->pids);
320 	kfree(pid_list);
321 }
322 
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333 	/*
334 	 * If pid_max changed after filtered_pids was created, we
335 	 * by default ignore all pids greater than the previous pid_max.
336 	 */
337 	if (search_pid >= filtered_pids->pid_max)
338 		return false;
339 
340 	return test_bit(search_pid, filtered_pids->pids);
341 }
342 
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355 	/*
356 	 * Return false, because if filtered_pids does not exist,
357 	 * all pids are good to trace.
358 	 */
359 	if (!filtered_pids)
360 		return false;
361 
362 	return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364 
365 /**
366  * trace_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * If adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378 				  struct task_struct *self,
379 				  struct task_struct *task)
380 {
381 	if (!pid_list)
382 		return;
383 
384 	/* For forks, we only add if the forking task is listed */
385 	if (self) {
386 		if (!trace_find_filtered_pid(pid_list, self->pid))
387 			return;
388 	}
389 
390 	/* Sorry, but we don't support pid_max changing after setting */
391 	if (task->pid >= pid_list->pid_max)
392 		return;
393 
394 	/* "self" is set for forks, and NULL for exits */
395 	if (self)
396 		set_bit(task->pid, pid_list->pids);
397 	else
398 		clear_bit(task->pid, pid_list->pids);
399 }
400 
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415 	unsigned long pid = (unsigned long)v;
416 
417 	(*pos)++;
418 
419 	/* pid already is +1 of the actual previous bit */
420 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421 
422 	/* Return pid + 1 to allow zero to be represented */
423 	if (pid < pid_list->pid_max)
424 		return (void *)(pid + 1);
425 
426 	return NULL;
427 }
428 
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442 	unsigned long pid;
443 	loff_t l = 0;
444 
445 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446 	if (pid >= pid_list->pid_max)
447 		return NULL;
448 
449 	/* Return pid + 1 so that zero can be the exit value */
450 	for (pid++; pid && l < *pos;
451 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452 		;
453 	return (void *)pid;
454 }
455 
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466 	unsigned long pid = (unsigned long)v - 1;
467 
468 	seq_printf(m, "%lu\n", pid);
469 	return 0;
470 }
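/*
 * Sketch of how the three helpers above are meant to be wired into a
 * seq_file (editor illustration; "p_start", "p_stop" and "my_pid_list"
 * are hypothetical, the real users live in the event/function pid
 * filtering code):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */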
471 
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE		127
474 
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476 		    struct trace_pid_list **new_pid_list,
477 		    const char __user *ubuf, size_t cnt)
478 {
479 	struct trace_pid_list *pid_list;
480 	struct trace_parser parser;
481 	unsigned long val;
482 	int nr_pids = 0;
483 	ssize_t read = 0;
484 	ssize_t ret = 0;
485 	loff_t pos;
486 	pid_t pid;
487 
488 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489 		return -ENOMEM;
490 
491 	/*
492 	 * Always create a new array: the write is an all-or-nothing
493 	 * operation. A new array is built whenever the user adds new
494 	 * pids, so that if the operation fails, the current list is
495 	 * not modified.
496 	 */
497 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498 	if (!pid_list)
499 		return -ENOMEM;
500 
501 	pid_list->pid_max = READ_ONCE(pid_max);
502 
503 	/* Only truncating will shrink pid_max */
504 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505 		pid_list->pid_max = filtered_pids->pid_max;
506 
507 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508 	if (!pid_list->pids) {
509 		kfree(pid_list);
510 		return -ENOMEM;
511 	}
512 
513 	if (filtered_pids) {
514 		/* copy the current bits to the new max */
515 		for_each_set_bit(pid, filtered_pids->pids,
516 				 filtered_pids->pid_max) {
517 			set_bit(pid, pid_list->pids);
518 			nr_pids++;
519 		}
520 	}
521 
522 	while (cnt > 0) {
523 
524 		pos = 0;
525 
526 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
527 		if (ret < 0 || !trace_parser_loaded(&parser))
528 			break;
529 
530 		read += ret;
531 		ubuf += ret;
532 		cnt -= ret;
533 
534 		ret = -EINVAL;
535 		if (kstrtoul(parser.buffer, 0, &val))
536 			break;
537 		if (val >= pid_list->pid_max)
538 			break;
539 
540 		pid = (pid_t)val;
541 
542 		set_bit(pid, pid_list->pids);
543 		nr_pids++;
544 
545 		trace_parser_clear(&parser);
546 		ret = 0;
547 	}
548 	trace_parser_put(&parser);
549 
550 	if (ret < 0) {
551 		trace_free_pid_list(pid_list);
552 		return ret;
553 	}
554 
555 	if (!nr_pids) {
556 		/* Cleared the list of pids */
557 		trace_free_pid_list(pid_list);
558 		read = ret;
559 		pid_list = NULL;
560 	}
561 
562 	*new_pid_list = pid_list;
563 
564 	return read;
565 }
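/*
 * Illustrative user-space view of trace_pid_write() (an assumption about
 * a typical consumer such as the set_event_pid file, not code from this
 * file): pids are written as whitespace-separated decimal values, and a
 * new list is built from the old one plus the parsed values, e.g.:
 *
 *	echo 123 456 > /sys/kernel/tracing/set_event_pid
 */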
566 
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569 	u64 ts;
570 
571 	/* Early boot up does not have a buffer yet */
572 	if (!buf->buffer)
573 		return trace_clock_local();
574 
575 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
576 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577 
578 	return ts;
579 }
580 
581 u64 ftrace_now(int cpu)
582 {
583 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585 
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" so that it can be used in fast paths,
591  * such as by the irqsoff tracer. But it may be inaccurate due to races.
592  * If you need to know the accurate state, use tracing_is_on(), which is
593  * a little slower but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597 	/*
598 	 * For quick access (irqsoff uses this in fast path), just
599 	 * return the mirror variable of the state of the ring buffer.
600 	 * It's a little racy, but we don't really care.
601 	 */
602 	smp_rmb();
603 	return !global_trace.buffer_disabled;
604 }
605 
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to a low number of 16384.
612  * If a dump on oops happens, keeping the buffer small means not
613  * having to wait for all that output. In any case, this is
614  * configurable at both boot time and run time.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
617 
618 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619 
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer		*trace_types __read_mostly;
622 
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627 
628 /*
629  * serialize the access of the ring buffer
630  *
631  * The ring buffer serializes readers, but that is only low-level
632  * protection. The validity of the events (returned by
633  * ring_buffer_peek() etc.) is not protected by the ring buffer.
634  *
635  * The content of events may become garbage if we allow other processes
636  * to consume these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not a reader page) in the ring buffer, and this page will be
639  *      rewritten by the event producer.
640  *   B) the page of the consumed events may become a page for
641  *      splice_read, and this page will be returned to the system.
642  *
643  * These primitives allow multiple processes to access different per-cpu
644  * ring buffers concurrently.
645  *
646  * These primitives don't distinguish read-only and read-consume access.
647  * Multiple read-only accesses are also serialized.
648  */
649 
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653 
654 static inline void trace_access_lock(int cpu)
655 {
656 	if (cpu == RING_BUFFER_ALL_CPUS) {
657 		/* gain it for accessing the whole ring buffer. */
658 		down_write(&all_cpu_access_lock);
659 	} else {
660 		/* gain it for accessing a cpu ring buffer. */
661 
662 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663 		down_read(&all_cpu_access_lock);
664 
665 		/* Secondly block other access to this @cpu ring buffer. */
666 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
667 	}
668 }
669 
670 static inline void trace_access_unlock(int cpu)
671 {
672 	if (cpu == RING_BUFFER_ALL_CPUS) {
673 		up_write(&all_cpu_access_lock);
674 	} else {
675 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676 		up_read(&all_cpu_access_lock);
677 	}
678 }
679 
680 static inline void trace_access_lock_init(void)
681 {
682 	int cpu;
683 
684 	for_each_possible_cpu(cpu)
685 		mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687 
688 #else
689 
690 static DEFINE_MUTEX(access_lock);
691 
692 static inline void trace_access_lock(int cpu)
693 {
694 	(void)cpu;
695 	mutex_lock(&access_lock);
696 }
697 
698 static inline void trace_access_unlock(int cpu)
699 {
700 	(void)cpu;
701 	mutex_unlock(&access_lock);
702 }
703 
704 static inline void trace_access_lock_init(void)
705 {
706 }
707 
708 #endif
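/*
 * Sketch of the reader-side pattern these helpers are meant to protect
 * (editor illustration drawn from the comment above, not a verbatim
 * caller from this file):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	if (event)
 *		process_event(event);	// hypothetical consumer
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the rwsem for write and excludes
 * all per-cpu readers, which is what the whole-buffer paths rely on.
 */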
709 
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712 				 unsigned long flags,
713 				 int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715 				      struct ring_buffer *buffer,
716 				      unsigned long flags,
717 				      int skip, int pc, struct pt_regs *regs);
718 
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721 					unsigned long flags,
722 					int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726 				      struct ring_buffer *buffer,
727 				      unsigned long flags,
728 				      int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 
732 #endif
733 
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736 		  int type, unsigned long flags, int pc)
737 {
738 	struct trace_entry *ent = ring_buffer_event_data(event);
739 
740 	tracing_generic_entry_update(ent, flags, pc);
741 	ent->type = type;
742 }
743 
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746 			  int type,
747 			  unsigned long len,
748 			  unsigned long flags, int pc)
749 {
750 	struct ring_buffer_event *event;
751 
752 	event = ring_buffer_lock_reserve(buffer, len);
753 	if (event != NULL)
754 		trace_event_setup(event, type, flags, pc);
755 
756 	return event;
757 }
758 
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761 	if (tr->trace_buffer.buffer)
762 		ring_buffer_record_on(tr->trace_buffer.buffer);
763 	/*
764 	 * This flag is looked at when buffers haven't been allocated
765 	 * yet, or by some tracers (like irqsoff), that just want to
766 	 * know if the ring buffer has been disabled, but it can handle
767 	 * races of where it gets disabled but we still do a record.
768 	 * As the check is in the fast path of the tracers, it is more
769 	 * important to be fast than accurate.
770 	 */
771 	tr->buffer_disabled = 0;
772 	/* Make the flag seen by readers */
773 	smp_wmb();
774 }
775 
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784 	tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787 
788 
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792 	__this_cpu_write(trace_taskinfo_save, true);
793 
794 	/* If this is the temp buffer, we need to commit fully */
795 	if (this_cpu_read(trace_buffered_event) == event) {
796 		/* Length is in event->array[0] */
797 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
798 		/* Release the temp buffer */
799 		this_cpu_dec(trace_buffered_event_cnt);
800 	} else
801 		ring_buffer_unlock_commit(buffer, event);
802 }
803 
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:	   The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812 	struct ring_buffer_event *event;
813 	struct ring_buffer *buffer;
814 	struct print_entry *entry;
815 	unsigned long irq_flags;
816 	int alloc;
817 	int pc;
818 
819 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820 		return 0;
821 
822 	pc = preempt_count();
823 
824 	if (unlikely(tracing_selftest_running || tracing_disabled))
825 		return 0;
826 
827 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
828 
829 	local_save_flags(irq_flags);
830 	buffer = global_trace.trace_buffer.buffer;
831 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
832 					    irq_flags, pc);
833 	if (!event)
834 		return 0;
835 
836 	entry = ring_buffer_event_data(event);
837 	entry->ip = ip;
838 
839 	memcpy(&entry->buf, str, size);
840 
841 	/* Add a newline if necessary */
842 	if (entry->buf[size - 1] != '\n') {
843 		entry->buf[size] = '\n';
844 		entry->buf[size + 1] = '\0';
845 	} else
846 		entry->buf[size] = '\0';
847 
848 	__buffer_unlock_commit(buffer, event);
849 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850 
851 	return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
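/*
 * Callers normally do not use __trace_puts() directly; the trace_puts()
 * macro is the usual entry point, picking __trace_bputs() for string
 * literals and this function otherwise, e.g. the following illustrative
 * call site:
 *
 *	trace_puts("reached the slow path\n");
 */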
854 
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:	   The address of the caller
858  * @str:   The constant string whose pointer is written to the buffer
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862 	struct ring_buffer_event *event;
863 	struct ring_buffer *buffer;
864 	struct bputs_entry *entry;
865 	unsigned long irq_flags;
866 	int size = sizeof(struct bputs_entry);
867 	int pc;
868 
869 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870 		return 0;
871 
872 	pc = preempt_count();
873 
874 	if (unlikely(tracing_selftest_running || tracing_disabled))
875 		return 0;
876 
877 	local_save_flags(irq_flags);
878 	buffer = global_trace.trace_buffer.buffer;
879 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880 					    irq_flags, pc);
881 	if (!event)
882 		return 0;
883 
884 	entry = ring_buffer_event_data(event);
885 	entry->ip			= ip;
886 	entry->str			= str;
887 
888 	__buffer_unlock_commit(buffer, event);
889 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890 
891 	return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
894 
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 void tracing_snapshot_instance(struct trace_array *tr)
897 {
898 	struct tracer *tracer = tr->current_trace;
899 	unsigned long flags;
900 
901 	if (in_nmi()) {
902 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903 		internal_trace_puts("*** snapshot is being ignored        ***\n");
904 		return;
905 	}
906 
907 	if (!tr->allocated_snapshot) {
908 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909 		internal_trace_puts("*** stopping trace here!   ***\n");
910 		tracing_off();
911 		return;
912 	}
913 
914 	/* Note, snapshot can not be used when the tracer uses it */
915 	if (tracer->use_max_tr) {
916 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918 		return;
919 	}
920 
921 	local_irq_save(flags);
922 	update_max_tr(tr, current, smp_processor_id());
923 	local_irq_restore(flags);
924 }
925 
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot either with
934  * tracing_snapshot_alloc(), or manually with:
935  * echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942 	struct trace_array *tr = &global_trace;
943 
944 	tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947 
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949 					struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951 
952 int tracing_alloc_snapshot_instance(struct trace_array *tr)
953 {
954 	int ret;
955 
956 	if (!tr->allocated_snapshot) {
957 
958 		/* allocate spare buffer */
959 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
960 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961 		if (ret < 0)
962 			return ret;
963 
964 		tr->allocated_snapshot = true;
965 	}
966 
967 	return 0;
968 }
969 
970 static void free_snapshot(struct trace_array *tr)
971 {
972 	/*
973 	 * We don't free the ring buffer; instead, we resize it because
974 	 * the max_tr ring buffer has some state (e.g. ring->clock) that
975 	 * we want to preserve.
976 	 */
977 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978 	set_buffer_entries(&tr->max_buffer, 1);
979 	tracing_reset_online_cpus(&tr->max_buffer);
980 	tr->allocated_snapshot = false;
981 }
982 
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995 	struct trace_array *tr = &global_trace;
996 	int ret;
997 
998 	ret = tracing_alloc_snapshot_instance(tr);
999 	WARN_ON(ret < 0);
1000 
1001 	return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004 
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018 	int ret;
1019 
1020 	ret = tracing_alloc_snapshot();
1021 	if (ret < 0)
1022 		return;
1023 
1024 	tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
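/*
 * Rough usage sketch for the snapshot API above (editor illustration;
 * the trigger condition is hypothetical):
 *
 *	tracing_snapshot_alloc();	// may sleep, so call it early
 *	...
 *	if (latency > threshold)	// hypothetical condition
 *		tracing_snapshot();	// swap the live buffer into max_buffer
 *
 * The captured data can then be read from the "snapshot" file in
 * tracefs, as noted in the tracing_snapshot() comment above.
 */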
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036 	return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041 	/* Give warning */
1042 	tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
1046 
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049 	if (tr->trace_buffer.buffer)
1050 		ring_buffer_record_off(tr->trace_buffer.buffer);
1051 	/*
1052 	 * This flag is looked at when buffers haven't been allocated
1053 	 * yet, or by some tracers (like irqsoff), that just want to
1054 	 * know if the ring buffer has been disabled, but it can handle
1055 	 * races of where it gets disabled but we still do a record.
1056 	 * As the check is in the fast path of the tracers, it is more
1057 	 * important to be fast than accurate.
1058 	 */
1059 	tr->buffer_disabled = 1;
1060 	/* Make the flag seen by readers */
1061 	smp_wmb();
1062 }
1063 
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074 	tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
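/*
 * Typical debugging use of the tracing_on()/tracing_off() pair
 * (illustrative only; do_something_suspicious() is hypothetical):
 *
 *	tracing_on();
 *	do_something_suspicious();
 *	tracing_off();			// freeze the buffer for inspection
 *
 * followed by reading the "trace" file in tracefs from user space.
 */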
1077 
1078 void disable_trace_on_warning(void)
1079 {
1080 	if (__disable_trace_on_warning)
1081 		tracing_off();
1082 }
1083 
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr: the trace array to check if its ring buffer is enabled
1087  *
1088  * Shows the real state of the ring buffer: whether it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092 	if (tr->trace_buffer.buffer)
1093 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094 	return !tr->buffer_disabled;
1095 }
1096 
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102 	return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
1105 
1106 static int __init set_buf_size(char *str)
1107 {
1108 	unsigned long buf_size;
1109 
1110 	if (!str)
1111 		return 0;
1112 	buf_size = memparse(str, &str);
1113 	/* nr_entries can not be zero */
1114 	if (buf_size == 0)
1115 		return 0;
1116 	trace_buf_size = buf_size;
1117 	return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120 
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123 	unsigned long threshold;
1124 	int ret;
1125 
1126 	if (!str)
1127 		return 0;
1128 	ret = kstrtoul(str, 0, &threshold);
1129 	if (ret < 0)
1130 		return 0;
1131 	tracing_thresh = threshold * 1000;
1132 	return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
1135 
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138 	return nsecs / 1000;
1139 }
1140 
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149 
1150 /* These must match the bit positions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152 	TRACE_FLAGS
1153 	NULL
1154 };
1155 
1156 static struct {
1157 	u64 (*func)(void);
1158 	const char *name;
1159 	int in_ns;		/* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161 	{ trace_clock_local,		"local",	1 },
1162 	{ trace_clock_global,		"global",	1 },
1163 	{ trace_clock_counter,		"counter",	0 },
1164 	{ trace_clock_jiffies,		"uptime",	0 },
1165 	{ trace_clock,			"perf",		1 },
1166 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1167 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1168 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1169 	ARCH_TRACE_CLOCKS
1170 };
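/*
 * The clock names above are what user space writes to pick a clock, e.g.
 * (illustrative):
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * or "trace_clock=global" on the kernel command line, which is handled
 * by set_trace_boot_clock() earlier in this file.
 */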
1171 
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174 	if (trace_clocks[tr->clock_id].in_ns)
1175 		return true;
1176 
1177 	return false;
1178 }
1179 
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185 	memset(parser, 0, sizeof(*parser));
1186 
1187 	parser->buffer = kmalloc(size, GFP_KERNEL);
1188 	if (!parser->buffer)
1189 		return 1;
1190 
1191 	parser->size = size;
1192 	return 0;
1193 }
1194 
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200 	kfree(parser->buffer);
1201 	parser->buffer = NULL;
1202 }
1203 
1204 /*
1205  * trace_get_user - reads the user input string separated by space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216 	size_t cnt, loff_t *ppos)
1217 {
1218 	char ch;
1219 	size_t read = 0;
1220 	ssize_t ret;
1221 
1222 	if (!*ppos)
1223 		trace_parser_clear(parser);
1224 
1225 	ret = get_user(ch, ubuf++);
1226 	if (ret)
1227 		goto out;
1228 
1229 	read++;
1230 	cnt--;
1231 
1232 	/*
1233 	 * The parser is not finished with the last write,
1234 	 * continue reading the user input without skipping spaces.
1235 	 */
1236 	if (!parser->cont) {
1237 		/* skip white space */
1238 		while (cnt && isspace(ch)) {
1239 			ret = get_user(ch, ubuf++);
1240 			if (ret)
1241 				goto out;
1242 			read++;
1243 			cnt--;
1244 		}
1245 
1246 		parser->idx = 0;
1247 
1248 		/* only spaces were written */
1249 		if (isspace(ch) || !ch) {
1250 			*ppos += read;
1251 			ret = read;
1252 			goto out;
1253 		}
1254 	}
1255 
1256 	/* read the non-space input */
1257 	while (cnt && !isspace(ch) && ch) {
1258 		if (parser->idx < parser->size - 1)
1259 			parser->buffer[parser->idx++] = ch;
1260 		else {
1261 			ret = -EINVAL;
1262 			goto out;
1263 		}
1264 		ret = get_user(ch, ubuf++);
1265 		if (ret)
1266 			goto out;
1267 		read++;
1268 		cnt--;
1269 	}
1270 
1271 	/* We either got finished input or we have to wait for another call. */
1272 	if (isspace(ch) || !ch) {
1273 		parser->buffer[parser->idx] = 0;
1274 		parser->cont = false;
1275 	} else if (parser->idx < parser->size - 1) {
1276 		parser->cont = true;
1277 		parser->buffer[parser->idx++] = ch;
1278 		/* Make sure the parsed string always terminates with '\0'. */
1279 		parser->buffer[parser->idx] = 0;
1280 	} else {
1281 		ret = -EINVAL;
1282 		goto out;
1283 	}
1284 
1285 	*ppos += read;
1286 	ret = read;
1287 
1288 out:
1289 	return ret;
1290 }
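/*
 * Worked example (illustrative): given the user input "foo bar", the
 * first call fills parser->buffer with "foo" and returns after consuming
 * up to and including the separating space; a later call picks up "bar".
 * A token that is cut off mid-way sets parser->cont so the next call
 * continues the same token instead of skipping leading spaces.
 */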
1291 
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295 	int len;
1296 
1297 	if (trace_seq_used(s) <= s->seq.readpos)
1298 		return -EBUSY;
1299 
1300 	len = trace_seq_used(s) - s->seq.readpos;
1301 	if (cnt > len)
1302 		cnt = len;
1303 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304 
1305 	s->seq.readpos += cnt;
1306 	return cnt;
1307 }
1308 
1309 unsigned long __read_mostly	tracing_thresh;
1310 
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1321 	struct trace_buffer *max_buf = &tr->max_buffer;
1322 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324 
1325 	max_buf->cpu = cpu;
1326 	max_buf->time_start = data->preempt_timestamp;
1327 
1328 	max_data->saved_latency = tr->max_latency;
1329 	max_data->critical_start = data->critical_start;
1330 	max_data->critical_end = data->critical_end;
1331 
1332 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333 	max_data->pid = tsk->pid;
1334 	/*
1335 	 * If tsk == current, then use current_uid(), as that does not use
1336 	 * RCU. The irq tracer can be called out of RCU scope.
1337 	 */
1338 	if (tsk == current)
1339 		max_data->uid = current_uid();
1340 	else
1341 		max_data->uid = task_uid(tsk);
1342 
1343 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344 	max_data->policy = tsk->policy;
1345 	max_data->rt_priority = tsk->rt_priority;
1346 
1347 	/* record this task's comm */
1348 	tracing_record_cmdline(tsk);
1349 }
1350 
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363 	struct ring_buffer *buf;
1364 
1365 	if (tr->stop_count)
1366 		return;
1367 
1368 	WARN_ON_ONCE(!irqs_disabled());
1369 
1370 	if (!tr->allocated_snapshot) {
1371 		/* Only the nop tracer should hit this when disabling */
1372 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1373 		return;
1374 	}
1375 
1376 	arch_spin_lock(&tr->max_lock);
1377 
1378 	buf = tr->trace_buffer.buffer;
1379 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380 	tr->max_buffer.buffer = buf;
1381 
1382 	__update_max_tr(tr, tsk, cpu);
1383 	arch_spin_unlock(&tr->max_lock);
1384 }
1385 
1386 /**
1387  * update_max_tr_single - only copy one trace over, and reset the rest
1388  * @tr: tracer
1389  * @tsk: task with the latency
1390  * @cpu: the cpu of the buffer to copy.
1391  *
1392  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393  */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397 	int ret;
1398 
1399 	if (tr->stop_count)
1400 		return;
1401 
1402 	WARN_ON_ONCE(!irqs_disabled());
1403 	if (!tr->allocated_snapshot) {
1404 		/* Only the nop tracer should hit this when disabling */
1405 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406 		return;
1407 	}
1408 
1409 	arch_spin_lock(&tr->max_lock);
1410 
1411 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412 
1413 	if (ret == -EBUSY) {
1414 		/*
1415 		 * We failed to swap the buffer due to a commit taking
1416 		 * place on this CPU. We fail to record, but we reset
1417 		 * the max trace buffer (no one writes directly to it)
1418 		 * and flag that it failed.
1419 		 */
1420 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421 			"Failed to swap buffers due to commit in progress\n");
1422 	}
1423 
1424 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425 
1426 	__update_max_tr(tr, tsk, cpu);
1427 	arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430 
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433 	/* Iterators are static, they should be filled or empty */
1434 	if (trace_buffer_iter(iter, iter->cpu_file))
1435 		return 0;
1436 
1437 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438 				full);
1439 }
1440 
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443 
1444 struct trace_selftests {
1445 	struct list_head		list;
1446 	struct tracer			*type;
1447 };
1448 
1449 static LIST_HEAD(postponed_selftests);
1450 
1451 static int save_selftest(struct tracer *type)
1452 {
1453 	struct trace_selftests *selftest;
1454 
1455 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456 	if (!selftest)
1457 		return -ENOMEM;
1458 
1459 	selftest->type = type;
1460 	list_add(&selftest->list, &postponed_selftests);
1461 	return 0;
1462 }
1463 
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466 	struct trace_array *tr = &global_trace;
1467 	struct tracer *saved_tracer = tr->current_trace;
1468 	int ret;
1469 
1470 	if (!type->selftest || tracing_selftest_disabled)
1471 		return 0;
1472 
1473 	/*
1474 	 * If a tracer registers early in boot up (before scheduling is
1475 	 * Instead, run them a little later in the boot process.
1476 	 * Instead, run it a little later in the boot process.
1477 	 */
1478 	if (!selftests_can_run)
1479 		return save_selftest(type);
1480 
1481 	/*
1482 	 * Run a selftest on this tracer.
1483 	 * Here we reset the trace buffer, and set the current
1484 	 * tracer to be this tracer. The tracer can then run some
1485 	 * internal tracing to verify that everything is in order.
1486 	 * If we fail, we do not register this tracer.
1487 	 */
1488 	tracing_reset_online_cpus(&tr->trace_buffer);
1489 
1490 	tr->current_trace = type;
1491 
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493 	if (type->use_max_tr) {
1494 		/* If we expanded the buffers, make sure the max is expanded too */
1495 		if (ring_buffer_expanded)
1496 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497 					   RING_BUFFER_ALL_CPUS);
1498 		tr->allocated_snapshot = true;
1499 	}
1500 #endif
1501 
1502 	/* the test is responsible for initializing and enabling */
1503 	pr_info("Testing tracer %s: ", type->name);
1504 	ret = type->selftest(type, tr);
1505 	/* the test is responsible for resetting too */
1506 	tr->current_trace = saved_tracer;
1507 	if (ret) {
1508 		printk(KERN_CONT "FAILED!\n");
1509 		/* Add the warning after printing 'FAILED' */
1510 		WARN_ON(1);
1511 		return -1;
1512 	}
1513 	/* Only reset on passing, to avoid touching corrupted buffers */
1514 	tracing_reset_online_cpus(&tr->trace_buffer);
1515 
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517 	if (type->use_max_tr) {
1518 		tr->allocated_snapshot = false;
1519 
1520 		/* Shrink the max buffer again */
1521 		if (ring_buffer_expanded)
1522 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1523 					   RING_BUFFER_ALL_CPUS);
1524 	}
1525 #endif
1526 
1527 	printk(KERN_CONT "PASSED\n");
1528 	return 0;
1529 }
1530 
1531 static __init int init_trace_selftests(void)
1532 {
1533 	struct trace_selftests *p, *n;
1534 	struct tracer *t, **last;
1535 	int ret;
1536 
1537 	selftests_can_run = true;
1538 
1539 	mutex_lock(&trace_types_lock);
1540 
1541 	if (list_empty(&postponed_selftests))
1542 		goto out;
1543 
1544 	pr_info("Running postponed tracer tests:\n");
1545 
1546 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1547 		ret = run_tracer_selftest(p->type);
1548 		/* If the test fails, then warn and remove from available_tracers */
1549 		if (ret < 0) {
1550 			WARN(1, "tracer: %s failed selftest, disabling\n",
1551 			     p->type->name);
1552 			last = &trace_types;
1553 			for (t = trace_types; t; t = t->next) {
1554 				if (t == p->type) {
1555 					*last = t->next;
1556 					break;
1557 				}
1558 				last = &t->next;
1559 			}
1560 		}
1561 		list_del(&p->list);
1562 		kfree(p);
1563 	}
1564 
1565  out:
1566 	mutex_unlock(&trace_types_lock);
1567 
1568 	return 0;
1569 }
1570 core_initcall(init_trace_selftests);
1571 #else
1572 static inline int run_tracer_selftest(struct tracer *type)
1573 {
1574 	return 0;
1575 }
1576 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1577 
1578 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1579 
1580 static void __init apply_trace_boot_options(void);
1581 
1582 /**
1583  * register_tracer - register a tracer with the ftrace system.
1584  * @type: the plugin for the tracer
1585  *
1586  * Register a new plugin tracer.
1587  */
1588 int __init register_tracer(struct tracer *type)
1589 {
1590 	struct tracer *t;
1591 	int ret = 0;
1592 
1593 	if (!type->name) {
1594 		pr_info("Tracer must have a name\n");
1595 		return -1;
1596 	}
1597 
1598 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1599 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1600 		return -1;
1601 	}
1602 
1603 	mutex_lock(&trace_types_lock);
1604 
1605 	tracing_selftest_running = true;
1606 
1607 	for (t = trace_types; t; t = t->next) {
1608 		if (strcmp(type->name, t->name) == 0) {
1609 			/* already found */
1610 			pr_info("Tracer %s already registered\n",
1611 				type->name);
1612 			ret = -1;
1613 			goto out;
1614 		}
1615 	}
1616 
1617 	if (!type->set_flag)
1618 		type->set_flag = &dummy_set_flag;
1619 	if (!type->flags) {
1620 		/* allocate a dummy tracer_flags */
1621 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1622 		if (!type->flags) {
1623 			ret = -ENOMEM;
1624 			goto out;
1625 		}
1626 		type->flags->val = 0;
1627 		type->flags->opts = dummy_tracer_opt;
1628 	} else
1629 		if (!type->flags->opts)
1630 			type->flags->opts = dummy_tracer_opt;
1631 
1632 	/* store the tracer for __set_tracer_option */
1633 	type->flags->trace = type;
1634 
1635 	ret = run_tracer_selftest(type);
1636 	if (ret < 0)
1637 		goto out;
1638 
1639 	type->next = trace_types;
1640 	trace_types = type;
1641 	add_tracer_options(&global_trace, type);
1642 
1643  out:
1644 	tracing_selftest_running = false;
1645 	mutex_unlock(&trace_types_lock);
1646 
1647 	if (ret || !default_bootup_tracer)
1648 		goto out_unlock;
1649 
1650 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1651 		goto out_unlock;
1652 
1653 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1654 	/* Do we want this tracer to start on bootup? */
1655 	tracing_set_tracer(&global_trace, type->name);
1656 	default_bootup_tracer = NULL;
1657 
1658 	apply_trace_boot_options();
1659 
1660 	/* disable other selftests, since this will break it. */
1661 	tracing_selftest_disabled = true;
1662 #ifdef CONFIG_FTRACE_STARTUP_TEST
1663 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1664 	       type->name);
1665 #endif
1666 
1667  out_unlock:
1668 	return ret;
1669 }
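/*
 * Minimal registration sketch (hypothetical tracer, shown only to
 * illustrate the flow through register_tracer() above; real tracers
 * also provide callbacks such as .init and .reset):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,		// hypothetical
 *		.reset	= example_tracer_reset,		// hypothetical
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */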
1670 
1671 void tracing_reset(struct trace_buffer *buf, int cpu)
1672 {
1673 	struct ring_buffer *buffer = buf->buffer;
1674 
1675 	if (!buffer)
1676 		return;
1677 
1678 	ring_buffer_record_disable(buffer);
1679 
1680 	/* Make sure all commits have finished */
1681 	synchronize_sched();
1682 	ring_buffer_reset_cpu(buffer, cpu);
1683 
1684 	ring_buffer_record_enable(buffer);
1685 }
1686 
1687 void tracing_reset_online_cpus(struct trace_buffer *buf)
1688 {
1689 	struct ring_buffer *buffer = buf->buffer;
1690 	int cpu;
1691 
1692 	if (!buffer)
1693 		return;
1694 
1695 	ring_buffer_record_disable(buffer);
1696 
1697 	/* Make sure all commits have finished */
1698 	synchronize_sched();
1699 
1700 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1701 
1702 	for_each_online_cpu(cpu)
1703 		ring_buffer_reset_cpu(buffer, cpu);
1704 
1705 	ring_buffer_record_enable(buffer);
1706 }
1707 
1708 /* Must have trace_types_lock held */
1709 void tracing_reset_all_online_cpus(void)
1710 {
1711 	struct trace_array *tr;
1712 
1713 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1714 		if (!tr->clear_trace)
1715 			continue;
1716 		tr->clear_trace = false;
1717 		tracing_reset_online_cpus(&tr->trace_buffer);
1718 #ifdef CONFIG_TRACER_MAX_TRACE
1719 		tracing_reset_online_cpus(&tr->max_buffer);
1720 #endif
1721 	}
1722 }
1723 
1724 static int *tgid_map;
1725 
1726 #define SAVED_CMDLINES_DEFAULT 128
1727 #define NO_CMDLINE_MAP UINT_MAX
1728 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1729 struct saved_cmdlines_buffer {
1730 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1731 	unsigned *map_cmdline_to_pid;
1732 	unsigned cmdline_num;
1733 	int cmdline_idx;
1734 	char *saved_cmdlines;
1735 };
1736 static struct saved_cmdlines_buffer *savedcmd;
1737 
1738 /* temporarily disable recording */
1739 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1740 
1741 static inline char *get_saved_cmdlines(int idx)
1742 {
1743 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1744 }
1745 
1746 static inline void set_cmdline(int idx, const char *cmdline)
1747 {
1748 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1749 }
1750 
1751 static int allocate_cmdlines_buffer(unsigned int val,
1752 				    struct saved_cmdlines_buffer *s)
1753 {
1754 	s->map_cmdline_to_pid = kmalloc_array(val,
1755 					      sizeof(*s->map_cmdline_to_pid),
1756 					      GFP_KERNEL);
1757 	if (!s->map_cmdline_to_pid)
1758 		return -ENOMEM;
1759 
1760 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1761 	if (!s->saved_cmdlines) {
1762 		kfree(s->map_cmdline_to_pid);
1763 		return -ENOMEM;
1764 	}
1765 
1766 	s->cmdline_idx = 0;
1767 	s->cmdline_num = val;
1768 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1769 	       sizeof(s->map_pid_to_cmdline));
1770 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1771 	       val * sizeof(*s->map_cmdline_to_pid));
1772 
1773 	return 0;
1774 }
1775 
1776 static int trace_create_savedcmd(void)
1777 {
1778 	int ret;
1779 
1780 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1781 	if (!savedcmd)
1782 		return -ENOMEM;
1783 
1784 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1785 	if (ret < 0) {
1786 		kfree(savedcmd);
1787 		savedcmd = NULL;
1788 		return -ENOMEM;
1789 	}
1790 
1791 	return 0;
1792 }
1793 
1794 int is_tracing_stopped(void)
1795 {
1796 	return global_trace.stop_count;
1797 }
1798 
1799 /**
1800  * tracing_start - quick start of the tracer
1801  *
1802  * If tracing is enabled but was stopped by tracing_stop,
1803  * this will start the tracer back up.
1804  */
1805 void tracing_start(void)
1806 {
1807 	struct ring_buffer *buffer;
1808 	unsigned long flags;
1809 
1810 	if (tracing_disabled)
1811 		return;
1812 
1813 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1814 	if (--global_trace.stop_count) {
1815 		if (global_trace.stop_count < 0) {
1816 			/* Someone screwed up their debugging */
1817 			WARN_ON_ONCE(1);
1818 			global_trace.stop_count = 0;
1819 		}
1820 		goto out;
1821 	}
1822 
1823 	/* Prevent the buffers from switching */
1824 	arch_spin_lock(&global_trace.max_lock);
1825 
1826 	buffer = global_trace.trace_buffer.buffer;
1827 	if (buffer)
1828 		ring_buffer_record_enable(buffer);
1829 
1830 #ifdef CONFIG_TRACER_MAX_TRACE
1831 	buffer = global_trace.max_buffer.buffer;
1832 	if (buffer)
1833 		ring_buffer_record_enable(buffer);
1834 #endif
1835 
1836 	arch_spin_unlock(&global_trace.max_lock);
1837 
1838  out:
1839 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1840 }
1841 
1842 static void tracing_start_tr(struct trace_array *tr)
1843 {
1844 	struct ring_buffer *buffer;
1845 	unsigned long flags;
1846 
1847 	if (tracing_disabled)
1848 		return;
1849 
1850 	/* If global, we need to also start the max tracer */
1851 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1852 		return tracing_start();
1853 
1854 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1855 
1856 	if (--tr->stop_count) {
1857 		if (tr->stop_count < 0) {
1858 			/* Someone screwed up their debugging */
1859 			WARN_ON_ONCE(1);
1860 			tr->stop_count = 0;
1861 		}
1862 		goto out;
1863 	}
1864 
1865 	buffer = tr->trace_buffer.buffer;
1866 	if (buffer)
1867 		ring_buffer_record_enable(buffer);
1868 
1869  out:
1870 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1871 }
1872 
1873 /**
1874  * tracing_stop - quick stop of the tracer
1875  *
1876  * Light weight way to stop tracing. Use in conjunction with
1877  * tracing_start.
1878  */
1879 void tracing_stop(void)
1880 {
1881 	struct ring_buffer *buffer;
1882 	unsigned long flags;
1883 
1884 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1885 	if (global_trace.stop_count++)
1886 		goto out;
1887 
1888 	/* Prevent the buffers from switching */
1889 	arch_spin_lock(&global_trace.max_lock);
1890 
1891 	buffer = global_trace.trace_buffer.buffer;
1892 	if (buffer)
1893 		ring_buffer_record_disable(buffer);
1894 
1895 #ifdef CONFIG_TRACER_MAX_TRACE
1896 	buffer = global_trace.max_buffer.buffer;
1897 	if (buffer)
1898 		ring_buffer_record_disable(buffer);
1899 #endif
1900 
1901 	arch_spin_unlock(&global_trace.max_lock);
1902 
1903  out:
1904 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1905 }
1906 
1907 static void tracing_stop_tr(struct trace_array *tr)
1908 {
1909 	struct ring_buffer *buffer;
1910 	unsigned long flags;
1911 
1912 	/* If global, we need to also stop the max tracer */
1913 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1914 		return tracing_stop();
1915 
1916 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1917 	if (tr->stop_count++)
1918 		goto out;
1919 
1920 	buffer = tr->trace_buffer.buffer;
1921 	if (buffer)
1922 		ring_buffer_record_disable(buffer);
1923 
1924  out:
1925 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1926 }
1927 
1928 static int trace_save_cmdline(struct task_struct *tsk)
1929 {
1930 	unsigned pid, idx;
1931 
1932 	/* treat recording of idle task as a success */
1933 	if (!tsk->pid)
1934 		return 1;
1935 
1936 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1937 		return 0;
1938 
1939 	/*
1940 	 * It's not the end of the world if we don't get
1941 	 * the lock, but we also don't want to spin
1942 	 * nor do we want to disable interrupts,
1943 	 * so if we miss here, then better luck next time.
1944 	 */
1945 	if (!arch_spin_trylock(&trace_cmdline_lock))
1946 		return 0;
1947 
1948 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1949 	if (idx == NO_CMDLINE_MAP) {
1950 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1951 
1952 		/*
1953 		 * Check whether the cmdline buffer at idx has a pid
1954 		 * mapped. We are going to overwrite that entry so we
1955 		 * need to clear the map_pid_to_cmdline. Otherwise we
1956 		 * would read the new comm for the old pid.
1957 		 */
1958 		pid = savedcmd->map_cmdline_to_pid[idx];
1959 		if (pid != NO_CMDLINE_MAP)
1960 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1961 
1962 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1963 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1964 
1965 		savedcmd->cmdline_idx = idx;
1966 	}
1967 
1968 	set_cmdline(idx, tsk->comm);
1969 
1970 	arch_spin_unlock(&trace_cmdline_lock);
1971 
1972 	return 1;
1973 }
1974 
1975 static void __trace_find_cmdline(int pid, char comm[])
1976 {
1977 	unsigned map;
1978 
1979 	if (!pid) {
1980 		strcpy(comm, "<idle>");
1981 		return;
1982 	}
1983 
1984 	if (WARN_ON_ONCE(pid < 0)) {
1985 		strcpy(comm, "<XXX>");
1986 		return;
1987 	}
1988 
1989 	if (pid > PID_MAX_DEFAULT) {
1990 		strcpy(comm, "<...>");
1991 		return;
1992 	}
1993 
1994 	map = savedcmd->map_pid_to_cmdline[pid];
1995 	if (map != NO_CMDLINE_MAP)
1996 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1997 	else
1998 		strcpy(comm, "<...>");
1999 }
2000 
2001 void trace_find_cmdline(int pid, char comm[])
2002 {
2003 	preempt_disable();
2004 	arch_spin_lock(&trace_cmdline_lock);
2005 
2006 	__trace_find_cmdline(pid, comm);
2007 
2008 	arch_spin_unlock(&trace_cmdline_lock);
2009 	preempt_enable();
2010 }
2011 
2012 int trace_find_tgid(int pid)
2013 {
2014 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2015 		return 0;
2016 
2017 	return tgid_map[pid];
2018 }
2019 
2020 static int trace_save_tgid(struct task_struct *tsk)
2021 {
2022 	/* treat recording of idle task as a success */
2023 	if (!tsk->pid)
2024 		return 1;
2025 
2026 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2027 		return 0;
2028 
2029 	tgid_map[tsk->pid] = tsk->tgid;
2030 	return 1;
2031 }
2032 
2033 static bool tracing_record_taskinfo_skip(int flags)
2034 {
2035 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2036 		return true;
2037 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2038 		return true;
2039 	if (!__this_cpu_read(trace_taskinfo_save))
2040 		return true;
2041 	return false;
2042 }
2043 
2044 /**
2045  * tracing_record_taskinfo - record the task info of a task
2046  *
2047  * @task:  task to record
2048  * @flags: TRACE_RECORD_CMDLINE for recording comm
2049  *         TRACE_RECORD_TGID for recording tgid
2050  */
2051 void tracing_record_taskinfo(struct task_struct *task, int flags)
2052 {
2053 	bool done;
2054 
2055 	if (tracing_record_taskinfo_skip(flags))
2056 		return;
2057 
2058 	/*
2059 	 * Record as much task information as possible. If some fail, continue
2060 	 * to try to record the others.
2061 	 */
2062 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2063 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2064 
2065 	/* If recording any information failed, retry again soon. */
2066 	if (!done)
2067 		return;
2068 
2069 	__this_cpu_write(trace_taskinfo_save, false);
2070 }
2071 
2072 /**
2073  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2074  *
2075  * @prev:  previous task during sched_switch
2076  * @next:  next task during sched_switch
2077  * @flags: TRACE_RECORD_CMDLINE for recording comm
2078  *         TRACE_RECORD_TGID for recording tgid
2079  */
2080 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2081 					  struct task_struct *next, int flags)
2082 {
2083 	bool done;
2084 
2085 	if (tracing_record_taskinfo_skip(flags))
2086 		return;
2087 
2088 	/*
2089 	 * Record as much task information as possible. If some fail, continue
2090 	 * to try to record the others.
2091 	 */
2092 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2093 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2094 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2095 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2096 
2097 	/* If recording any information failed, retry again soon. */
2098 	if (!done)
2099 		return;
2100 
2101 	__this_cpu_write(trace_taskinfo_save, false);
2102 }
2103 
2104 /* Helpers to record a specific task information */
2105 void tracing_record_cmdline(struct task_struct *task)
2106 {
2107 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2108 }
2109 
2110 void tracing_record_tgid(struct task_struct *task)
2111 {
2112 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2113 }
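
/*
 * Usage sketch (illustrative only): a probe that wants both the comm
 * and the tgid cached before it writes an entry can use the helpers
 * above, or combine the flags into a single call:
 *
 *	tracing_record_cmdline(current);
 *	tracing_record_tgid(current);
 *
 *	tracing_record_taskinfo(current, TRACE_RECORD_CMDLINE |
 *					 TRACE_RECORD_TGID);
 */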
2114 
2115 /*
2116  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2117  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2118  * simplifies those functions and keeps them in sync.
2119  */
2120 enum print_line_t trace_handle_return(struct trace_seq *s)
2121 {
2122 	return trace_seq_has_overflowed(s) ?
2123 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2124 }
2125 EXPORT_SYMBOL_GPL(trace_handle_return);
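
/*
 * Usage sketch (hypothetical event output callback, for illustration
 * only): emit the fields and let trace_handle_return() collapse the
 * overflow check into the return value:
 *
 *	static enum print_line_t
 *	trace_foo_print(struct trace_iterator *iter, int flags,
 *			struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "foo: %d\n", 42);
 *		return trace_handle_return(s);
 *	}
 */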
2126 
2127 void
2128 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2129 			     int pc)
2130 {
2131 	struct task_struct *tsk = current;
2132 
2133 	entry->preempt_count		= pc & 0xff;
2134 	entry->pid			= (tsk) ? tsk->pid : 0;
2135 	entry->flags =
2136 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2137 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2138 #else
2139 		TRACE_FLAG_IRQS_NOSUPPORT |
2140 #endif
2141 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2142 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2143 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2144 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2145 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2146 }
2147 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
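
/*
 * Usage sketch (illustrative only; "event" stands for an already
 * reserved ring buffer event): callers capture the irq flags and
 * preempt count themselves and let this helper fill in the common
 * fields of the entry:
 *
 *	struct trace_entry *ent = ring_buffer_event_data(event);
 *	unsigned long irq_flags;
 *
 *	local_save_flags(irq_flags);
 *	tracing_generic_entry_update(ent, irq_flags, preempt_count());
 */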
2148 
2149 struct ring_buffer_event *
2150 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2151 			  int type,
2152 			  unsigned long len,
2153 			  unsigned long flags, int pc)
2154 {
2155 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2156 }
2157 
2158 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2159 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2160 static int trace_buffered_event_ref;
2161 
2162 /**
2163  * trace_buffered_event_enable - enable buffering events
2164  *
2165  * When events are being filtered, it is quicker to use a temporary
2166  * buffer to write the event data into if there's a likely chance
2167  * that it will not be committed. The discard of the ring buffer
2168  * is not as fast as committing, and is much slower than copying
2169  * a commit.
2170  *
2171  * When an event is to be filtered, allocate per cpu buffers to
2172  * write the event data into, and if the event is filtered and discarded
2173  * it is simply dropped, otherwise, the entire data is to be committed
2174  * in one shot.
2175  */
2176 void trace_buffered_event_enable(void)
2177 {
2178 	struct ring_buffer_event *event;
2179 	struct page *page;
2180 	int cpu;
2181 
2182 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2183 
2184 	if (trace_buffered_event_ref++)
2185 		return;
2186 
2187 	for_each_tracing_cpu(cpu) {
2188 		page = alloc_pages_node(cpu_to_node(cpu),
2189 					GFP_KERNEL | __GFP_NORETRY, 0);
2190 		if (!page)
2191 			goto failed;
2192 
2193 		event = page_address(page);
2194 		memset(event, 0, sizeof(*event));
2195 
2196 		per_cpu(trace_buffered_event, cpu) = event;
2197 
2198 		preempt_disable();
2199 		if (cpu == smp_processor_id() &&
2200 		    this_cpu_read(trace_buffered_event) !=
2201 		    per_cpu(trace_buffered_event, cpu))
2202 			WARN_ON_ONCE(1);
2203 		preempt_enable();
2204 	}
2205 
2206 	return;
2207  failed:
2208 	trace_buffered_event_disable();
2209 }
2210 
2211 static void enable_trace_buffered_event(void *data)
2212 {
2213 	/* Probably not needed, but do it anyway */
2214 	smp_rmb();
2215 	this_cpu_dec(trace_buffered_event_cnt);
2216 }
2217 
2218 static void disable_trace_buffered_event(void *data)
2219 {
2220 	this_cpu_inc(trace_buffered_event_cnt);
2221 }
2222 
2223 /**
2224  * trace_buffered_event_disable - disable buffering events
2225  *
2226  * When a filter is removed, it is faster to not use the buffered
2227  * events, and to commit directly into the ring buffer. Free up
2228  * the temp buffers when there are no more users. This requires
2229  * special synchronization with current events.
2230  */
2231 void trace_buffered_event_disable(void)
2232 {
2233 	int cpu;
2234 
2235 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2236 
2237 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2238 		return;
2239 
2240 	if (--trace_buffered_event_ref)
2241 		return;
2242 
2243 	preempt_disable();
2244 	/* For each CPU, set the buffer as used. */
2245 	smp_call_function_many(tracing_buffer_mask,
2246 			       disable_trace_buffered_event, NULL, 1);
2247 	preempt_enable();
2248 
2249 	/* Wait for all current users to finish */
2250 	synchronize_sched();
2251 
2252 	for_each_tracing_cpu(cpu) {
2253 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2254 		per_cpu(trace_buffered_event, cpu) = NULL;
2255 	}
2256 	/*
2257 	 * Make sure trace_buffered_event is NULL before clearing
2258 	 * trace_buffered_event_cnt.
2259 	 */
2260 	smp_wmb();
2261 
2262 	preempt_disable();
2263 	/* Do the work on each cpu */
2264 	smp_call_function_many(tracing_buffer_mask,
2265 			       enable_trace_buffered_event, NULL, 1);
2266 	preempt_enable();
2267 }
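
/*
 * Usage sketch (illustrative only): the enable/disable calls are
 * reference counted and must be made under event_mutex, typically
 * paired around adding and removing an event filter:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	(install the filter)
 *	mutex_unlock(&event_mutex);
 *
 *	mutex_lock(&event_mutex);
 *	(remove the filter)
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */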
2268 
2269 static struct ring_buffer *temp_buffer;
2270 
2271 struct ring_buffer_event *
2272 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2273 			  struct trace_event_file *trace_file,
2274 			  int type, unsigned long len,
2275 			  unsigned long flags, int pc)
2276 {
2277 	struct ring_buffer_event *entry;
2278 	int val;
2279 
2280 	*current_rb = trace_file->tr->trace_buffer.buffer;
2281 
2282 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2283 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2284 	    (entry = this_cpu_read(trace_buffered_event))) {
2285 		/* Try to use the per cpu buffer first */
2286 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2287 		if (val == 1) {
2288 			trace_event_setup(entry, type, flags, pc);
2289 			entry->array[0] = len;
2290 			return entry;
2291 		}
2292 		this_cpu_dec(trace_buffered_event_cnt);
2293 	}
2294 
2295 	entry = __trace_buffer_lock_reserve(*current_rb,
2296 					    type, len, flags, pc);
2297 	/*
2298	 * If tracing is off, but we have triggers enabled,
2299	 * we still need to look at the event data. Use the temp_buffer
2300	 * to store the trace event for the trigger to use. It's recursion
2301	 * safe and will not be recorded anywhere.
2302 	 */
2303 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2304 		*current_rb = temp_buffer;
2305 		entry = __trace_buffer_lock_reserve(*current_rb,
2306 						    type, len, flags, pc);
2307 	}
2308 	return entry;
2309 }
2310 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2311 
2312 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2313 static DEFINE_MUTEX(tracepoint_printk_mutex);
2314 
2315 static void output_printk(struct trace_event_buffer *fbuffer)
2316 {
2317 	struct trace_event_call *event_call;
2318 	struct trace_event *event;
2319 	unsigned long flags;
2320 	struct trace_iterator *iter = tracepoint_print_iter;
2321 
2322 	/* We should never get here if iter is NULL */
2323 	if (WARN_ON_ONCE(!iter))
2324 		return;
2325 
2326 	event_call = fbuffer->trace_file->event_call;
2327 	if (!event_call || !event_call->event.funcs ||
2328 	    !event_call->event.funcs->trace)
2329 		return;
2330 
2331 	event = &fbuffer->trace_file->event_call->event;
2332 
2333 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2334 	trace_seq_init(&iter->seq);
2335 	iter->ent = fbuffer->entry;
2336 	event_call->event.funcs->trace(iter, 0, event);
2337 	trace_seq_putc(&iter->seq, 0);
2338 	printk("%s", iter->seq.buffer);
2339 
2340 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2341 }
2342 
2343 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2344 			     void __user *buffer, size_t *lenp,
2345 			     loff_t *ppos)
2346 {
2347 	int save_tracepoint_printk;
2348 	int ret;
2349 
2350 	mutex_lock(&tracepoint_printk_mutex);
2351 	save_tracepoint_printk = tracepoint_printk;
2352 
2353 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2354 
2355 	/*
2356	 * This will force exiting early, as tracepoint_printk
2357	 * is always zero when tracepoint_print_iter is not allocated.
2358 	 */
2359 	if (!tracepoint_print_iter)
2360 		tracepoint_printk = 0;
2361 
2362 	if (save_tracepoint_printk == tracepoint_printk)
2363 		goto out;
2364 
2365 	if (tracepoint_printk)
2366 		static_key_enable(&tracepoint_printk_key.key);
2367 	else
2368 		static_key_disable(&tracepoint_printk_key.key);
2369 
2370  out:
2371 	mutex_unlock(&tracepoint_printk_mutex);
2372 
2373 	return ret;
2374 }
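
/*
 * This handler backs the kernel.tracepoint_printk sysctl, so the
 * static key is normally toggled from user space with something like:
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *	echo 0 > /proc/sys/kernel/tracepoint_printk
 */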
2375 
2376 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2377 {
2378 	if (static_key_false(&tracepoint_printk_key.key))
2379 		output_printk(fbuffer);
2380 
2381 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2382 				    fbuffer->event, fbuffer->entry,
2383 				    fbuffer->flags, fbuffer->pc);
2384 }
2385 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2386 
2387 /*
2388  * Skip 3:
2389  *
2390  *   trace_buffer_unlock_commit_regs()
2391  *   trace_event_buffer_commit()
2392  *   trace_event_raw_event_xxx()
2393  */
2394 # define STACK_SKIP 3
2395 
2396 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2397 				     struct ring_buffer *buffer,
2398 				     struct ring_buffer_event *event,
2399 				     unsigned long flags, int pc,
2400 				     struct pt_regs *regs)
2401 {
2402 	__buffer_unlock_commit(buffer, event);
2403 
2404 	/*
2405 	 * If regs is not set, then skip the necessary functions.
2406 	 * Note, we can still get here via blktrace, wakeup tracer
2407 	 * and mmiotrace, but that's ok if they lose a function or
2408 	 * two. They are not that meaningful.
2409 	 */
2410 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2411 	ftrace_trace_userstack(buffer, flags, pc);
2412 }
2413 
2414 /*
2415  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2416  */
2417 void
2418 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2419 				   struct ring_buffer_event *event)
2420 {
2421 	__buffer_unlock_commit(buffer, event);
2422 }
2423 
2424 static void
2425 trace_process_export(struct trace_export *export,
2426 	       struct ring_buffer_event *event)
2427 {
2428 	struct trace_entry *entry;
2429 	unsigned int size = 0;
2430 
2431 	entry = ring_buffer_event_data(event);
2432 	size = ring_buffer_event_length(event);
2433 	export->write(export, entry, size);
2434 }
2435 
2436 static DEFINE_MUTEX(ftrace_export_lock);
2437 
2438 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2439 
2440 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2441 
2442 static inline void ftrace_exports_enable(void)
2443 {
2444 	static_branch_enable(&ftrace_exports_enabled);
2445 }
2446 
2447 static inline void ftrace_exports_disable(void)
2448 {
2449 	static_branch_disable(&ftrace_exports_enabled);
2450 }
2451 
2452 void ftrace_exports(struct ring_buffer_event *event)
2453 {
2454 	struct trace_export *export;
2455 
2456 	preempt_disable_notrace();
2457 
2458 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2459 	while (export) {
2460 		trace_process_export(export, event);
2461 		export = rcu_dereference_raw_notrace(export->next);
2462 	}
2463 
2464 	preempt_enable_notrace();
2465 }
2466 
2467 static inline void
2468 add_trace_export(struct trace_export **list, struct trace_export *export)
2469 {
2470 	rcu_assign_pointer(export->next, *list);
2471 	/*
2472	 * We are adding export to the list, but another
2473	 * CPU might be walking that list. We need to make sure
2474	 * the export->next pointer is valid before another CPU sees
2475	 * the export pointer included in the list.
2476 	 */
2477 	rcu_assign_pointer(*list, export);
2478 }
2479 
2480 static inline int
2481 rm_trace_export(struct trace_export **list, struct trace_export *export)
2482 {
2483 	struct trace_export **p;
2484 
2485 	for (p = list; *p != NULL; p = &(*p)->next)
2486 		if (*p == export)
2487 			break;
2488 
2489 	if (*p != export)
2490 		return -1;
2491 
2492 	rcu_assign_pointer(*p, (*p)->next);
2493 
2494 	return 0;
2495 }
2496 
2497 static inline void
2498 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2499 {
2500 	if (*list == NULL)
2501 		ftrace_exports_enable();
2502 
2503 	add_trace_export(list, export);
2504 }
2505 
2506 static inline int
2507 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2508 {
2509 	int ret;
2510 
2511 	ret = rm_trace_export(list, export);
2512 	if (*list == NULL)
2513 		ftrace_exports_disable();
2514 
2515 	return ret;
2516 }
2517 
2518 int register_ftrace_export(struct trace_export *export)
2519 {
2520 	if (WARN_ON_ONCE(!export->write))
2521 		return -1;
2522 
2523 	mutex_lock(&ftrace_export_lock);
2524 
2525 	add_ftrace_export(&ftrace_exports_list, export);
2526 
2527 	mutex_unlock(&ftrace_export_lock);
2528 
2529 	return 0;
2530 }
2531 EXPORT_SYMBOL_GPL(register_ftrace_export);
2532 
2533 int unregister_ftrace_export(struct trace_export *export)
2534 {
2535 	int ret;
2536 
2537 	mutex_lock(&ftrace_export_lock);
2538 
2539 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2540 
2541 	mutex_unlock(&ftrace_export_lock);
2542 
2543 	return ret;
2544 }
2545 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
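
/*
 * Usage sketch (hypothetical module, for illustration only; the
 * ->write() prototype is assumed from <linux/trace.h>): an export
 * supplies a write callback and then receives every exported event
 * while it stays registered:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u byte entry\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */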
2546 
2547 void
2548 trace_function(struct trace_array *tr,
2549 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2550 	       int pc)
2551 {
2552 	struct trace_event_call *call = &event_function;
2553 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2554 	struct ring_buffer_event *event;
2555 	struct ftrace_entry *entry;
2556 
2557 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2558 					    flags, pc);
2559 	if (!event)
2560 		return;
2561 	entry	= ring_buffer_event_data(event);
2562 	entry->ip			= ip;
2563 	entry->parent_ip		= parent_ip;
2564 
2565 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2566 		if (static_branch_unlikely(&ftrace_exports_enabled))
2567 			ftrace_exports(event);
2568 		__buffer_unlock_commit(buffer, event);
2569 	}
2570 }
2571 
2572 #ifdef CONFIG_STACKTRACE
2573 
2574 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2575 struct ftrace_stack {
2576 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2577 };
2578 
2579 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2580 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2581 
2582 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2583 				 unsigned long flags,
2584 				 int skip, int pc, struct pt_regs *regs)
2585 {
2586 	struct trace_event_call *call = &event_kernel_stack;
2587 	struct ring_buffer_event *event;
2588 	struct stack_entry *entry;
2589 	struct stack_trace trace;
2590 	int use_stack;
2591 	int size = FTRACE_STACK_ENTRIES;
2592 
2593 	trace.nr_entries	= 0;
2594 	trace.skip		= skip;
2595 
2596 	/*
2597	 * Add one, for this function and the call to save_stack_trace().
2598 	 * If regs is set, then these functions will not be in the way.
2599 	 */
2600 #ifndef CONFIG_UNWINDER_ORC
2601 	if (!regs)
2602 		trace.skip++;
2603 #endif
2604 
2605 	/*
2606 	 * Since events can happen in NMIs there's no safe way to
2607 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2608	 * or NMI comes in, it will just have to save its stack directly
2609	 * into the ring buffer event, limited to FTRACE_STACK_ENTRIES.
2610 	 */
2611 	preempt_disable_notrace();
2612 
2613 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2614 	/*
2615 	 * We don't need any atomic variables, just a barrier.
2616 	 * If an interrupt comes in, we don't care, because it would
2617 	 * have exited and put the counter back to what we want.
2618 	 * We just need a barrier to keep gcc from moving things
2619 	 * around.
2620 	 */
2621 	barrier();
2622 	if (use_stack == 1) {
2623 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2624 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2625 
2626 		if (regs)
2627 			save_stack_trace_regs(regs, &trace);
2628 		else
2629 			save_stack_trace(&trace);
2630 
2631 		if (trace.nr_entries > size)
2632 			size = trace.nr_entries;
2633 	} else
2634 		/* From now on, use_stack is a boolean */
2635 		use_stack = 0;
2636 
2637 	size *= sizeof(unsigned long);
2638 
2639 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2640 					    sizeof(*entry) + size, flags, pc);
2641 	if (!event)
2642 		goto out;
2643 	entry = ring_buffer_event_data(event);
2644 
2645 	memset(&entry->caller, 0, size);
2646 
2647 	if (use_stack)
2648 		memcpy(&entry->caller, trace.entries,
2649 		       trace.nr_entries * sizeof(unsigned long));
2650 	else {
2651 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2652 		trace.entries		= entry->caller;
2653 		if (regs)
2654 			save_stack_trace_regs(regs, &trace);
2655 		else
2656 			save_stack_trace(&trace);
2657 	}
2658 
2659 	entry->size = trace.nr_entries;
2660 
2661 	if (!call_filter_check_discard(call, entry, buffer, event))
2662 		__buffer_unlock_commit(buffer, event);
2663 
2664  out:
2665 	/* Again, don't let gcc optimize things here */
2666 	barrier();
2667 	__this_cpu_dec(ftrace_stack_reserve);
2668 	preempt_enable_notrace();
2669 
2670 }
2671 
2672 static inline void ftrace_trace_stack(struct trace_array *tr,
2673 				      struct ring_buffer *buffer,
2674 				      unsigned long flags,
2675 				      int skip, int pc, struct pt_regs *regs)
2676 {
2677 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2678 		return;
2679 
2680 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2681 }
2682 
2683 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2684 		   int pc)
2685 {
2686 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2687 
2688 	if (rcu_is_watching()) {
2689 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2690 		return;
2691 	}
2692 
2693 	/*
2694 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2695 	 * but if the above rcu_is_watching() failed, then the NMI
2696 	 * triggered someplace critical, and rcu_irq_enter() should
2697 	 * not be called from NMI.
2698 	 */
2699 	if (unlikely(in_nmi()))
2700 		return;
2701 
2702 	rcu_irq_enter_irqson();
2703 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2704 	rcu_irq_exit_irqson();
2705 }
2706 
2707 /**
2708  * trace_dump_stack - record a stack back trace in the trace buffer
2709  * @skip: Number of functions to skip (helper handlers)
2710  */
2711 void trace_dump_stack(int skip)
2712 {
2713 	unsigned long flags;
2714 
2715 	if (tracing_disabled || tracing_selftest_running)
2716 		return;
2717 
2718 	local_save_flags(flags);
2719 
2720 #ifndef CONFIG_UNWINDER_ORC
2721 	/* Skip 1 to skip this function. */
2722 	skip++;
2723 #endif
2724 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2725 			     flags, skip, preempt_count(), NULL);
2726 }
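
/*
 * Usage sketch (illustrative only; the condition is hypothetical):
 * a driver chasing a rare code path can drop a kernel stack trace
 * into the ring buffer instead of spamming the console:
 *
 *	if (unlikely(hit_rare_condition))
 *		trace_dump_stack(0);
 */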
2727 
2728 static DEFINE_PER_CPU(int, user_stack_count);
2729 
2730 void
2731 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2732 {
2733 	struct trace_event_call *call = &event_user_stack;
2734 	struct ring_buffer_event *event;
2735 	struct userstack_entry *entry;
2736 	struct stack_trace trace;
2737 
2738 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2739 		return;
2740 
2741 	/*
2742	 * NMIs can not handle page faults, even with fixups.
2743	 * Saving the user stack can (and often does) fault.
2744 	 */
2745 	if (unlikely(in_nmi()))
2746 		return;
2747 
2748 	/*
2749	 * Prevent recursion, since the user stack tracing may
2750 	 * trigger other kernel events.
2751 	 */
2752 	preempt_disable();
2753 	if (__this_cpu_read(user_stack_count))
2754 		goto out;
2755 
2756 	__this_cpu_inc(user_stack_count);
2757 
2758 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2759 					    sizeof(*entry), flags, pc);
2760 	if (!event)
2761 		goto out_drop_count;
2762 	entry	= ring_buffer_event_data(event);
2763 
2764 	entry->tgid		= current->tgid;
2765 	memset(&entry->caller, 0, sizeof(entry->caller));
2766 
2767 	trace.nr_entries	= 0;
2768 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2769 	trace.skip		= 0;
2770 	trace.entries		= entry->caller;
2771 
2772 	save_stack_trace_user(&trace);
2773 	if (!call_filter_check_discard(call, entry, buffer, event))
2774 		__buffer_unlock_commit(buffer, event);
2775 
2776  out_drop_count:
2777 	__this_cpu_dec(user_stack_count);
2778  out:
2779 	preempt_enable();
2780 }
2781 
2782 #ifdef UNUSED
2783 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2784 {
2785	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2786 }
2787 #endif /* UNUSED */
2788 
2789 #endif /* CONFIG_STACKTRACE */
2790 
2791 /* created for use with alloc_percpu */
2792 struct trace_buffer_struct {
2793 	int nesting;
2794 	char buffer[4][TRACE_BUF_SIZE];
2795 };
2796 
2797 static struct trace_buffer_struct *trace_percpu_buffer;
2798 
2799 /*
2800  * This allows for lockless recording.  If we're nested too deeply, then
2801  * this returns NULL.
2802  */
2803 static char *get_trace_buf(void)
2804 {
2805 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2806 
2807 	if (!buffer || buffer->nesting >= 4)
2808 		return NULL;
2809 
2810 	buffer->nesting++;
2811 
2812 	/* Interrupts must see nesting incremented before we use the buffer */
2813 	barrier();
2814 	return &buffer->buffer[buffer->nesting][0];
2815 }
2816 
2817 static void put_trace_buf(void)
2818 {
2819 	/* Don't let the decrement of nesting leak before this */
2820 	barrier();
2821 	this_cpu_dec(trace_percpu_buffer->nesting);
2822 }
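
/*
 * Usage sketch (illustrative only): callers such as trace_vbprintk()
 * below bracket their use of the per-cpu buffer with get/put while
 * preemption is disabled, so a nested trace_printk() from an
 * interrupt simply gets the next nesting level:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		(format into tbuffer)
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */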
2823 
2824 static int alloc_percpu_trace_buffer(void)
2825 {
2826 	struct trace_buffer_struct *buffers;
2827 
2828 	buffers = alloc_percpu(struct trace_buffer_struct);
2829 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2830 		return -ENOMEM;
2831 
2832 	trace_percpu_buffer = buffers;
2833 	return 0;
2834 }
2835 
2836 static int buffers_allocated;
2837 
2838 void trace_printk_init_buffers(void)
2839 {
2840 	if (buffers_allocated)
2841 		return;
2842 
2843 	if (alloc_percpu_trace_buffer())
2844 		return;
2845 
2846 	/* trace_printk() is for debug use only. Don't use it in production. */
2847 
2848 	pr_warn("\n");
2849 	pr_warn("**********************************************************\n");
2850 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2851 	pr_warn("**                                                      **\n");
2852 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2853 	pr_warn("**                                                      **\n");
2854 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2855 	pr_warn("** unsafe for production use.                           **\n");
2856 	pr_warn("**                                                      **\n");
2857 	pr_warn("** If you see this message and you are not debugging    **\n");
2858 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2859 	pr_warn("**                                                      **\n");
2860 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2861 	pr_warn("**********************************************************\n");
2862 
2863 	/* Expand the buffers to set size */
2864 	tracing_update_buffers();
2865 
2866 	buffers_allocated = 1;
2867 
2868 	/*
2869 	 * trace_printk_init_buffers() can be called by modules.
2870 	 * If that happens, then we need to start cmdline recording
2871 	 * directly here. If the global_trace.buffer is already
2872 	 * allocated here, then this was called by module code.
2873 	 */
2874 	if (global_trace.trace_buffer.buffer)
2875 		tracing_start_cmdline_record();
2876 }
2877 
2878 void trace_printk_start_comm(void)
2879 {
2880 	/* Start tracing comms if trace printk is set */
2881 	if (!buffers_allocated)
2882 		return;
2883 	tracing_start_cmdline_record();
2884 }
2885 
2886 static void trace_printk_start_stop_comm(int enabled)
2887 {
2888 	if (!buffers_allocated)
2889 		return;
2890 
2891 	if (enabled)
2892 		tracing_start_cmdline_record();
2893 	else
2894 		tracing_stop_cmdline_record();
2895 }
2896 
2897 /**
2898  * trace_vbprintk - write binary msg to tracing buffer
2899  * @ip:   The address of the caller
 * @fmt:  The string format to write to the buffer
 * @args: Arguments for @fmt
2900  */
2901 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2902 {
2903 	struct trace_event_call *call = &event_bprint;
2904 	struct ring_buffer_event *event;
2905 	struct ring_buffer *buffer;
2906 	struct trace_array *tr = &global_trace;
2907 	struct bprint_entry *entry;
2908 	unsigned long flags;
2909 	char *tbuffer;
2910 	int len = 0, size, pc;
2911 
2912 	if (unlikely(tracing_selftest_running || tracing_disabled))
2913 		return 0;
2914 
2915 	/* Don't pollute graph traces with trace_vprintk internals */
2916 	pause_graph_tracing();
2917 
2918 	pc = preempt_count();
2919 	preempt_disable_notrace();
2920 
2921 	tbuffer = get_trace_buf();
2922 	if (!tbuffer) {
2923 		len = 0;
2924 		goto out_nobuffer;
2925 	}
2926 
2927 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2928 
2929 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2930 		goto out;
2931 
2932 	local_save_flags(flags);
2933 	size = sizeof(*entry) + sizeof(u32) * len;
2934 	buffer = tr->trace_buffer.buffer;
2935 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2936 					    flags, pc);
2937 	if (!event)
2938 		goto out;
2939 	entry = ring_buffer_event_data(event);
2940 	entry->ip			= ip;
2941 	entry->fmt			= fmt;
2942 
2943 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2944 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2945 		__buffer_unlock_commit(buffer, event);
2946 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2947 	}
2948 
2949 out:
2950 	put_trace_buf();
2951 
2952 out_nobuffer:
2953 	preempt_enable_notrace();
2954 	unpause_graph_tracing();
2955 
2956 	return len;
2957 }
2958 EXPORT_SYMBOL_GPL(trace_vbprintk);
2959 
2960 static int
2961 __trace_array_vprintk(struct ring_buffer *buffer,
2962 		      unsigned long ip, const char *fmt, va_list args)
2963 {
2964 	struct trace_event_call *call = &event_print;
2965 	struct ring_buffer_event *event;
2966 	int len = 0, size, pc;
2967 	struct print_entry *entry;
2968 	unsigned long flags;
2969 	char *tbuffer;
2970 
2971 	if (tracing_disabled || tracing_selftest_running)
2972 		return 0;
2973 
2974 	/* Don't pollute graph traces with trace_vprintk internals */
2975 	pause_graph_tracing();
2976 
2977 	pc = preempt_count();
2978 	preempt_disable_notrace();
2979 
2980 
2981 	tbuffer = get_trace_buf();
2982 	if (!tbuffer) {
2983 		len = 0;
2984 		goto out_nobuffer;
2985 	}
2986 
2987 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2988 
2989 	local_save_flags(flags);
2990 	size = sizeof(*entry) + len + 1;
2991 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2992 					    flags, pc);
2993 	if (!event)
2994 		goto out;
2995 	entry = ring_buffer_event_data(event);
2996 	entry->ip = ip;
2997 
2998 	memcpy(&entry->buf, tbuffer, len + 1);
2999 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3000 		__buffer_unlock_commit(buffer, event);
3001 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3002 	}
3003 
3004 out:
3005 	put_trace_buf();
3006 
3007 out_nobuffer:
3008 	preempt_enable_notrace();
3009 	unpause_graph_tracing();
3010 
3011 	return len;
3012 }
3013 
3014 int trace_array_vprintk(struct trace_array *tr,
3015 			unsigned long ip, const char *fmt, va_list args)
3016 {
3017 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3018 }
3019 
3020 int trace_array_printk(struct trace_array *tr,
3021 		       unsigned long ip, const char *fmt, ...)
3022 {
3023 	int ret;
3024 	va_list ap;
3025 
3026 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3027 		return 0;
3028 
3029 	va_start(ap, fmt);
3030 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3031 	va_end(ap);
3032 	return ret;
3033 }
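
/*
 * Usage sketch (hypothetical instance and values, for illustration
 * only): code that owns a private trace_array can write into that
 * instance's buffer rather than the global one:
 *
 *	trace_array_printk(my_tr, _THIS_IP_, "queue depth %d\n", depth);
 */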
3034 
3035 int trace_array_printk_buf(struct ring_buffer *buffer,
3036 			   unsigned long ip, const char *fmt, ...)
3037 {
3038 	int ret;
3039 	va_list ap;
3040 
3041 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3042 		return 0;
3043 
3044 	va_start(ap, fmt);
3045 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3046 	va_end(ap);
3047 	return ret;
3048 }
3049 
3050 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3051 {
3052 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3053 }
3054 EXPORT_SYMBOL_GPL(trace_vprintk);
3055 
3056 static void trace_iterator_increment(struct trace_iterator *iter)
3057 {
3058 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3059 
3060 	iter->idx++;
3061 	if (buf_iter)
3062 		ring_buffer_read(buf_iter, NULL);
3063 }
3064 
3065 static struct trace_entry *
3066 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3067 		unsigned long *lost_events)
3068 {
3069 	struct ring_buffer_event *event;
3070 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3071 
3072 	if (buf_iter)
3073 		event = ring_buffer_iter_peek(buf_iter, ts);
3074 	else
3075 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3076 					 lost_events);
3077 
3078 	if (event) {
3079 		iter->ent_size = ring_buffer_event_length(event);
3080 		return ring_buffer_event_data(event);
3081 	}
3082 	iter->ent_size = 0;
3083 	return NULL;
3084 }
3085 
3086 static struct trace_entry *
3087 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3088 		  unsigned long *missing_events, u64 *ent_ts)
3089 {
3090 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3091 	struct trace_entry *ent, *next = NULL;
3092 	unsigned long lost_events = 0, next_lost = 0;
3093 	int cpu_file = iter->cpu_file;
3094 	u64 next_ts = 0, ts;
3095 	int next_cpu = -1;
3096 	int next_size = 0;
3097 	int cpu;
3098 
3099 	/*
3100	 * If we are in a per_cpu trace file, don't bother iterating over
3101	 * all cpus; just peek at that cpu directly.
3102 	 */
3103 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3104 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3105 			return NULL;
3106 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3107 		if (ent_cpu)
3108 			*ent_cpu = cpu_file;
3109 
3110 		return ent;
3111 	}
3112 
3113 	for_each_tracing_cpu(cpu) {
3114 
3115 		if (ring_buffer_empty_cpu(buffer, cpu))
3116 			continue;
3117 
3118 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3119 
3120 		/*
3121 		 * Pick the entry with the smallest timestamp:
3122 		 */
3123 		if (ent && (!next || ts < next_ts)) {
3124 			next = ent;
3125 			next_cpu = cpu;
3126 			next_ts = ts;
3127 			next_lost = lost_events;
3128 			next_size = iter->ent_size;
3129 		}
3130 	}
3131 
3132 	iter->ent_size = next_size;
3133 
3134 	if (ent_cpu)
3135 		*ent_cpu = next_cpu;
3136 
3137 	if (ent_ts)
3138 		*ent_ts = next_ts;
3139 
3140 	if (missing_events)
3141 		*missing_events = next_lost;
3142 
3143 	return next;
3144 }
3145 
3146 /* Find the next real entry, without updating the iterator itself */
3147 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3148 					  int *ent_cpu, u64 *ent_ts)
3149 {
3150 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3151 }
3152 
3153 /* Find the next real entry, and increment the iterator to the next entry */
3154 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3155 {
3156 	iter->ent = __find_next_entry(iter, &iter->cpu,
3157 				      &iter->lost_events, &iter->ts);
3158 
3159 	if (iter->ent)
3160 		trace_iterator_increment(iter);
3161 
3162 	return iter->ent ? iter : NULL;
3163 }
3164 
3165 static void trace_consume(struct trace_iterator *iter)
3166 {
3167 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3168 			    &iter->lost_events);
3169 }
3170 
3171 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3172 {
3173 	struct trace_iterator *iter = m->private;
3174 	int i = (int)*pos;
3175 	void *ent;
3176 
3177 	WARN_ON_ONCE(iter->leftover);
3178 
3179 	(*pos)++;
3180 
3181 	/* can't go backwards */
3182 	if (iter->idx > i)
3183 		return NULL;
3184 
3185 	if (iter->idx < 0)
3186 		ent = trace_find_next_entry_inc(iter);
3187 	else
3188 		ent = iter;
3189 
3190 	while (ent && iter->idx < i)
3191 		ent = trace_find_next_entry_inc(iter);
3192 
3193 	iter->pos = *pos;
3194 
3195 	return ent;
3196 }
3197 
3198 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3199 {
3200 	struct ring_buffer_event *event;
3201 	struct ring_buffer_iter *buf_iter;
3202 	unsigned long entries = 0;
3203 	u64 ts;
3204 
3205 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3206 
3207 	buf_iter = trace_buffer_iter(iter, cpu);
3208 	if (!buf_iter)
3209 		return;
3210 
3211 	ring_buffer_iter_reset(buf_iter);
3212 
3213 	/*
3214	 * With the max latency tracers, it can happen that a reset
3215	 * never took place on a cpu. This is evidenced by the
3216	 * timestamp being before the start of the buffer.
3217 	 */
3218 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3219 		if (ts >= iter->trace_buffer->time_start)
3220 			break;
3221 		entries++;
3222 		ring_buffer_read(buf_iter, NULL);
3223 	}
3224 
3225 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3226 }
3227 
3228 /*
3229  * The current tracer is copied to avoid holding a global lock
3230  * all around.
3231  */
3232 static void *s_start(struct seq_file *m, loff_t *pos)
3233 {
3234 	struct trace_iterator *iter = m->private;
3235 	struct trace_array *tr = iter->tr;
3236 	int cpu_file = iter->cpu_file;
3237 	void *p = NULL;
3238 	loff_t l = 0;
3239 	int cpu;
3240 
3241 	/*
3242 	 * copy the tracer to avoid using a global lock all around.
3243 	 * iter->trace is a copy of current_trace, the pointer to the
3244 	 * name may be used instead of a strcmp(), as iter->trace->name
3245 	 * will point to the same string as current_trace->name.
3246 	 */
3247 	mutex_lock(&trace_types_lock);
3248 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3249 		*iter->trace = *tr->current_trace;
3250 	mutex_unlock(&trace_types_lock);
3251 
3252 #ifdef CONFIG_TRACER_MAX_TRACE
3253 	if (iter->snapshot && iter->trace->use_max_tr)
3254 		return ERR_PTR(-EBUSY);
3255 #endif
3256 
3257 	if (!iter->snapshot)
3258 		atomic_inc(&trace_record_taskinfo_disabled);
3259 
3260 	if (*pos != iter->pos) {
3261 		iter->ent = NULL;
3262 		iter->cpu = 0;
3263 		iter->idx = -1;
3264 
3265 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3266 			for_each_tracing_cpu(cpu)
3267 				tracing_iter_reset(iter, cpu);
3268 		} else
3269 			tracing_iter_reset(iter, cpu_file);
3270 
3271 		iter->leftover = 0;
3272 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3273 			;
3274 
3275 	} else {
3276 		/*
3277 		 * If we overflowed the seq_file before, then we want
3278 		 * to just reuse the trace_seq buffer again.
3279 		 */
3280 		if (iter->leftover)
3281 			p = iter;
3282 		else {
3283 			l = *pos - 1;
3284 			p = s_next(m, p, &l);
3285 		}
3286 	}
3287 
3288 	trace_event_read_lock();
3289 	trace_access_lock(cpu_file);
3290 	return p;
3291 }
3292 
3293 static void s_stop(struct seq_file *m, void *p)
3294 {
3295 	struct trace_iterator *iter = m->private;
3296 
3297 #ifdef CONFIG_TRACER_MAX_TRACE
3298 	if (iter->snapshot && iter->trace->use_max_tr)
3299 		return;
3300 #endif
3301 
3302 	if (!iter->snapshot)
3303 		atomic_dec(&trace_record_taskinfo_disabled);
3304 
3305 	trace_access_unlock(iter->cpu_file);
3306 	trace_event_read_unlock();
3307 }
3308 
3309 static void
3310 get_total_entries(struct trace_buffer *buf,
3311 		  unsigned long *total, unsigned long *entries)
3312 {
3313 	unsigned long count;
3314 	int cpu;
3315 
3316 	*total = 0;
3317 	*entries = 0;
3318 
3319 	for_each_tracing_cpu(cpu) {
3320 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3321 		/*
3322 		 * If this buffer has skipped entries, then we hold all
3323 		 * entries for the trace and we need to ignore the
3324 		 * ones before the time stamp.
3325 		 */
3326 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3327 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3328 			/* total is the same as the entries */
3329 			*total += count;
3330 		} else
3331 			*total += count +
3332 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3333 		*entries += count;
3334 	}
3335 }
3336 
3337 static void print_lat_help_header(struct seq_file *m)
3338 {
3339 	seq_puts(m, "#                  _------=> CPU#            \n"
3340 		    "#                 / _-----=> irqs-off        \n"
3341 		    "#                | / _----=> need-resched    \n"
3342 		    "#                || / _---=> hardirq/softirq \n"
3343 		    "#                ||| / _--=> preempt-depth   \n"
3344 		    "#                |||| /     delay            \n"
3345 		    "#  cmd     pid   ||||| time  |   caller      \n"
3346 		    "#     \\   /      |||||  \\    |   /         \n");
3347 }
3348 
3349 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3350 {
3351 	unsigned long total;
3352 	unsigned long entries;
3353 
3354 	get_total_entries(buf, &total, &entries);
3355 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3356 		   entries, total, num_online_cpus());
3357 	seq_puts(m, "#\n");
3358 }
3359 
3360 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3361 				   unsigned int flags)
3362 {
3363 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3364 
3365 	print_event_info(buf, m);
3366 
3367 	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3368 	seq_printf(m, "#              | |       |    %s     |         |\n",	 tgid ? "  |      " : "");
3369 }
3370 
3371 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3372 				       unsigned int flags)
3373 {
3374 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3375 	const char tgid_space[] = "          ";
3376 	const char space[] = "  ";
3377 
3378 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3379 		   tgid ? tgid_space : space);
3380 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3381 		   tgid ? tgid_space : space);
3382 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3383 		   tgid ? tgid_space : space);
3384 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3385 		   tgid ? tgid_space : space);
3386 	seq_printf(m, "#                          %s||| /     delay\n",
3387 		   tgid ? tgid_space : space);
3388 	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3389 		   tgid ? "   TGID   " : space);
3390 	seq_printf(m, "#              | |       | %s||||       |         |\n",
3391 		   tgid ? "     |    " : space);
3392 }
3393 
3394 void
3395 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3396 {
3397 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3398 	struct trace_buffer *buf = iter->trace_buffer;
3399 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3400 	struct tracer *type = iter->trace;
3401 	unsigned long entries;
3402 	unsigned long total;
3403 	const char *name = type->name;
3406 
3407 	get_total_entries(buf, &total, &entries);
3408 
3409 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3410 		   name, UTS_RELEASE);
3411 	seq_puts(m, "# -----------------------------------"
3412 		 "---------------------------------\n");
3413 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3414 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3415 		   nsecs_to_usecs(data->saved_latency),
3416 		   entries,
3417 		   total,
3418 		   buf->cpu,
3419 #if defined(CONFIG_PREEMPT_NONE)
3420 		   "server",
3421 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3422 		   "desktop",
3423 #elif defined(CONFIG_PREEMPT)
3424 		   "preempt",
3425 #else
3426 		   "unknown",
3427 #endif
3428 		   /* These are reserved for later use */
3429 		   0, 0, 0, 0);
3430 #ifdef CONFIG_SMP
3431 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3432 #else
3433 	seq_puts(m, ")\n");
3434 #endif
3435 	seq_puts(m, "#    -----------------\n");
3436 	seq_printf(m, "#    | task: %.16s-%d "
3437 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3438 		   data->comm, data->pid,
3439 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3440 		   data->policy, data->rt_priority);
3441 	seq_puts(m, "#    -----------------\n");
3442 
3443 	if (data->critical_start) {
3444 		seq_puts(m, "#  => started at: ");
3445 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3446 		trace_print_seq(m, &iter->seq);
3447 		seq_puts(m, "\n#  => ended at:   ");
3448 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3449 		trace_print_seq(m, &iter->seq);
3450 		seq_puts(m, "\n#\n");
3451 	}
3452 
3453 	seq_puts(m, "#\n");
3454 }
3455 
3456 static void test_cpu_buff_start(struct trace_iterator *iter)
3457 {
3458 	struct trace_seq *s = &iter->seq;
3459 	struct trace_array *tr = iter->tr;
3460 
3461 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3462 		return;
3463 
3464 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3465 		return;
3466 
3467 	if (cpumask_available(iter->started) &&
3468 	    cpumask_test_cpu(iter->cpu, iter->started))
3469 		return;
3470 
3471 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3472 		return;
3473 
3474 	if (cpumask_available(iter->started))
3475 		cpumask_set_cpu(iter->cpu, iter->started);
3476 
3477 	/* Don't print started cpu buffer for the first entry of the trace */
3478 	if (iter->idx > 1)
3479 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3480 				iter->cpu);
3481 }
3482 
3483 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3484 {
3485 	struct trace_array *tr = iter->tr;
3486 	struct trace_seq *s = &iter->seq;
3487 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3488 	struct trace_entry *entry;
3489 	struct trace_event *event;
3490 
3491 	entry = iter->ent;
3492 
3493 	test_cpu_buff_start(iter);
3494 
3495 	event = ftrace_find_event(entry->type);
3496 
3497 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3498 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3499 			trace_print_lat_context(iter);
3500 		else
3501 			trace_print_context(iter);
3502 	}
3503 
3504 	if (trace_seq_has_overflowed(s))
3505 		return TRACE_TYPE_PARTIAL_LINE;
3506 
3507 	if (event)
3508 		return event->funcs->trace(iter, sym_flags, event);
3509 
3510 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3511 
3512 	return trace_handle_return(s);
3513 }
3514 
3515 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3516 {
3517 	struct trace_array *tr = iter->tr;
3518 	struct trace_seq *s = &iter->seq;
3519 	struct trace_entry *entry;
3520 	struct trace_event *event;
3521 
3522 	entry = iter->ent;
3523 
3524 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3525 		trace_seq_printf(s, "%d %d %llu ",
3526 				 entry->pid, iter->cpu, iter->ts);
3527 
3528 	if (trace_seq_has_overflowed(s))
3529 		return TRACE_TYPE_PARTIAL_LINE;
3530 
3531 	event = ftrace_find_event(entry->type);
3532 	if (event)
3533 		return event->funcs->raw(iter, 0, event);
3534 
3535 	trace_seq_printf(s, "%d ?\n", entry->type);
3536 
3537 	return trace_handle_return(s);
3538 }
3539 
3540 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3541 {
3542 	struct trace_array *tr = iter->tr;
3543 	struct trace_seq *s = &iter->seq;
3544 	unsigned char newline = '\n';
3545 	struct trace_entry *entry;
3546 	struct trace_event *event;
3547 
3548 	entry = iter->ent;
3549 
3550 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3551 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3552 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3553 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3554 		if (trace_seq_has_overflowed(s))
3555 			return TRACE_TYPE_PARTIAL_LINE;
3556 	}
3557 
3558 	event = ftrace_find_event(entry->type);
3559 	if (event) {
3560 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3561 		if (ret != TRACE_TYPE_HANDLED)
3562 			return ret;
3563 	}
3564 
3565 	SEQ_PUT_FIELD(s, newline);
3566 
3567 	return trace_handle_return(s);
3568 }
3569 
3570 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3571 {
3572 	struct trace_array *tr = iter->tr;
3573 	struct trace_seq *s = &iter->seq;
3574 	struct trace_entry *entry;
3575 	struct trace_event *event;
3576 
3577 	entry = iter->ent;
3578 
3579 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3580 		SEQ_PUT_FIELD(s, entry->pid);
3581 		SEQ_PUT_FIELD(s, iter->cpu);
3582 		SEQ_PUT_FIELD(s, iter->ts);
3583 		if (trace_seq_has_overflowed(s))
3584 			return TRACE_TYPE_PARTIAL_LINE;
3585 	}
3586 
3587 	event = ftrace_find_event(entry->type);
3588 	return event ? event->funcs->binary(iter, 0, event) :
3589 		TRACE_TYPE_HANDLED;
3590 }
3591 
3592 int trace_empty(struct trace_iterator *iter)
3593 {
3594 	struct ring_buffer_iter *buf_iter;
3595 	int cpu;
3596 
3597 	/* If we are looking at one CPU buffer, only check that one */
3598 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3599 		cpu = iter->cpu_file;
3600 		buf_iter = trace_buffer_iter(iter, cpu);
3601 		if (buf_iter) {
3602 			if (!ring_buffer_iter_empty(buf_iter))
3603 				return 0;
3604 		} else {
3605 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3606 				return 0;
3607 		}
3608 		return 1;
3609 	}
3610 
3611 	for_each_tracing_cpu(cpu) {
3612 		buf_iter = trace_buffer_iter(iter, cpu);
3613 		if (buf_iter) {
3614 			if (!ring_buffer_iter_empty(buf_iter))
3615 				return 0;
3616 		} else {
3617 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3618 				return 0;
3619 		}
3620 	}
3621 
3622 	return 1;
3623 }
3624 
3625 /*  Called with trace_event_read_lock() held. */
3626 enum print_line_t print_trace_line(struct trace_iterator *iter)
3627 {
3628 	struct trace_array *tr = iter->tr;
3629 	unsigned long trace_flags = tr->trace_flags;
3630 	enum print_line_t ret;
3631 
3632 	if (iter->lost_events) {
3633 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3634 				 iter->cpu, iter->lost_events);
3635 		if (trace_seq_has_overflowed(&iter->seq))
3636 			return TRACE_TYPE_PARTIAL_LINE;
3637 	}
3638 
3639 	if (iter->trace && iter->trace->print_line) {
3640 		ret = iter->trace->print_line(iter);
3641 		if (ret != TRACE_TYPE_UNHANDLED)
3642 			return ret;
3643 	}
3644 
3645 	if (iter->ent->type == TRACE_BPUTS &&
3646 			trace_flags & TRACE_ITER_PRINTK &&
3647 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3648 		return trace_print_bputs_msg_only(iter);
3649 
3650 	if (iter->ent->type == TRACE_BPRINT &&
3651 			trace_flags & TRACE_ITER_PRINTK &&
3652 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3653 		return trace_print_bprintk_msg_only(iter);
3654 
3655 	if (iter->ent->type == TRACE_PRINT &&
3656 			trace_flags & TRACE_ITER_PRINTK &&
3657 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3658 		return trace_print_printk_msg_only(iter);
3659 
3660 	if (trace_flags & TRACE_ITER_BIN)
3661 		return print_bin_fmt(iter);
3662 
3663 	if (trace_flags & TRACE_ITER_HEX)
3664 		return print_hex_fmt(iter);
3665 
3666 	if (trace_flags & TRACE_ITER_RAW)
3667 		return print_raw_fmt(iter);
3668 
3669 	return print_trace_fmt(iter);
3670 }
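
/*
 * Usage sketch (simplified, for illustration only): readers walk the
 * buffers by pairing trace_find_next_entry_inc() with
 * print_trace_line() while holding trace_event_read_lock(), roughly:
 *
 *	trace_event_read_lock();
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 *	trace_event_read_unlock();
 */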
3671 
3672 void trace_latency_header(struct seq_file *m)
3673 {
3674 	struct trace_iterator *iter = m->private;
3675 	struct trace_array *tr = iter->tr;
3676 
3677 	/* print nothing if the buffers are empty */
3678 	if (trace_empty(iter))
3679 		return;
3680 
3681 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3682 		print_trace_header(m, iter);
3683 
3684 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3685 		print_lat_help_header(m);
3686 }
3687 
3688 void trace_default_header(struct seq_file *m)
3689 {
3690 	struct trace_iterator *iter = m->private;
3691 	struct trace_array *tr = iter->tr;
3692 	unsigned long trace_flags = tr->trace_flags;
3693 
3694 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3695 		return;
3696 
3697 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3698 		/* print nothing if the buffers are empty */
3699 		if (trace_empty(iter))
3700 			return;
3701 		print_trace_header(m, iter);
3702 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3703 			print_lat_help_header(m);
3704 	} else {
3705 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3706 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3707 				print_func_help_header_irq(iter->trace_buffer,
3708 							   m, trace_flags);
3709 			else
3710 				print_func_help_header(iter->trace_buffer, m,
3711 						       trace_flags);
3712 		}
3713 	}
3714 }
3715 
3716 static void test_ftrace_alive(struct seq_file *m)
3717 {
3718 	if (!ftrace_is_dead())
3719 		return;
3720 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3721 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3722 }
3723 
3724 #ifdef CONFIG_TRACER_MAX_TRACE
3725 static void show_snapshot_main_help(struct seq_file *m)
3726 {
3727 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3728 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3729 		    "#                      Takes a snapshot of the main buffer.\n"
3730 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3731 		    "#                      (Doesn't have to be '2', works with any number that\n"
3732 		    "#                       is not a '0' or '1')\n");
3733 }
3734 
3735 static void show_snapshot_percpu_help(struct seq_file *m)
3736 {
3737 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3738 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3739 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3740 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3741 #else
3742 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3743 		    "#                     Must use main snapshot file to allocate.\n");
3744 #endif
3745 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3746 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2', works with any number that\n"
3747 		    "#                       is not a '0' or '1')\n");
3748 }
3749 
3750 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3751 {
3752 	if (iter->tr->allocated_snapshot)
3753 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3754 	else
3755 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3756 
3757 	seq_puts(m, "# Snapshot commands:\n");
3758 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3759 		show_snapshot_main_help(m);
3760 	else
3761 		show_snapshot_percpu_help(m);
3762 }
3763 #else
3764 /* Should never be called */
3765 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3766 #endif
3767 
3768 static int s_show(struct seq_file *m, void *v)
3769 {
3770 	struct trace_iterator *iter = v;
3771 	int ret;
3772 
3773 	if (iter->ent == NULL) {
3774 		if (iter->tr) {
3775 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3776 			seq_puts(m, "#\n");
3777 			test_ftrace_alive(m);
3778 		}
3779 		if (iter->snapshot && trace_empty(iter))
3780 			print_snapshot_help(m, iter);
3781 		else if (iter->trace && iter->trace->print_header)
3782 			iter->trace->print_header(m);
3783 		else
3784 			trace_default_header(m);
3785 
3786 	} else if (iter->leftover) {
3787 		/*
3788 		 * If we filled the seq_file buffer earlier, we
3789 		 * want to just show it now.
3790 		 */
3791 		ret = trace_print_seq(m, &iter->seq);
3792 
3793 		/* ret should this time be zero, but you never know */
3794 		iter->leftover = ret;
3795 
3796 	} else {
3797 		print_trace_line(iter);
3798 		ret = trace_print_seq(m, &iter->seq);
3799 		/*
3800 		 * If we overflow the seq_file buffer, then it will
3801 		 * ask us for this data again at start up.
3802 		 * Use that instead.
3803 		 *  ret is 0 if seq_file write succeeded.
3804 		 *        -1 otherwise.
3805 		 */
3806 		iter->leftover = ret;
3807 	}
3808 
3809 	return 0;
3810 }
3811 
3812 /*
3813  * Should be used after trace_array_get(); holding trace_types_lock
3814  * ensures that i_cdev has already been initialized.
3815  */
3816 static inline int tracing_get_cpu(struct inode *inode)
3817 {
3818 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3819 		return (long)inode->i_cdev - 1;
3820 	return RING_BUFFER_ALL_CPUS;
3821 }
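/*
 * Sketch of the encoding that tracing_get_cpu() undoes (illustrative
 * only; the real setup lives in trace_create_cpu_file(), which is not
 * shown here). The per-cpu file creation side is assumed to stash
 * "cpu + 1" in i_cdev:
 *
 *	d_inode(dentry)->i_cdev = (void *)(long)(cpu + 1);
 *
 * so that the default NULL i_cdev decodes to RING_BUFFER_ALL_CPUS and
 * "(long)inode->i_cdev - 1" recovers the cpu number for per-cpu files.
 */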
3822 
3823 static const struct seq_operations tracer_seq_ops = {
3824 	.start		= s_start,
3825 	.next		= s_next,
3826 	.stop		= s_stop,
3827 	.show		= s_show,
3828 };
3829 
3830 static struct trace_iterator *
3831 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3832 {
3833 	struct trace_array *tr = inode->i_private;
3834 	struct trace_iterator *iter;
3835 	int cpu;
3836 
3837 	if (tracing_disabled)
3838 		return ERR_PTR(-ENODEV);
3839 
3840 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3841 	if (!iter)
3842 		return ERR_PTR(-ENOMEM);
3843 
3844 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3845 				    GFP_KERNEL);
3846 	if (!iter->buffer_iter)
3847 		goto release;
3848 
3849 	/*
3850 	 * We make a copy of the current tracer to avoid concurrent
3851 	 * changes on it while we are reading.
3852 	 */
3853 	mutex_lock(&trace_types_lock);
3854 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3855 	if (!iter->trace)
3856 		goto fail;
3857 
3858 	*iter->trace = *tr->current_trace;
3859 
3860 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3861 		goto fail;
3862 
3863 	iter->tr = tr;
3864 
3865 #ifdef CONFIG_TRACER_MAX_TRACE
3866 	/* Currently only the top directory has a snapshot */
3867 	if (tr->current_trace->print_max || snapshot)
3868 		iter->trace_buffer = &tr->max_buffer;
3869 	else
3870 #endif
3871 		iter->trace_buffer = &tr->trace_buffer;
3872 	iter->snapshot = snapshot;
3873 	iter->pos = -1;
3874 	iter->cpu_file = tracing_get_cpu(inode);
3875 	mutex_init(&iter->mutex);
3876 
3877 	/* Notify the tracer early, before we stop tracing. */
3878 	if (iter->trace && iter->trace->open)
3879 		iter->trace->open(iter);
3880 
3881 	/* Annotate start of buffers if we had overruns */
3882 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3883 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3884 
3885 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3886 	if (trace_clocks[tr->clock_id].in_ns)
3887 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3888 
3889 	/* stop the trace while dumping if we are not opening "snapshot" */
3890 	if (!iter->snapshot)
3891 		tracing_stop_tr(tr);
3892 
3893 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3894 		for_each_tracing_cpu(cpu) {
3895 			iter->buffer_iter[cpu] =
3896 				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3897 		}
3898 		ring_buffer_read_prepare_sync();
3899 		for_each_tracing_cpu(cpu) {
3900 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3901 			tracing_iter_reset(iter, cpu);
3902 		}
3903 	} else {
3904 		cpu = iter->cpu_file;
3905 		iter->buffer_iter[cpu] =
3906 			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3907 		ring_buffer_read_prepare_sync();
3908 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3909 		tracing_iter_reset(iter, cpu);
3910 	}
3911 
3912 	mutex_unlock(&trace_types_lock);
3913 
3914 	return iter;
3915 
3916  fail:
3917 	mutex_unlock(&trace_types_lock);
3918 	kfree(iter->trace);
3919 	kfree(iter->buffer_iter);
3920 release:
3921 	seq_release_private(inode, file);
3922 	return ERR_PTR(-ENOMEM);
3923 }
3924 
3925 int tracing_open_generic(struct inode *inode, struct file *filp)
3926 {
3927 	if (tracing_disabled)
3928 		return -ENODEV;
3929 
3930 	filp->private_data = inode->i_private;
3931 	return 0;
3932 }
3933 
3934 bool tracing_is_disabled(void)
3935 {
3936 	return tracing_disabled;
3937 }
3938 
3939 /*
3940  * Open and update trace_array ref count.
3941  * Must have the current trace_array passed to it.
3942  */
3943 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3944 {
3945 	struct trace_array *tr = inode->i_private;
3946 
3947 	if (tracing_disabled)
3948 		return -ENODEV;
3949 
3950 	if (trace_array_get(tr) < 0)
3951 		return -ENODEV;
3952 
3953 	filp->private_data = inode->i_private;
3954 
3955 	return 0;
3956 }
3957 
3958 static int tracing_release(struct inode *inode, struct file *file)
3959 {
3960 	struct trace_array *tr = inode->i_private;
3961 	struct seq_file *m = file->private_data;
3962 	struct trace_iterator *iter;
3963 	int cpu;
3964 
3965 	if (!(file->f_mode & FMODE_READ)) {
3966 		trace_array_put(tr);
3967 		return 0;
3968 	}
3969 
3970 	/* Writes do not use seq_file */
3971 	iter = m->private;
3972 	mutex_lock(&trace_types_lock);
3973 
3974 	for_each_tracing_cpu(cpu) {
3975 		if (iter->buffer_iter[cpu])
3976 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3977 	}
3978 
3979 	if (iter->trace && iter->trace->close)
3980 		iter->trace->close(iter);
3981 
3982 	if (!iter->snapshot)
3983 		/* reenable tracing if it was previously enabled */
3984 		tracing_start_tr(tr);
3985 
3986 	__trace_array_put(tr);
3987 
3988 	mutex_unlock(&trace_types_lock);
3989 
3990 	mutex_destroy(&iter->mutex);
3991 	free_cpumask_var(iter->started);
3992 	kfree(iter->trace);
3993 	kfree(iter->buffer_iter);
3994 	seq_release_private(inode, file);
3995 
3996 	return 0;
3997 }
3998 
3999 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4000 {
4001 	struct trace_array *tr = inode->i_private;
4002 
4003 	trace_array_put(tr);
4004 	return 0;
4005 }
4006 
4007 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4008 {
4009 	struct trace_array *tr = inode->i_private;
4010 
4011 	trace_array_put(tr);
4012 
4013 	return single_release(inode, file);
4014 }
4015 
4016 static int tracing_open(struct inode *inode, struct file *file)
4017 {
4018 	struct trace_array *tr = inode->i_private;
4019 	struct trace_iterator *iter;
4020 	int ret = 0;
4021 
4022 	if (trace_array_get(tr) < 0)
4023 		return -ENODEV;
4024 
4025 	/* If this file was open for write, then erase contents */
4026 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4027 		int cpu = tracing_get_cpu(inode);
4028 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4029 
4030 #ifdef CONFIG_TRACER_MAX_TRACE
4031 		if (tr->current_trace->print_max)
4032 			trace_buf = &tr->max_buffer;
4033 #endif
4034 
4035 		if (cpu == RING_BUFFER_ALL_CPUS)
4036 			tracing_reset_online_cpus(trace_buf);
4037 		else
4038 			tracing_reset(trace_buf, cpu);
4039 	}
4040 
4041 	if (file->f_mode & FMODE_READ) {
4042 		iter = __tracing_open(inode, file, false);
4043 		if (IS_ERR(iter))
4044 			ret = PTR_ERR(iter);
4045 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4046 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4047 	}
4048 
4049 	if (ret < 0)
4050 		trace_array_put(tr);
4051 
4052 	return ret;
4053 }
4054 
4055 /*
4056  * Some tracers are not suitable for instance buffers.
4057  * A tracer is always available for the global (top-level) array;
4058  * an instance may only use it if the tracer explicitly allows it.
4059  */
4060 static bool
4061 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4062 {
4063 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4064 }
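/*
 * Illustrative sketch (not part of this file): a tracer opts in to
 * instance buffers by setting .allow_instances in its struct tracer,
 * roughly like:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name		 = "example",
 *		.init		 = example_tracer_init,
 *		.allow_instances = true,
 *	};
 *
 * "example_tracer" and example_tracer_init() are hypothetical names.
 * Without .allow_instances, trace_ok_for_array() only accepts the
 * tracer for the top-level (global) trace array.
 */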
4065 
4066 /* Find the next tracer that this trace array may use */
4067 static struct tracer *
4068 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4069 {
4070 	while (t && !trace_ok_for_array(t, tr))
4071 		t = t->next;
4072 
4073 	return t;
4074 }
4075 
4076 static void *
4077 t_next(struct seq_file *m, void *v, loff_t *pos)
4078 {
4079 	struct trace_array *tr = m->private;
4080 	struct tracer *t = v;
4081 
4082 	(*pos)++;
4083 
4084 	if (t)
4085 		t = get_tracer_for_array(tr, t->next);
4086 
4087 	return t;
4088 }
4089 
4090 static void *t_start(struct seq_file *m, loff_t *pos)
4091 {
4092 	struct trace_array *tr = m->private;
4093 	struct tracer *t;
4094 	loff_t l = 0;
4095 
4096 	mutex_lock(&trace_types_lock);
4097 
4098 	t = get_tracer_for_array(tr, trace_types);
4099 	for (; t && l < *pos; t = t_next(m, t, &l))
4100 		;
4101 
4102 	return t;
4103 }
4104 
4105 static void t_stop(struct seq_file *m, void *p)
4106 {
4107 	mutex_unlock(&trace_types_lock);
4108 }
4109 
4110 static int t_show(struct seq_file *m, void *v)
4111 {
4112 	struct tracer *t = v;
4113 
4114 	if (!t)
4115 		return 0;
4116 
4117 	seq_puts(m, t->name);
4118 	if (t->next)
4119 		seq_putc(m, ' ');
4120 	else
4121 		seq_putc(m, '\n');
4122 
4123 	return 0;
4124 }
4125 
4126 static const struct seq_operations show_traces_seq_ops = {
4127 	.start		= t_start,
4128 	.next		= t_next,
4129 	.stop		= t_stop,
4130 	.show		= t_show,
4131 };
4132 
4133 static int show_traces_open(struct inode *inode, struct file *file)
4134 {
4135 	struct trace_array *tr = inode->i_private;
4136 	struct seq_file *m;
4137 	int ret;
4138 
4139 	if (tracing_disabled)
4140 		return -ENODEV;
4141 
4142 	ret = seq_open(file, &show_traces_seq_ops);
4143 	if (ret)
4144 		return ret;
4145 
4146 	m = file->private_data;
4147 	m->private = tr;
4148 
4149 	return 0;
4150 }
4151 
4152 static ssize_t
4153 tracing_write_stub(struct file *filp, const char __user *ubuf,
4154 		   size_t count, loff_t *ppos)
4155 {
4156 	return count;
4157 }
4158 
4159 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4160 {
4161 	int ret;
4162 
4163 	if (file->f_mode & FMODE_READ)
4164 		ret = seq_lseek(file, offset, whence);
4165 	else
4166 		file->f_pos = ret = 0;
4167 
4168 	return ret;
4169 }
4170 
4171 static const struct file_operations tracing_fops = {
4172 	.open		= tracing_open,
4173 	.read		= seq_read,
4174 	.write		= tracing_write_stub,
4175 	.llseek		= tracing_lseek,
4176 	.release	= tracing_release,
4177 };
4178 
4179 static const struct file_operations show_traces_fops = {
4180 	.open		= show_traces_open,
4181 	.read		= seq_read,
4182 	.release	= seq_release,
4183 	.llseek		= seq_lseek,
4184 };
4185 
4186 static ssize_t
4187 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4188 		     size_t count, loff_t *ppos)
4189 {
4190 	struct trace_array *tr = file_inode(filp)->i_private;
4191 	char *mask_str;
4192 	int len;
4193 
4194 	len = snprintf(NULL, 0, "%*pb\n",
4195 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4196 	mask_str = kmalloc(len, GFP_KERNEL);
4197 	if (!mask_str)
4198 		return -ENOMEM;
4199 
4200 	len = snprintf(mask_str, len, "%*pb\n",
4201 		       cpumask_pr_args(tr->tracing_cpumask));
4202 	if (len >= count) {
4203 		count = -EINVAL;
4204 		goto out_err;
4205 	}
4206 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4207 
4208 out_err:
4209 	kfree(mask_str);
4210 
4211 	return count;
4212 }
4213 
4214 static ssize_t
4215 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4216 		      size_t count, loff_t *ppos)
4217 {
4218 	struct trace_array *tr = file_inode(filp)->i_private;
4219 	cpumask_var_t tracing_cpumask_new;
4220 	int err, cpu;
4221 
4222 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4223 		return -ENOMEM;
4224 
4225 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4226 	if (err)
4227 		goto err_unlock;
4228 
4229 	local_irq_disable();
4230 	arch_spin_lock(&tr->max_lock);
4231 	for_each_tracing_cpu(cpu) {
4232 		/*
4233 		 * Increase/decrease the disabled counter if we are
4234 		 * about to flip a bit in the cpumask:
4235 		 */
4236 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4237 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4238 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4239 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4240 		}
4241 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4242 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4243 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4244 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4245 		}
4246 	}
4247 	arch_spin_unlock(&tr->max_lock);
4248 	local_irq_enable();
4249 
4250 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4251 	free_cpumask_var(tracing_cpumask_new);
4252 
4253 	return count;
4254 
4255 err_unlock:
4256 	free_cpumask_var(tracing_cpumask_new);
4257 
4258 	return err;
4259 }
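/*
 * Illustrative usage of the cpumask handlers above (the path assumes
 * the usual tracefs mount point). The mask is parsed by
 * cpumask_parse_user() and printed with the "%*pb" cpumask format, so
 * a plain hex mask works:
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace CPUs 0-1 only
 *	# cat /sys/kernel/tracing/tracing_cpumask        # prints e.g. "3"
 *
 * As the loop above shows, recording is disabled on CPUs cleared from
 * the mask and re-enabled on CPUs that are newly set.
 */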
4260 
4261 static const struct file_operations tracing_cpumask_fops = {
4262 	.open		= tracing_open_generic_tr,
4263 	.read		= tracing_cpumask_read,
4264 	.write		= tracing_cpumask_write,
4265 	.release	= tracing_release_generic_tr,
4266 	.llseek		= generic_file_llseek,
4267 };
4268 
4269 static int tracing_trace_options_show(struct seq_file *m, void *v)
4270 {
4271 	struct tracer_opt *trace_opts;
4272 	struct trace_array *tr = m->private;
4273 	u32 tracer_flags;
4274 	int i;
4275 
4276 	mutex_lock(&trace_types_lock);
4277 	tracer_flags = tr->current_trace->flags->val;
4278 	trace_opts = tr->current_trace->flags->opts;
4279 
4280 	for (i = 0; trace_options[i]; i++) {
4281 		if (tr->trace_flags & (1 << i))
4282 			seq_printf(m, "%s\n", trace_options[i]);
4283 		else
4284 			seq_printf(m, "no%s\n", trace_options[i]);
4285 	}
4286 
4287 	for (i = 0; trace_opts[i].name; i++) {
4288 		if (tracer_flags & trace_opts[i].bit)
4289 			seq_printf(m, "%s\n", trace_opts[i].name);
4290 		else
4291 			seq_printf(m, "no%s\n", trace_opts[i].name);
4292 	}
4293 	mutex_unlock(&trace_types_lock);
4294 
4295 	return 0;
4296 }
4297 
4298 static int __set_tracer_option(struct trace_array *tr,
4299 			       struct tracer_flags *tracer_flags,
4300 			       struct tracer_opt *opts, int neg)
4301 {
4302 	struct tracer *trace = tracer_flags->trace;
4303 	int ret;
4304 
4305 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4306 	if (ret)
4307 		return ret;
4308 
4309 	if (neg)
4310 		tracer_flags->val &= ~opts->bit;
4311 	else
4312 		tracer_flags->val |= opts->bit;
4313 	return 0;
4314 }
4315 
4316 /* Try to assign a tracer specific option */
4317 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4318 {
4319 	struct tracer *trace = tr->current_trace;
4320 	struct tracer_flags *tracer_flags = trace->flags;
4321 	struct tracer_opt *opts = NULL;
4322 	int i;
4323 
4324 	for (i = 0; tracer_flags->opts[i].name; i++) {
4325 		opts = &tracer_flags->opts[i];
4326 
4327 		if (strcmp(cmp, opts->name) == 0)
4328 			return __set_tracer_option(tr, trace->flags, opts, neg);
4329 	}
4330 
4331 	return -EINVAL;
4332 }
4333 
4334 /* Some tracers require overwrite to stay enabled */
4335 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4336 {
4337 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4338 		return -1;
4339 
4340 	return 0;
4341 }
4342 
4343 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4344 {
4345 	/* do nothing if the flag already matches the requested state */
4346 	if (!!(tr->trace_flags & mask) == !!enabled)
4347 		return 0;
4348 
4349 	/* Give the tracer a chance to approve the change */
4350 	if (tr->current_trace->flag_changed)
4351 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4352 			return -EINVAL;
4353 
4354 	if (enabled)
4355 		tr->trace_flags |= mask;
4356 	else
4357 		tr->trace_flags &= ~mask;
4358 
4359 	if (mask == TRACE_ITER_RECORD_CMD)
4360 		trace_event_enable_cmd_record(enabled);
4361 
4362 	if (mask == TRACE_ITER_RECORD_TGID) {
4363 		if (!tgid_map)
4364 			tgid_map = kcalloc(PID_MAX_DEFAULT + 1, sizeof(*tgid_map),
4365 					   GFP_KERNEL);
4366 		if (!tgid_map) {
4367 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4368 			return -ENOMEM;
4369 		}
4370 
4371 		trace_event_enable_tgid_record(enabled);
4372 	}
4373 
4374 	if (mask == TRACE_ITER_EVENT_FORK)
4375 		trace_event_follow_fork(tr, enabled);
4376 
4377 	if (mask == TRACE_ITER_FUNC_FORK)
4378 		ftrace_pid_follow_fork(tr, enabled);
4379 
4380 	if (mask == TRACE_ITER_OVERWRITE) {
4381 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4382 #ifdef CONFIG_TRACER_MAX_TRACE
4383 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4384 #endif
4385 	}
4386 
4387 	if (mask == TRACE_ITER_PRINTK) {
4388 		trace_printk_start_stop_comm(enabled);
4389 		trace_printk_control(enabled);
4390 	}
4391 
4392 	return 0;
4393 }
4394 
4395 static int trace_set_options(struct trace_array *tr, char *option)
4396 {
4397 	char *cmp;
4398 	int neg = 0;
4399 	int ret;
4400 	size_t orig_len = strlen(option);
4401 
4402 	cmp = strstrip(option);
4403 
4404 	if (strncmp(cmp, "no", 2) == 0) {
4405 		neg = 1;
4406 		cmp += 2;
4407 	}
4408 
4409 	mutex_lock(&trace_types_lock);
4410 
4411 	ret = match_string(trace_options, -1, cmp);
4412 	/* If the name is not a core option, try the tracer-specific options */
4413 	if (ret < 0)
4414 		ret = set_tracer_option(tr, cmp, neg);
4415 	else
4416 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4417 
4418 	mutex_unlock(&trace_types_lock);
4419 
4420 	/*
4421 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4422 	 * turn it back into a space.
4423 	 */
4424 	if (orig_len > strlen(option))
4425 		option[strlen(option)] = ' ';
4426 
4427 	return ret;
4428 }
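/*
 * Illustrative examples of what trace_set_options() accepts when
 * written to the trace_options file (path assumes the usual tracefs
 * mount point):
 *
 *	# echo overwrite > /sys/kernel/tracing/trace_options     # set a core flag
 *	# echo nooverwrite > /sys/kernel/tracing/trace_options   # clear it again
 *
 * A name that is not found in trace_options[] falls through to
 * set_tracer_option() and is matched against the current tracer's own
 * flag table instead.
 */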
4429 
4430 static void __init apply_trace_boot_options(void)
4431 {
4432 	char *buf = trace_boot_options_buf;
4433 	char *option;
4434 
4435 	while (true) {
4436 		option = strsep(&buf, ",");
4437 
4438 		if (!option)
4439 			break;
4440 
4441 		if (*option)
4442 			trace_set_options(&global_trace, option);
4443 
4444 		/* Put back the comma to allow this to be called again */
4445 		if (buf)
4446 			*(buf - 1) = ',';
4447 	}
4448 }
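/*
 * Illustrative boot-time usage: trace_boot_options_buf is expected to
 * be filled from the "trace_options=" kernel parameter (handled
 * elsewhere in this file), so a command line such as
 *
 *	trace_options=sym-offset,nooverwrite
 *
 * is split on commas above and applied one option at a time via
 * trace_set_options(); the comma is restored afterwards so the buffer
 * can be parsed again later if needed.
 */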
4449 
4450 static ssize_t
4451 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4452 			size_t cnt, loff_t *ppos)
4453 {
4454 	struct seq_file *m = filp->private_data;
4455 	struct trace_array *tr = m->private;
4456 	char buf[64];
4457 	int ret;
4458 
4459 	if (cnt >= sizeof(buf))
4460 		return -EINVAL;
4461 
4462 	if (copy_from_user(buf, ubuf, cnt))
4463 		return -EFAULT;
4464 
4465 	buf[cnt] = 0;
4466 
4467 	ret = trace_set_options(tr, buf);
4468 	if (ret < 0)
4469 		return ret;
4470 
4471 	*ppos += cnt;
4472 
4473 	return cnt;
4474 }
4475 
4476 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4477 {
4478 	struct trace_array *tr = inode->i_private;
4479 	int ret;
4480 
4481 	if (tracing_disabled)
4482 		return -ENODEV;
4483 
4484 	if (trace_array_get(tr) < 0)
4485 		return -ENODEV;
4486 
4487 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4488 	if (ret < 0)
4489 		trace_array_put(tr);
4490 
4491 	return ret;
4492 }
4493 
4494 static const struct file_operations tracing_iter_fops = {
4495 	.open		= tracing_trace_options_open,
4496 	.read		= seq_read,
4497 	.llseek		= seq_lseek,
4498 	.release	= tracing_single_release_tr,
4499 	.write		= tracing_trace_options_write,
4500 };
4501 
4502 static const char readme_msg[] =
4503 	"tracing mini-HOWTO:\n\n"
4504 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4505 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4506 	" Important files:\n"
4507 	"  trace\t\t\t- The static contents of the buffer\n"
4508 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4509 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4510 	"  current_tracer\t- function and latency tracers\n"
4511 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4512 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4513 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4514 	"  trace_clock\t\t- change the clock used to order events\n"
4515 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4516 	"      global:   Synced across CPUs but slows tracing down.\n"
4517 	"     counter:   Not a clock, but just an increment\n"
4518 	"      uptime:   Jiffy counter from time of boot\n"
4519 	"        perf:   Same clock that perf events use\n"
4520 #ifdef CONFIG_X86_64
4521 	"     x86-tsc:   TSC cycle counter\n"
4522 #endif
4523 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4524 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4525 	"    absolute:   Absolute (standalone) timestamp\n"
4526 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4527 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4528 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4529 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4530 	"\t\t\t  Remove sub-buffer with rmdir\n"
4531 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4532 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4533 	"\t\t\t  option name\n"
4534 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4535 #ifdef CONFIG_DYNAMIC_FTRACE
4536 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4537 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4538 	"\t\t\t  functions\n"
4539 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4540 	"\t     modules: Can select a group via module\n"
4541 	"\t      Format: :mod:<module-name>\n"
4542 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4543 	"\t    triggers: a command to perform when function is hit\n"
4544 	"\t      Format: <function>:<trigger>[:count]\n"
4545 	"\t     trigger: traceon, traceoff\n"
4546 	"\t\t      enable_event:<system>:<event>\n"
4547 	"\t\t      disable_event:<system>:<event>\n"
4548 #ifdef CONFIG_STACKTRACE
4549 	"\t\t      stacktrace\n"
4550 #endif
4551 #ifdef CONFIG_TRACER_SNAPSHOT
4552 	"\t\t      snapshot\n"
4553 #endif
4554 	"\t\t      dump\n"
4555 	"\t\t      cpudump\n"
4556 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4557 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4558 	"\t     The first one will disable tracing every time do_fault is hit\n"
4559 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4560 	"\t       The first time do_trap is hit and it disables tracing, the\n"
4561 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4562 	"\t       the counter will not decrement. It only decrements when the\n"
4563 	"\t       trigger did work\n"
4564 	"\t     To remove trigger without count:\n"
4565 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4566 	"\t     To remove trigger with a count:\n"
4567 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4568 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4569 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4570 	"\t    modules: Can select a group via module command :mod:\n"
4571 	"\t    Does not accept triggers\n"
4572 #endif /* CONFIG_DYNAMIC_FTRACE */
4573 #ifdef CONFIG_FUNCTION_TRACER
4574 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4575 	"\t\t    (function)\n"
4576 #endif
4577 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4578 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4579 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4580 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4581 #endif
4582 #ifdef CONFIG_TRACER_SNAPSHOT
4583 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4584 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4585 	"\t\t\t  information\n"
4586 #endif
4587 #ifdef CONFIG_STACK_TRACER
4588 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4589 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4590 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4591 	"\t\t\t  new trace)\n"
4592 #ifdef CONFIG_DYNAMIC_FTRACE
4593 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4594 	"\t\t\t  traces\n"
4595 #endif
4596 #endif /* CONFIG_STACK_TRACER */
4597 #ifdef CONFIG_KPROBE_EVENTS
4598 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4599 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4600 #endif
4601 #ifdef CONFIG_UPROBE_EVENTS
4602 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4603 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4604 #endif
4605 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4606 	"\t  accepts: event-definitions (one definition per line)\n"
4607 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4608 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4609 	"\t           -:[<group>/]<event>\n"
4610 #ifdef CONFIG_KPROBE_EVENTS
4611 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4612 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4613 #endif
4614 #ifdef CONFIG_UPROBE_EVENTS
4615 	"\t    place: <path>:<offset>\n"
4616 #endif
4617 	"\t     args: <name>=fetcharg[:type]\n"
4618 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4619 	"\t           $stack<index>, $stack, $retval, $comm\n"
4620 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4621 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4622 #endif
4623 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4624 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4625 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4626 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4627 	"\t\t\t  events\n"
4628 	"      filter\t\t- If set, only events passing filter are traced\n"
4629 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4630 	"\t\t\t  <event>:\n"
4631 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4632 	"      filter\t\t- If set, only events passing filter are traced\n"
4633 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4634 	"\t    Format: <trigger>[:count][if <filter>]\n"
4635 	"\t   trigger: traceon, traceoff\n"
4636 	"\t            enable_event:<system>:<event>\n"
4637 	"\t            disable_event:<system>:<event>\n"
4638 #ifdef CONFIG_HIST_TRIGGERS
4639 	"\t            enable_hist:<system>:<event>\n"
4640 	"\t            disable_hist:<system>:<event>\n"
4641 #endif
4642 #ifdef CONFIG_STACKTRACE
4643 	"\t\t    stacktrace\n"
4644 #endif
4645 #ifdef CONFIG_TRACER_SNAPSHOT
4646 	"\t\t    snapshot\n"
4647 #endif
4648 #ifdef CONFIG_HIST_TRIGGERS
4649 	"\t\t    hist (see below)\n"
4650 #endif
4651 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4652 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4653 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4654 	"\t                  events/block/block_unplug/trigger\n"
4655 	"\t   The first disables tracing every time block_unplug is hit.\n"
4656 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4657 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4658 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4659 	"\t   Like function triggers, the counter is only decremented if it\n"
4660 	"\t    enabled or disabled tracing.\n"
4661 	"\t   To remove a trigger without a count:\n"
4662 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4663 	"\t   To remove a trigger with a count:\n"
4664 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4665 	"\t   Filters can be ignored when removing a trigger.\n"
4666 #ifdef CONFIG_HIST_TRIGGERS
4667 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4668 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4669 	"\t            [:values=<field1[,field2,...]>]\n"
4670 	"\t            [:sort=<field1[,field2,...]>]\n"
4671 	"\t            [:size=#entries]\n"
4672 	"\t            [:pause][:continue][:clear]\n"
4673 	"\t            [:name=histname1]\n"
4674 	"\t            [if <filter>]\n\n"
4675 	"\t    When a matching event is hit, an entry is added to a hash\n"
4676 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4677 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4678 	"\t    correspond to fields in the event's format description.  Keys\n"
4679 	"\t    can be any field, or the special string 'stacktrace'.\n"
4680 	"\t    Compound keys consisting of up to two fields can be specified\n"
4681 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4682 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4683 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4684 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4685 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4686 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4687 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4688 	"\t    its histogram data will be shared with other triggers of the\n"
4689 	"\t    same name, and trigger hits will update this common data.\n\n"
4690 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4691 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4692 	"\t    triggers attached to an event, there will be a table for each\n"
4693 	"\t    trigger in the output.  The table displayed for a named\n"
4694 	"\t    trigger will be the same as any other instance having the\n"
4695 	"\t    same name.  The default format used to display a given field\n"
4696 	"\t    can be modified by appending any of the following modifiers\n"
4697 	"\t    to the field name, as applicable:\n\n"
4698 	"\t            .hex        display a number as a hex value\n"
4699 	"\t            .sym        display an address as a symbol\n"
4700 	"\t            .sym-offset display an address as a symbol and offset\n"
4701 	"\t            .execname   display a common_pid as a program name\n"
4702 	"\t            .syscall    display a syscall id as a syscall name\n"
4703 	"\t            .log2       display log2 value rather than raw number\n"
4704 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
4705 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4706 	"\t    trigger or to start a hist trigger but not log any events\n"
4707 	"\t    until told to do so.  'continue' can be used to start or\n"
4708 	"\t    restart a paused hist trigger.\n\n"
4709 	"\t    The 'clear' parameter will clear the contents of a running\n"
4710 	"\t    hist trigger and leave its current paused/active state\n"
4711 	"\t    unchanged.\n\n"
4712 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4713 	"\t    have one event conditionally start and stop another event's\n"
4714 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4715 	"\t    the enable_event and disable_event triggers.\n"
4716 #endif
4717 ;
4718 
4719 static ssize_t
4720 tracing_readme_read(struct file *filp, char __user *ubuf,
4721 		       size_t cnt, loff_t *ppos)
4722 {
4723 	return simple_read_from_buffer(ubuf, cnt, ppos,
4724 					readme_msg, strlen(readme_msg));
4725 }
4726 
4727 static const struct file_operations tracing_readme_fops = {
4728 	.open		= tracing_open_generic,
4729 	.read		= tracing_readme_read,
4730 	.llseek		= generic_file_llseek,
4731 };
4732 
4733 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4734 {
4735 	int *ptr = v;
4736 
4737 	if (*pos || m->count)
4738 		ptr++;
4739 
4740 	(*pos)++;
4741 
4742 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4743 		if (trace_find_tgid(*ptr))
4744 			return ptr;
4745 	}
4746 
4747 	return NULL;
4748 }
4749 
4750 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4751 {
4752 	void *v;
4753 	loff_t l = 0;
4754 
4755 	if (!tgid_map)
4756 		return NULL;
4757 
4758 	v = &tgid_map[0];
4759 	while (l <= *pos) {
4760 		v = saved_tgids_next(m, v, &l);
4761 		if (!v)
4762 			return NULL;
4763 	}
4764 
4765 	return v;
4766 }
4767 
4768 static void saved_tgids_stop(struct seq_file *m, void *v)
4769 {
4770 }
4771 
4772 static int saved_tgids_show(struct seq_file *m, void *v)
4773 {
4774 	int pid = (int *)v - tgid_map;
4775 
4776 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4777 	return 0;
4778 }
4779 
4780 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4781 	.start		= saved_tgids_start,
4782 	.stop		= saved_tgids_stop,
4783 	.next		= saved_tgids_next,
4784 	.show		= saved_tgids_show,
4785 };
4786 
4787 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4788 {
4789 	if (tracing_disabled)
4790 		return -ENODEV;
4791 
4792 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4793 }
4794 
4795 
4796 static const struct file_operations tracing_saved_tgids_fops = {
4797 	.open		= tracing_saved_tgids_open,
4798 	.read		= seq_read,
4799 	.llseek		= seq_lseek,
4800 	.release	= seq_release,
4801 };
4802 
4803 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4804 {
4805 	unsigned int *ptr = v;
4806 
4807 	if (*pos || m->count)
4808 		ptr++;
4809 
4810 	(*pos)++;
4811 
4812 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4813 	     ptr++) {
4814 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4815 			continue;
4816 
4817 		return ptr;
4818 	}
4819 
4820 	return NULL;
4821 }
4822 
4823 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4824 {
4825 	void *v;
4826 	loff_t l = 0;
4827 
4828 	preempt_disable();
4829 	arch_spin_lock(&trace_cmdline_lock);
4830 
4831 	v = &savedcmd->map_cmdline_to_pid[0];
4832 	while (l <= *pos) {
4833 		v = saved_cmdlines_next(m, v, &l);
4834 		if (!v)
4835 			return NULL;
4836 	}
4837 
4838 	return v;
4839 }
4840 
4841 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4842 {
4843 	arch_spin_unlock(&trace_cmdline_lock);
4844 	preempt_enable();
4845 }
4846 
4847 static int saved_cmdlines_show(struct seq_file *m, void *v)
4848 {
4849 	char buf[TASK_COMM_LEN];
4850 	unsigned int *pid = v;
4851 
4852 	__trace_find_cmdline(*pid, buf);
4853 	seq_printf(m, "%d %s\n", *pid, buf);
4854 	return 0;
4855 }
4856 
4857 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4858 	.start		= saved_cmdlines_start,
4859 	.next		= saved_cmdlines_next,
4860 	.stop		= saved_cmdlines_stop,
4861 	.show		= saved_cmdlines_show,
4862 };
4863 
4864 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4865 {
4866 	if (tracing_disabled)
4867 		return -ENODEV;
4868 
4869 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4870 }
4871 
4872 static const struct file_operations tracing_saved_cmdlines_fops = {
4873 	.open		= tracing_saved_cmdlines_open,
4874 	.read		= seq_read,
4875 	.llseek		= seq_lseek,
4876 	.release	= seq_release,
4877 };
4878 
4879 static ssize_t
4880 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4881 				 size_t cnt, loff_t *ppos)
4882 {
4883 	char buf[64];
4884 	int r;
4885 
4886 	arch_spin_lock(&trace_cmdline_lock);
4887 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4888 	arch_spin_unlock(&trace_cmdline_lock);
4889 
4890 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4891 }
4892 
4893 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4894 {
4895 	kfree(s->saved_cmdlines);
4896 	kfree(s->map_cmdline_to_pid);
4897 	kfree(s);
4898 }
4899 
4900 static int tracing_resize_saved_cmdlines(unsigned int val)
4901 {
4902 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4903 
4904 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4905 	if (!s)
4906 		return -ENOMEM;
4907 
4908 	if (allocate_cmdlines_buffer(val, s) < 0) {
4909 		kfree(s);
4910 		return -ENOMEM;
4911 	}
4912 
4913 	arch_spin_lock(&trace_cmdline_lock);
4914 	savedcmd_temp = savedcmd;
4915 	savedcmd = s;
4916 	arch_spin_unlock(&trace_cmdline_lock);
4917 	free_saved_cmdlines_buffer(savedcmd_temp);
4918 
4919 	return 0;
4920 }
4921 
4922 static ssize_t
4923 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4924 				  size_t cnt, loff_t *ppos)
4925 {
4926 	unsigned long val;
4927 	int ret;
4928 
4929 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4930 	if (ret)
4931 		return ret;
4932 
4933 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4934 	if (!val || val > PID_MAX_DEFAULT)
4935 		return -EINVAL;
4936 
4937 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4938 	if (ret < 0)
4939 		return ret;
4940 
4941 	*ppos += cnt;
4942 
4943 	return cnt;
4944 }
4945 
4946 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4947 	.open		= tracing_open_generic,
4948 	.read		= tracing_saved_cmdlines_size_read,
4949 	.write		= tracing_saved_cmdlines_size_write,
4950 };
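/*
 * Illustrative usage of the size knob above (path assumes the usual
 * tracefs mount point): the write handler accepts a decimal entry
 * count between 1 and PID_MAX_DEFAULT and reallocates the cmdline
 * cache via tracing_resize_saved_cmdlines():
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines_size    # e.g. "128"
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */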
4951 
4952 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4953 static union trace_eval_map_item *
4954 update_eval_map(union trace_eval_map_item *ptr)
4955 {
4956 	if (!ptr->map.eval_string) {
4957 		if (ptr->tail.next) {
4958 			ptr = ptr->tail.next;
4959 			/* Set ptr to the next real item (skip head) */
4960 			ptr++;
4961 		} else
4962 			return NULL;
4963 	}
4964 	return ptr;
4965 }
4966 
4967 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4968 {
4969 	union trace_eval_map_item *ptr = v;
4970 
4971 	/*
4972 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4973 	 * This really should never happen.
4974 	 */
4975 	ptr = update_eval_map(ptr);
4976 	if (WARN_ON_ONCE(!ptr))
4977 		return NULL;
4978 
4979 	ptr++;
4980 
4981 	(*pos)++;
4982 
4983 	ptr = update_eval_map(ptr);
4984 
4985 	return ptr;
4986 }
4987 
4988 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4989 {
4990 	union trace_eval_map_item *v;
4991 	loff_t l = 0;
4992 
4993 	mutex_lock(&trace_eval_mutex);
4994 
4995 	v = trace_eval_maps;
4996 	if (v)
4997 		v++;
4998 
4999 	while (v && l < *pos) {
5000 		v = eval_map_next(m, v, &l);
5001 	}
5002 
5003 	return v;
5004 }
5005 
5006 static void eval_map_stop(struct seq_file *m, void *v)
5007 {
5008 	mutex_unlock(&trace_eval_mutex);
5009 }
5010 
5011 static int eval_map_show(struct seq_file *m, void *v)
5012 {
5013 	union trace_eval_map_item *ptr = v;
5014 
5015 	seq_printf(m, "%s %ld (%s)\n",
5016 		   ptr->map.eval_string, ptr->map.eval_value,
5017 		   ptr->map.system);
5018 
5019 	return 0;
5020 }
5021 
5022 static const struct seq_operations tracing_eval_map_seq_ops = {
5023 	.start		= eval_map_start,
5024 	.next		= eval_map_next,
5025 	.stop		= eval_map_stop,
5026 	.show		= eval_map_show,
5027 };
5028 
5029 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5030 {
5031 	if (tracing_disabled)
5032 		return -ENODEV;
5033 
5034 	return seq_open(filp, &tracing_eval_map_seq_ops);
5035 }
5036 
5037 static const struct file_operations tracing_eval_map_fops = {
5038 	.open		= tracing_eval_map_open,
5039 	.read		= seq_read,
5040 	.llseek		= seq_lseek,
5041 	.release	= seq_release,
5042 };
5043 
5044 static inline union trace_eval_map_item *
5045 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5046 {
5047 	/* Return tail of array given the head */
5048 	return ptr + ptr->head.length + 1;
5049 }
5050 
5051 static void
5052 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5053 			   int len)
5054 {
5055 	struct trace_eval_map **stop;
5056 	struct trace_eval_map **map;
5057 	union trace_eval_map_item *map_array;
5058 	union trace_eval_map_item *ptr;
5059 
5060 	stop = start + len;
5061 
5062 	/*
5063 	 * Each block in trace_eval_maps contains the maps plus a head and a
5064 	 * tail item, where the head holds the module and the length of the
5065 	 * array, and the tail holds a pointer to the next block.
5066 	 */
5067 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5068 	if (!map_array) {
5069 		pr_warn("Unable to allocate trace eval mapping\n");
5070 		return;
5071 	}
5072 
5073 	mutex_lock(&trace_eval_mutex);
5074 
5075 	if (!trace_eval_maps)
5076 		trace_eval_maps = map_array;
5077 	else {
5078 		ptr = trace_eval_maps;
5079 		for (;;) {
5080 			ptr = trace_eval_jmp_to_tail(ptr);
5081 			if (!ptr->tail.next)
5082 				break;
5083 			ptr = ptr->tail.next;
5084 
5085 		}
5086 		ptr->tail.next = map_array;
5087 	}
5088 	map_array->head.mod = mod;
5089 	map_array->head.length = len;
5090 	map_array++;
5091 
5092 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5093 		map_array->map = **map;
5094 		map_array++;
5095 	}
5096 	memset(map_array, 0, sizeof(*map_array));
5097 
5098 	mutex_unlock(&trace_eval_mutex);
5099 }
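/*
 * Layout of one block allocated above, for reference (len is the
 * number of maps supplied by the module):
 *
 *	map_array[0]		head  { .mod, .length = len }
 *	map_array[1 .. len]	map   copies of the module's trace_eval_maps
 *	map_array[len + 1]	tail  { .next = NULL, zeroed by the memset }
 *
 * trace_eval_jmp_to_tail() relies on exactly this "head + length + 1"
 * arithmetic to hop from a block's head to its tail, and from there,
 * via tail.next, to the next module's block.
 */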
5100 
5101 static void trace_create_eval_file(struct dentry *d_tracer)
5102 {
5103 	trace_create_file("eval_map", 0444, d_tracer,
5104 			  NULL, &tracing_eval_map_fops);
5105 }
5106 
5107 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5108 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5109 static inline void trace_insert_eval_map_file(struct module *mod,
5110 			      struct trace_eval_map **start, int len) { }
5111 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5112 
5113 static void trace_insert_eval_map(struct module *mod,
5114 				  struct trace_eval_map **start, int len)
5115 {
5116 	struct trace_eval_map **map;
5117 
5118 	if (len <= 0)
5119 		return;
5120 
5121 	map = start;
5122 
5123 	trace_event_eval_update(map, len);
5124 
5125 	trace_insert_eval_map_file(mod, start, len);
5126 }
5127 
5128 static ssize_t
5129 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5130 		       size_t cnt, loff_t *ppos)
5131 {
5132 	struct trace_array *tr = filp->private_data;
5133 	char buf[MAX_TRACER_SIZE+2];
5134 	int r;
5135 
5136 	mutex_lock(&trace_types_lock);
5137 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5138 	mutex_unlock(&trace_types_lock);
5139 
5140 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5141 }
5142 
5143 int tracer_init(struct tracer *t, struct trace_array *tr)
5144 {
5145 	tracing_reset_online_cpus(&tr->trace_buffer);
5146 	return t->init(tr);
5147 }
5148 
5149 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5150 {
5151 	int cpu;
5152 
5153 	for_each_tracing_cpu(cpu)
5154 		per_cpu_ptr(buf->data, cpu)->entries = val;
5155 }
5156 
5157 #ifdef CONFIG_TRACER_MAX_TRACE
5158 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5159 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5160 					struct trace_buffer *size_buf, int cpu_id)
5161 {
5162 	int cpu, ret = 0;
5163 
5164 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5165 		for_each_tracing_cpu(cpu) {
5166 			ret = ring_buffer_resize(trace_buf->buffer,
5167 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5168 			if (ret < 0)
5169 				break;
5170 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5171 				per_cpu_ptr(size_buf->data, cpu)->entries;
5172 		}
5173 	} else {
5174 		ret = ring_buffer_resize(trace_buf->buffer,
5175 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5176 		if (ret == 0)
5177 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5178 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5179 	}
5180 
5181 	return ret;
5182 }
5183 #endif /* CONFIG_TRACER_MAX_TRACE */
5184 
5185 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5186 					unsigned long size, int cpu)
5187 {
5188 	int ret;
5189 
5190 	/*
5191 	 * If kernel or user changes the size of the ring buffer
5192 	 * we use the size that was given, and we can forget about
5193 	 * expanding it later.
5194 	 */
5195 	ring_buffer_expanded = true;
5196 
5197 	/* May be called before buffers are initialized */
5198 	if (!tr->trace_buffer.buffer)
5199 		return 0;
5200 
5201 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5202 	if (ret < 0)
5203 		return ret;
5204 
5205 #ifdef CONFIG_TRACER_MAX_TRACE
5206 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5207 	    !tr->current_trace->use_max_tr)
5208 		goto out;
5209 
5210 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5211 	if (ret < 0) {
5212 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5213 						     &tr->trace_buffer, cpu);
5214 		if (r < 0) {
5215 			/*
5216 			 * AARGH! We are left with a max buffer of a
5217 			 * different size!
5218 			 * The max buffer is our "snapshot" buffer.
5219 			 * When a tracer needs a snapshot (one of the
5220 			 * latency tracers), it swaps the max buffer
5221 			 * with the saved snapshot. We succeeded in
5222 			 * updating the size of the main buffer, but failed
5223 			 * to update the size of the max buffer. And when we
5224 			 * tried to reset the main buffer to its original
5225 			 * size, we failed there too. This is very unlikely
5226 			 * to happen, but if it does, warn and kill all
5227 			 * tracing.
5228 			 */
5229 			WARN_ON(1);
5230 			tracing_disabled = 1;
5231 		}
5232 		return ret;
5233 	}
5234 
5235 	if (cpu == RING_BUFFER_ALL_CPUS)
5236 		set_buffer_entries(&tr->max_buffer, size);
5237 	else
5238 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5239 
5240  out:
5241 #endif /* CONFIG_TRACER_MAX_TRACE */
5242 
5243 	if (cpu == RING_BUFFER_ALL_CPUS)
5244 		set_buffer_entries(&tr->trace_buffer, size);
5245 	else
5246 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5247 
5248 	return ret;
5249 }
5250 
5251 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5252 					  unsigned long size, int cpu_id)
5253 {
5254 	int ret = size;
5255 
5256 	mutex_lock(&trace_types_lock);
5257 
5258 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5259 		/* make sure this cpu is enabled in the mask */
5260 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5261 			ret = -EINVAL;
5262 			goto out;
5263 		}
5264 	}
5265 
5266 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5267 	if (ret < 0)
5268 		ret = -ENOMEM;
5269 
5270 out:
5271 	mutex_unlock(&trace_types_lock);
5272 
5273 	return ret;
5274 }
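/*
 * Illustrative usage (the buffer_size_kb unit handling lives in its
 * write handler elsewhere in this file; only the final size reaches
 * this helper): resizing from userspace ends up here, either for all
 * CPUs or for a single per-cpu buffer:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# echo 4096 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The per-cpu form resolves to a single cpu_id here and is rejected
 * with -EINVAL if that CPU is not in tracing_buffer_mask.
 */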
5275 
5276 
5277 /**
5278  * tracing_update_buffers - used by tracing facility to expand ring buffers
5279  *
5280  * To save memory when tracing is never used on a system that has it
5281  * configured in, the ring buffers are set to a minimum size. But once
5282  * a user starts to use the tracing facility, they need to grow
5283  * to their default size.
5284  *
5285  * This function is to be called when a tracer is about to be used.
5286  */
5287 int tracing_update_buffers(void)
5288 {
5289 	int ret = 0;
5290 
5291 	mutex_lock(&trace_types_lock);
5292 	if (!ring_buffer_expanded)
5293 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5294 						RING_BUFFER_ALL_CPUS);
5295 	mutex_unlock(&trace_types_lock);
5296 
5297 	return ret;
5298 }
5299 
5300 struct trace_option_dentry;
5301 
5302 static void
5303 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5304 
5305 /*
5306  * Used to clear out the tracer before deletion of an instance.
5307  * Must have trace_types_lock held.
5308  */
5309 static void tracing_set_nop(struct trace_array *tr)
5310 {
5311 	if (tr->current_trace == &nop_trace)
5312 		return;
5313 
5314 	tr->current_trace->enabled--;
5315 
5316 	if (tr->current_trace->reset)
5317 		tr->current_trace->reset(tr);
5318 
5319 	tr->current_trace = &nop_trace;
5320 }
5321 
5322 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5323 {
5324 	/* Only enable if the directory has been created already. */
5325 	if (!tr->dir)
5326 		return;
5327 
5328 	create_trace_option_files(tr, t);
5329 }
5330 
5331 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5332 {
5333 	struct tracer *t;
5334 #ifdef CONFIG_TRACER_MAX_TRACE
5335 	bool had_max_tr;
5336 #endif
5337 	int ret = 0;
5338 
5339 	mutex_lock(&trace_types_lock);
5340 
5341 	if (!ring_buffer_expanded) {
5342 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5343 						RING_BUFFER_ALL_CPUS);
5344 		if (ret < 0)
5345 			goto out;
5346 		ret = 0;
5347 	}
5348 
5349 	for (t = trace_types; t; t = t->next) {
5350 		if (strcmp(t->name, buf) == 0)
5351 			break;
5352 	}
5353 	if (!t) {
5354 		ret = -EINVAL;
5355 		goto out;
5356 	}
5357 	if (t == tr->current_trace)
5358 		goto out;
5359 
5360 	/* Some tracers won't work on kernel command line */
5361 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5362 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5363 			t->name);
5364 		goto out;
5365 	}
5366 
5367 	/* Some tracers are only allowed for the top level buffer */
5368 	if (!trace_ok_for_array(t, tr)) {
5369 		ret = -EINVAL;
5370 		goto out;
5371 	}
5372 
5373 	/* If trace pipe files are being read, we can't change the tracer */
5374 	if (tr->current_trace->ref) {
5375 		ret = -EBUSY;
5376 		goto out;
5377 	}
5378 
5379 	trace_branch_disable();
5380 
5381 	tr->current_trace->enabled--;
5382 
5383 	if (tr->current_trace->reset)
5384 		tr->current_trace->reset(tr);
5385 
5386 	/* Current trace needs to be nop_trace before synchronize_sched */
5387 	tr->current_trace = &nop_trace;
5388 
5389 #ifdef CONFIG_TRACER_MAX_TRACE
5390 	had_max_tr = tr->allocated_snapshot;
5391 
5392 	if (had_max_tr && !t->use_max_tr) {
5393 		/*
5394 		 * We need to make sure that the update_max_tr sees that
5395 		 * current_trace changed to nop_trace to keep it from
5396 		 * swapping the buffers after we resize it.
5397 		 * The update_max_tr is called with interrupts disabled
5398 		 * so a synchronize_sched() is sufficient.
5399 		 */
5400 		synchronize_sched();
5401 		free_snapshot(tr);
5402 	}
5403 #endif
5404 
5405 #ifdef CONFIG_TRACER_MAX_TRACE
5406 	if (t->use_max_tr && !had_max_tr) {
5407 		ret = tracing_alloc_snapshot_instance(tr);
5408 		if (ret < 0)
5409 			goto out;
5410 	}
5411 #endif
5412 
5413 	if (t->init) {
5414 		ret = tracer_init(t, tr);
5415 		if (ret)
5416 			goto out;
5417 	}
5418 
5419 	tr->current_trace = t;
5420 	tr->current_trace->enabled++;
5421 	trace_branch_enable(tr);
5422  out:
5423 	mutex_unlock(&trace_types_lock);
5424 
5425 	return ret;
5426 }
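/*
 * Illustrative usage of tracing_set_tracer() via the current_tracer
 * file (path assumes the usual tracefs mount point):
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# echo nop > /sys/kernel/tracing/current_tracer    # back to no tracer
 *
 * The write fails with -EINVAL if the name is unknown or the tracer is
 * not allowed for this instance, and with -EBUSY while trace_pipe
 * readers still hold a reference on the current tracer.
 */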
5427 
5428 static ssize_t
5429 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5430 			size_t cnt, loff_t *ppos)
5431 {
5432 	struct trace_array *tr = filp->private_data;
5433 	char buf[MAX_TRACER_SIZE+1];
5434 	int i;
5435 	size_t ret;
5436 	int err;
5437 
5438 	ret = cnt;
5439 
5440 	if (cnt > MAX_TRACER_SIZE)
5441 		cnt = MAX_TRACER_SIZE;
5442 
5443 	if (copy_from_user(buf, ubuf, cnt))
5444 		return -EFAULT;
5445 
5446 	buf[cnt] = 0;
5447 
5448 	/* strip trailing whitespace. */
5449 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5450 		buf[i] = 0;
5451 
5452 	err = tracing_set_tracer(tr, buf);
5453 	if (err)
5454 		return err;
5455 
5456 	*ppos += ret;
5457 
5458 	return ret;
5459 }
5460 
5461 static ssize_t
5462 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5463 		   size_t cnt, loff_t *ppos)
5464 {
5465 	char buf[64];
5466 	int r;
5467 
5468 	r = snprintf(buf, sizeof(buf), "%ld\n",
5469 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5470 	if (r > sizeof(buf))
5471 		r = sizeof(buf);
5472 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5473 }
5474 
5475 static ssize_t
5476 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5477 		    size_t cnt, loff_t *ppos)
5478 {
5479 	unsigned long val;
5480 	int ret;
5481 
5482 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5483 	if (ret)
5484 		return ret;
5485 
5486 	*ptr = val * 1000;
5487 
5488 	return cnt;
5489 }
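/*
 * Worked example of the unit handling above: these files are read and
 * written in microseconds but stored in nanoseconds. Writing "500"
 * stores 500 * 1000 = 500000 in *ptr; reading converts back with
 * nsecs_to_usecs() and prints "500". A stored value of (unsigned long)-1
 * is reported as "-1" rather than being converted.
 */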
5490 
5491 static ssize_t
5492 tracing_thresh_read(struct file *filp, char __user *ubuf,
5493 		    size_t cnt, loff_t *ppos)
5494 {
5495 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5496 }
5497 
5498 static ssize_t
5499 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5500 		     size_t cnt, loff_t *ppos)
5501 {
5502 	struct trace_array *tr = filp->private_data;
5503 	int ret;
5504 
5505 	mutex_lock(&trace_types_lock);
5506 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5507 	if (ret < 0)
5508 		goto out;
5509 
5510 	if (tr->current_trace->update_thresh) {
5511 		ret = tr->current_trace->update_thresh(tr);
5512 		if (ret < 0)
5513 			goto out;
5514 	}
5515 
5516 	ret = cnt;
5517 out:
5518 	mutex_unlock(&trace_types_lock);
5519 
5520 	return ret;
5521 }
5522 
5523 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5524 
5525 static ssize_t
5526 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5527 		     size_t cnt, loff_t *ppos)
5528 {
5529 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5530 }
5531 
5532 static ssize_t
5533 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5534 		      size_t cnt, loff_t *ppos)
5535 {
5536 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5537 }
5538 
5539 #endif
5540 
5541 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5542 {
5543 	struct trace_array *tr = inode->i_private;
5544 	struct trace_iterator *iter;
5545 	int ret = 0;
5546 
5547 	if (tracing_disabled)
5548 		return -ENODEV;
5549 
5550 	if (trace_array_get(tr) < 0)
5551 		return -ENODEV;
5552 
5553 	mutex_lock(&trace_types_lock);
5554 
5555 	/* create a buffer to store the information to pass to userspace */
5556 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5557 	if (!iter) {
5558 		ret = -ENOMEM;
5559 		__trace_array_put(tr);
5560 		goto out;
5561 	}
5562 
5563 	trace_seq_init(&iter->seq);
5564 	iter->trace = tr->current_trace;
5565 
5566 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5567 		ret = -ENOMEM;
5568 		goto fail;
5569 	}
5570 
5571 	/* trace pipe does not show start of buffer */
5572 	cpumask_setall(iter->started);
5573 
5574 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5575 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5576 
5577 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5578 	if (trace_clocks[tr->clock_id].in_ns)
5579 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5580 
5581 	iter->tr = tr;
5582 	iter->trace_buffer = &tr->trace_buffer;
5583 	iter->cpu_file = tracing_get_cpu(inode);
5584 	mutex_init(&iter->mutex);
5585 	filp->private_data = iter;
5586 
5587 	if (iter->trace->pipe_open)
5588 		iter->trace->pipe_open(iter);
5589 
5590 	nonseekable_open(inode, filp);
5591 
5592 	tr->current_trace->ref++;
5593 out:
5594 	mutex_unlock(&trace_types_lock);
5595 	return ret;
5596 
5597 fail:
5598 	kfree(iter->trace);
5599 	kfree(iter);
5600 	__trace_array_put(tr);
5601 	mutex_unlock(&trace_types_lock);
5602 	return ret;
5603 }
5604 
5605 static int tracing_release_pipe(struct inode *inode, struct file *file)
5606 {
5607 	struct trace_iterator *iter = file->private_data;
5608 	struct trace_array *tr = inode->i_private;
5609 
5610 	mutex_lock(&trace_types_lock);
5611 
5612 	tr->current_trace->ref--;
5613 
5614 	if (iter->trace->pipe_close)
5615 		iter->trace->pipe_close(iter);
5616 
5617 	mutex_unlock(&trace_types_lock);
5618 
5619 	free_cpumask_var(iter->started);
5620 	mutex_destroy(&iter->mutex);
5621 	kfree(iter);
5622 
5623 	trace_array_put(tr);
5624 
5625 	return 0;
5626 }
5627 
5628 static __poll_t
5629 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5630 {
5631 	struct trace_array *tr = iter->tr;
5632 
5633 	/* Iterators are static; they are either filled or empty */
5634 	if (trace_buffer_iter(iter, iter->cpu_file))
5635 		return EPOLLIN | EPOLLRDNORM;
5636 
5637 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5638 		/*
5639 		 * Always select as readable when in blocking mode
5640 		 */
5641 		return EPOLLIN | EPOLLRDNORM;
5642 	else
5643 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5644 					     filp, poll_table);
5645 }
5646 
5647 static __poll_t
5648 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5649 {
5650 	struct trace_iterator *iter = filp->private_data;
5651 
5652 	return trace_poll(iter, filp, poll_table);
5653 }
5654 
5655 /* Must be called with iter->mutex held. */
5656 static int tracing_wait_pipe(struct file *filp)
5657 {
5658 	struct trace_iterator *iter = filp->private_data;
5659 	int ret;
5660 
5661 	while (trace_empty(iter)) {
5662 
5663 		if ((filp->f_flags & O_NONBLOCK)) {
5664 			return -EAGAIN;
5665 		}
5666 
5667 		/*
5668 		 * We block until we read something and tracing is disabled.
5669 		 * We still block if tracing is disabled, but we have never
5670 		 * read anything. This allows a user to cat this file, and
5671 		 * then enable tracing. But after we have read something,
5672 		 * we give an EOF when tracing is again disabled.
5673 		 *
5674 		 * iter->pos will be 0 if we haven't read anything.
5675 		 */
5676 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5677 			break;
5678 
5679 		mutex_unlock(&iter->mutex);
5680 
5681 		ret = wait_on_pipe(iter, false);
5682 
5683 		mutex_lock(&iter->mutex);
5684 
5685 		if (ret)
5686 			return ret;
5687 	}
5688 
5689 	return 1;
5690 }
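/*
 * For illustration: the blocking behaviour above is what a userspace reader
 * of the trace_pipe file sees.  A minimal sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *	# cat /sys/kernel/tracing/trace_pipe    # blocks until events arrive
 *
 * Opening the file with O_NONBLOCK instead makes an empty read return
 * -EAGAIN, as handled at the top of tracing_wait_pipe().
 */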
5691 
5692 /*
5693  * Consumer reader.
5694  */
5695 static ssize_t
5696 tracing_read_pipe(struct file *filp, char __user *ubuf,
5697 		  size_t cnt, loff_t *ppos)
5698 {
5699 	struct trace_iterator *iter = filp->private_data;
5700 	ssize_t sret;
5701 
5702 	/*
5703 	 * Avoid more than one consumer on a single file descriptor.
5704 	 * This is just a matter of trace coherency; the ring buffer itself
5705 	 * is protected.
5706 	 */
5707 	mutex_lock(&iter->mutex);
5708 
5709 	/* return any leftover data */
5710 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5711 	if (sret != -EBUSY)
5712 		goto out;
5713 
5714 	trace_seq_init(&iter->seq);
5715 
5716 	if (iter->trace->read) {
5717 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5718 		if (sret)
5719 			goto out;
5720 	}
5721 
5722 waitagain:
5723 	sret = tracing_wait_pipe(filp);
5724 	if (sret <= 0)
5725 		goto out;
5726 
5727 	/* stop when tracing is finished */
5728 	if (trace_empty(iter)) {
5729 		sret = 0;
5730 		goto out;
5731 	}
5732 
5733 	if (cnt >= PAGE_SIZE)
5734 		cnt = PAGE_SIZE - 1;
5735 
5736 	/* reset all but tr, trace, and overruns */
5737 	memset(&iter->seq, 0,
5738 	       sizeof(struct trace_iterator) -
5739 	       offsetof(struct trace_iterator, seq));
5740 	cpumask_clear(iter->started);
5741 	iter->pos = -1;
5742 
5743 	trace_event_read_lock();
5744 	trace_access_lock(iter->cpu_file);
5745 	while (trace_find_next_entry_inc(iter) != NULL) {
5746 		enum print_line_t ret;
5747 		int save_len = iter->seq.seq.len;
5748 
5749 		ret = print_trace_line(iter);
5750 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5751 			/* don't print partial lines */
5752 			iter->seq.seq.len = save_len;
5753 			break;
5754 		}
5755 		if (ret != TRACE_TYPE_NO_CONSUME)
5756 			trace_consume(iter);
5757 
5758 		if (trace_seq_used(&iter->seq) >= cnt)
5759 			break;
5760 
5761 		/*
5762 		 * The full flag being set means we hit the end of the trace_seq
5763 		 * buffer and should have left via the partial-line condition
5764 		 * above; one of the trace_seq_* functions is not used properly.
5765 		 */
5766 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5767 			  iter->ent->type);
5768 	}
5769 	trace_access_unlock(iter->cpu_file);
5770 	trace_event_read_unlock();
5771 
5772 	/* Now copy what we have to the user */
5773 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5774 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5775 		trace_seq_init(&iter->seq);
5776 
5777 	/*
5778 	 * If there was nothing to send to user, in spite of consuming trace
5779 	 * entries, go back to wait for more entries.
5780 	 */
5781 	if (sret == -EBUSY)
5782 		goto waitagain;
5783 
5784 out:
5785 	mutex_unlock(&iter->mutex);
5786 
5787 	return sret;
5788 }
5789 
5790 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5791 				     unsigned int idx)
5792 {
5793 	__free_page(spd->pages[idx]);
5794 }
5795 
5796 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5797 	.can_merge		= 0,
5798 	.confirm		= generic_pipe_buf_confirm,
5799 	.release		= generic_pipe_buf_release,
5800 	.steal			= generic_pipe_buf_steal,
5801 	.get			= generic_pipe_buf_get,
5802 };
5803 
5804 static size_t
5805 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5806 {
5807 	size_t count;
5808 	int save_len;
5809 	int ret;
5810 
5811 	/* Seq buffer is page-sized, exactly what we need. */
5812 	for (;;) {
5813 		save_len = iter->seq.seq.len;
5814 		ret = print_trace_line(iter);
5815 
5816 		if (trace_seq_has_overflowed(&iter->seq)) {
5817 			iter->seq.seq.len = save_len;
5818 			break;
5819 		}
5820 
5821 		/*
5822 		 * This should not be hit, because a partial line should only
5823 		 * be returned if iter->seq overflowed, and that is handled
5824 		 * above. But check it anyway to be safe.
5825 		 */
5826 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5827 			iter->seq.seq.len = save_len;
5828 			break;
5829 		}
5830 
5831 		count = trace_seq_used(&iter->seq) - save_len;
5832 		if (rem < count) {
5833 			rem = 0;
5834 			iter->seq.seq.len = save_len;
5835 			break;
5836 		}
5837 
5838 		if (ret != TRACE_TYPE_NO_CONSUME)
5839 			trace_consume(iter);
5840 		rem -= count;
5841 		if (!trace_find_next_entry_inc(iter))	{
5842 			rem = 0;
5843 			iter->ent = NULL;
5844 			break;
5845 		}
5846 	}
5847 
5848 	return rem;
5849 }
5850 
5851 static ssize_t tracing_splice_read_pipe(struct file *filp,
5852 					loff_t *ppos,
5853 					struct pipe_inode_info *pipe,
5854 					size_t len,
5855 					unsigned int flags)
5856 {
5857 	struct page *pages_def[PIPE_DEF_BUFFERS];
5858 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5859 	struct trace_iterator *iter = filp->private_data;
5860 	struct splice_pipe_desc spd = {
5861 		.pages		= pages_def,
5862 		.partial	= partial_def,
5863 		.nr_pages	= 0, /* This gets updated below. */
5864 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5865 		.ops		= &tracing_pipe_buf_ops,
5866 		.spd_release	= tracing_spd_release_pipe,
5867 	};
5868 	ssize_t ret;
5869 	size_t rem;
5870 	unsigned int i;
5871 
5872 	if (splice_grow_spd(pipe, &spd))
5873 		return -ENOMEM;
5874 
5875 	mutex_lock(&iter->mutex);
5876 
5877 	if (iter->trace->splice_read) {
5878 		ret = iter->trace->splice_read(iter, filp,
5879 					       ppos, pipe, len, flags);
5880 		if (ret)
5881 			goto out_err;
5882 	}
5883 
5884 	ret = tracing_wait_pipe(filp);
5885 	if (ret <= 0)
5886 		goto out_err;
5887 
5888 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5889 		ret = -EFAULT;
5890 		goto out_err;
5891 	}
5892 
5893 	trace_event_read_lock();
5894 	trace_access_lock(iter->cpu_file);
5895 
5896 	/* Fill as many pages as possible. */
5897 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5898 		spd.pages[i] = alloc_page(GFP_KERNEL);
5899 		if (!spd.pages[i])
5900 			break;
5901 
5902 		rem = tracing_fill_pipe_page(rem, iter);
5903 
5904 		/* Copy the data into the page, so we can start over. */
5905 		ret = trace_seq_to_buffer(&iter->seq,
5906 					  page_address(spd.pages[i]),
5907 					  trace_seq_used(&iter->seq));
5908 		if (ret < 0) {
5909 			__free_page(spd.pages[i]);
5910 			break;
5911 		}
5912 		spd.partial[i].offset = 0;
5913 		spd.partial[i].len = trace_seq_used(&iter->seq);
5914 
5915 		trace_seq_init(&iter->seq);
5916 	}
5917 
5918 	trace_access_unlock(iter->cpu_file);
5919 	trace_event_read_unlock();
5920 	mutex_unlock(&iter->mutex);
5921 
5922 	spd.nr_pages = i;
5923 
5924 	if (i)
5925 		ret = splice_to_pipe(pipe, &spd);
5926 	else
5927 		ret = 0;
5928 out:
5929 	splice_shrink_spd(&spd);
5930 	return ret;
5931 
5932 out_err:
5933 	mutex_unlock(&iter->mutex);
5934 	goto out;
5935 }
5936 
5937 static ssize_t
5938 tracing_entries_read(struct file *filp, char __user *ubuf,
5939 		     size_t cnt, loff_t *ppos)
5940 {
5941 	struct inode *inode = file_inode(filp);
5942 	struct trace_array *tr = inode->i_private;
5943 	int cpu = tracing_get_cpu(inode);
5944 	char buf[64];
5945 	int r = 0;
5946 	ssize_t ret;
5947 
5948 	mutex_lock(&trace_types_lock);
5949 
5950 	if (cpu == RING_BUFFER_ALL_CPUS) {
5951 		int cpu, buf_size_same;
5952 		unsigned long size;
5953 
5954 		size = 0;
5955 		buf_size_same = 1;
5956 		/* check if all cpu sizes are the same */
5957 		for_each_tracing_cpu(cpu) {
5958 			/* fill in the size from the first enabled cpu */
5959 			if (size == 0)
5960 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5961 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5962 				buf_size_same = 0;
5963 				break;
5964 			}
5965 		}
5966 
5967 		if (buf_size_same) {
5968 			if (!ring_buffer_expanded)
5969 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5970 					    size >> 10,
5971 					    trace_buf_size >> 10);
5972 			else
5973 				r = sprintf(buf, "%lu\n", size >> 10);
5974 		} else
5975 			r = sprintf(buf, "X\n");
5976 	} else
5977 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5978 
5979 	mutex_unlock(&trace_types_lock);
5980 
5981 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5982 	return ret;
5983 }
5984 
5985 static ssize_t
5986 tracing_entries_write(struct file *filp, const char __user *ubuf,
5987 		      size_t cnt, loff_t *ppos)
5988 {
5989 	struct inode *inode = file_inode(filp);
5990 	struct trace_array *tr = inode->i_private;
5991 	unsigned long val;
5992 	int ret;
5993 
5994 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5995 	if (ret)
5996 		return ret;
5997 
5998 	/* must have at least 1 entry */
5999 	if (!val)
6000 		return -EINVAL;
6001 
6002 	/* value is in KB */
6003 	val <<= 10;
6004 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6005 	if (ret < 0)
6006 		return ret;
6007 
6008 	*ppos += cnt;
6009 
6010 	return cnt;
6011 }
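/*
 * For illustration: these two handlers back the buffer_size_kb files.
 * Reading reports the per-CPU buffer size in kilobytes ("X" when the CPUs
 * differ), and writing resizes the buffer.  Assuming the usual tracefs
 * mount point:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# cat /sys/kernel/tracing/buffer_size_kb
 *	4096
 */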
6012 
6013 static ssize_t
6014 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6015 				size_t cnt, loff_t *ppos)
6016 {
6017 	struct trace_array *tr = filp->private_data;
6018 	char buf[64];
6019 	int r, cpu;
6020 	unsigned long size = 0, expanded_size = 0;
6021 
6022 	mutex_lock(&trace_types_lock);
6023 	for_each_tracing_cpu(cpu) {
6024 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6025 		if (!ring_buffer_expanded)
6026 			expanded_size += trace_buf_size >> 10;
6027 	}
6028 	if (ring_buffer_expanded)
6029 		r = sprintf(buf, "%lu\n", size);
6030 	else
6031 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6032 	mutex_unlock(&trace_types_lock);
6033 
6034 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6035 }
6036 
6037 static ssize_t
6038 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6039 			  size_t cnt, loff_t *ppos)
6040 {
6041 	/*
6042 	 * There is no need to read what the user has written; this function
6043 	 * only exists so that "echo" into the file does not return an error.
6044 	 */
6045 
6046 	*ppos += cnt;
6047 
6048 	return cnt;
6049 }
6050 
6051 static int
6052 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6053 {
6054 	struct trace_array *tr = inode->i_private;
6055 
6056 	/* disable tracing? */
6057 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6058 		tracer_tracing_off(tr);
6059 	/* resize the ring buffer to 0 */
6060 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6061 
6062 	trace_array_put(tr);
6063 
6064 	return 0;
6065 }
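/*
 * For illustration: these handlers back the free_buffer file.  Writes are
 * accepted but ignored; it is the final close of the file descriptor that
 * shrinks the ring buffer to zero (and, if the disable_on_free option is
 * set, stops tracing first).  A typical use is to keep the file open so
 * that the buffers are reclaimed automatically when the holder exits.
 */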
6066 
6067 static ssize_t
6068 tracing_mark_write(struct file *filp, const char __user *ubuf,
6069 					size_t cnt, loff_t *fpos)
6070 {
6071 	struct trace_array *tr = filp->private_data;
6072 	struct ring_buffer_event *event;
6073 	enum event_trigger_type tt = ETT_NONE;
6074 	struct ring_buffer *buffer;
6075 	struct print_entry *entry;
6076 	unsigned long irq_flags;
6077 	const char faulted[] = "<faulted>";
6078 	ssize_t written;
6079 	int size;
6080 	int len;
6081 
6082 /* Used in tracing_mark_raw_write() as well */
6083 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
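/*
 * Worked example: faulted[] is "<faulted>", so sizeof(faulted) is 10
 * (nine characters plus the terminating NUL) and FAULTED_SIZE is 9.
 */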
6084 
6085 	if (tracing_disabled)
6086 		return -EINVAL;
6087 
6088 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6089 		return -EINVAL;
6090 
6091 	if (cnt > TRACE_BUF_SIZE)
6092 		cnt = TRACE_BUF_SIZE;
6093 
6094 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6095 
6096 	local_save_flags(irq_flags);
6097 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6098 
6099 	/* If the write is shorter than "<faulted>", make sure we can still add that string */
6100 	if (cnt < FAULTED_SIZE)
6101 		size += FAULTED_SIZE - cnt;
6102 
6103 	buffer = tr->trace_buffer.buffer;
6104 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6105 					    irq_flags, preempt_count());
6106 	if (unlikely(!event))
6107 		/* Ring buffer disabled, return as if not open for write */
6108 		return -EBADF;
6109 
6110 	entry = ring_buffer_event_data(event);
6111 	entry->ip = _THIS_IP_;
6112 
6113 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6114 	if (len) {
6115 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6116 		cnt = FAULTED_SIZE;
6117 		written = -EFAULT;
6118 	} else
6119 		written = cnt;
6120 	len = cnt;
6121 
6122 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6123 		/* do not add \n before testing triggers, but add \0 */
6124 		entry->buf[cnt] = '\0';
6125 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6126 	}
6127 
6128 	if (entry->buf[cnt - 1] != '\n') {
6129 		entry->buf[cnt] = '\n';
6130 		entry->buf[cnt + 1] = '\0';
6131 	} else
6132 		entry->buf[cnt] = '\0';
6133 
6134 	__buffer_unlock_commit(buffer, event);
6135 
6136 	if (tt)
6137 		event_triggers_post_call(tr->trace_marker_file, tt);
6138 
6139 	if (written > 0)
6140 		*fpos += written;
6141 
6142 	return written;
6143 }
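/*
 * For illustration, a minimal sketch of writing through this handler from
 * userspace via the trace_marker file (assuming the usual tracefs mount):
 *
 *	# echo "hello from userspace" > /sys/kernel/tracing/trace_marker
 *
 * The string is recorded as a TRACE_PRINT event, with a newline appended
 * if the write did not already end in one.
 */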
6144 
6145 /* Limit it for now to 3K (including tag) */
6146 #define RAW_DATA_MAX_SIZE (1024*3)
6147 
6148 static ssize_t
6149 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6150 					size_t cnt, loff_t *fpos)
6151 {
6152 	struct trace_array *tr = filp->private_data;
6153 	struct ring_buffer_event *event;
6154 	struct ring_buffer *buffer;
6155 	struct raw_data_entry *entry;
6156 	const char faulted[] = "<faulted>";
6157 	unsigned long irq_flags;
6158 	ssize_t written;
6159 	int size;
6160 	int len;
6161 
6162 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6163 
6164 	if (tracing_disabled)
6165 		return -EINVAL;
6166 
6167 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6168 		return -EINVAL;
6169 
6170 	/* The marker must at least have a tag id */
6171 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6172 		return -EINVAL;
6173 
6174 	if (cnt > TRACE_BUF_SIZE)
6175 		cnt = TRACE_BUF_SIZE;
6176 
6177 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6178 
6179 	local_save_flags(irq_flags);
6180 	size = sizeof(*entry) + cnt;
6181 	if (cnt < FAULT_SIZE_ID)
6182 		size += FAULT_SIZE_ID - cnt;
6183 
6184 	buffer = tr->trace_buffer.buffer;
6185 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6186 					    irq_flags, preempt_count());
6187 	if (!event)
6188 		/* Ring buffer disabled, return as if not open for write */
6189 		return -EBADF;
6190 
6191 	entry = ring_buffer_event_data(event);
6192 
6193 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6194 	if (len) {
6195 		entry->id = -1;
6196 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6197 		written = -EFAULT;
6198 	} else
6199 		written = cnt;
6200 
6201 	__buffer_unlock_commit(buffer, event);
6202 
6203 	if (written > 0)
6204 		*fpos += written;
6205 
6206 	return written;
6207 }
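/*
 * For illustration: the raw marker expects a binary payload that begins
 * with an integer tag id, matching the copy into &entry->id above.  A
 * minimal userspace sketch (hypothetical tag value, usual tracefs path,
 * error handling omitted):
 *
 *	struct { int id; char data[8]; } rec = { .id = 42, .data = "payload" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	write(fd, &rec, sizeof(rec));
 *
 * Writes shorter than sizeof(unsigned int) or larger than
 * RAW_DATA_MAX_SIZE are rejected with -EINVAL, as checked above.
 */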
6208 
6209 static int tracing_clock_show(struct seq_file *m, void *v)
6210 {
6211 	struct trace_array *tr = m->private;
6212 	int i;
6213 
6214 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6215 		seq_printf(m,
6216 			"%s%s%s%s", i ? " " : "",
6217 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6218 			i == tr->clock_id ? "]" : "");
6219 	seq_putc(m, '\n');
6220 
6221 	return 0;
6222 }
6223 
6224 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6225 {
6226 	int i;
6227 
6228 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6229 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6230 			break;
6231 	}
6232 	if (i == ARRAY_SIZE(trace_clocks))
6233 		return -EINVAL;
6234 
6235 	mutex_lock(&trace_types_lock);
6236 
6237 	tr->clock_id = i;
6238 
6239 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6240 
6241 	/*
6242 	 * New clock may not be consistent with the previous clock.
6243 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6244 	 */
6245 	tracing_reset_online_cpus(&tr->trace_buffer);
6246 
6247 #ifdef CONFIG_TRACER_MAX_TRACE
6248 	if (tr->max_buffer.buffer)
6249 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6250 	tracing_reset_online_cpus(&tr->max_buffer);
6251 #endif
6252 
6253 	mutex_unlock(&trace_types_lock);
6254 
6255 	return 0;
6256 }
6257 
6258 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6259 				   size_t cnt, loff_t *fpos)
6260 {
6261 	struct seq_file *m = filp->private_data;
6262 	struct trace_array *tr = m->private;
6263 	char buf[64];
6264 	const char *clockstr;
6265 	int ret;
6266 
6267 	if (cnt >= sizeof(buf))
6268 		return -EINVAL;
6269 
6270 	if (copy_from_user(buf, ubuf, cnt))
6271 		return -EFAULT;
6272 
6273 	buf[cnt] = 0;
6274 
6275 	clockstr = strstrip(buf);
6276 
6277 	ret = tracing_set_clock(tr, clockstr);
6278 	if (ret)
6279 		return ret;
6280 
6281 	*fpos += cnt;
6282 
6283 	return cnt;
6284 }
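/*
 * For illustration (usual tracefs mount assumed): the trace clock is read
 * and selected through the trace_clock file, with the current clock shown
 * in brackets by tracing_clock_show().  The exact clock list depends on
 * the kernel:
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot
 *	# echo mono > /sys/kernel/tracing/trace_clock
 *
 * Switching clocks resets the ring buffer, since old and new timestamps
 * would not be comparable.
 */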
6285 
6286 static int tracing_clock_open(struct inode *inode, struct file *file)
6287 {
6288 	struct trace_array *tr = inode->i_private;
6289 	int ret;
6290 
6291 	if (tracing_disabled)
6292 		return -ENODEV;
6293 
6294 	if (trace_array_get(tr))
6295 		return -ENODEV;
6296 
6297 	ret = single_open(file, tracing_clock_show, inode->i_private);
6298 	if (ret < 0)
6299 		trace_array_put(tr);
6300 
6301 	return ret;
6302 }
6303 
6304 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6305 {
6306 	struct trace_array *tr = m->private;
6307 
6308 	mutex_lock(&trace_types_lock);
6309 
6310 	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6311 		seq_puts(m, "delta [absolute]\n");
6312 	else
6313 		seq_puts(m, "[delta] absolute\n");
6314 
6315 	mutex_unlock(&trace_types_lock);
6316 
6317 	return 0;
6318 }
6319 
6320 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6321 {
6322 	struct trace_array *tr = inode->i_private;
6323 	int ret;
6324 
6325 	if (tracing_disabled)
6326 		return -ENODEV;
6327 
6328 	if (trace_array_get(tr))
6329 		return -ENODEV;
6330 
6331 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6332 	if (ret < 0)
6333 		trace_array_put(tr);
6334 
6335 	return ret;
6336 }
6337 
6338 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6339 {
6340 	int ret = 0;
6341 
6342 	mutex_lock(&trace_types_lock);
6343 
6344 	if (abs && tr->time_stamp_abs_ref++)
6345 		goto out;
6346 
6347 	if (!abs) {
6348 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6349 			ret = -EINVAL;
6350 			goto out;
6351 		}
6352 
6353 		if (--tr->time_stamp_abs_ref)
6354 			goto out;
6355 	}
6356 
6357 	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6358 
6359 #ifdef CONFIG_TRACER_MAX_TRACE
6360 	if (tr->max_buffer.buffer)
6361 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6362 #endif
6363  out:
6364 	mutex_unlock(&trace_types_lock);
6365 
6366 	return ret;
6367 }
6368 
6369 struct ftrace_buffer_info {
6370 	struct trace_iterator	iter;
6371 	void			*spare;
6372 	unsigned int		spare_cpu;
6373 	unsigned int		read;
6374 };
6375 
6376 #ifdef CONFIG_TRACER_SNAPSHOT
6377 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6378 {
6379 	struct trace_array *tr = inode->i_private;
6380 	struct trace_iterator *iter;
6381 	struct seq_file *m;
6382 	int ret = 0;
6383 
6384 	if (trace_array_get(tr) < 0)
6385 		return -ENODEV;
6386 
6387 	if (file->f_mode & FMODE_READ) {
6388 		iter = __tracing_open(inode, file, true);
6389 		if (IS_ERR(iter))
6390 			ret = PTR_ERR(iter);
6391 	} else {
6392 		/* Writes still need the seq_file to hold the private data */
6393 		ret = -ENOMEM;
6394 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6395 		if (!m)
6396 			goto out;
6397 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6398 		if (!iter) {
6399 			kfree(m);
6400 			goto out;
6401 		}
6402 		ret = 0;
6403 
6404 		iter->tr = tr;
6405 		iter->trace_buffer = &tr->max_buffer;
6406 		iter->cpu_file = tracing_get_cpu(inode);
6407 		m->private = iter;
6408 		file->private_data = m;
6409 	}
6410 out:
6411 	if (ret < 0)
6412 		trace_array_put(tr);
6413 
6414 	return ret;
6415 }
6416 
6417 static ssize_t
6418 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6419 		       loff_t *ppos)
6420 {
6421 	struct seq_file *m = filp->private_data;
6422 	struct trace_iterator *iter = m->private;
6423 	struct trace_array *tr = iter->tr;
6424 	unsigned long val;
6425 	int ret;
6426 
6427 	ret = tracing_update_buffers();
6428 	if (ret < 0)
6429 		return ret;
6430 
6431 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6432 	if (ret)
6433 		return ret;
6434 
6435 	mutex_lock(&trace_types_lock);
6436 
6437 	if (tr->current_trace->use_max_tr) {
6438 		ret = -EBUSY;
6439 		goto out;
6440 	}
6441 
6442 	switch (val) {
6443 	case 0:
6444 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6445 			ret = -EINVAL;
6446 			break;
6447 		}
6448 		if (tr->allocated_snapshot)
6449 			free_snapshot(tr);
6450 		break;
6451 	case 1:
6452 /* Only allow per-cpu swap if the ring buffer supports it */
6453 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6454 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6455 			ret = -EINVAL;
6456 			break;
6457 		}
6458 #endif
6459 		if (!tr->allocated_snapshot) {
6460 			ret = tracing_alloc_snapshot_instance(tr);
6461 			if (ret < 0)
6462 				break;
6463 		}
6464 		local_irq_disable();
6465 		/* Now, we're going to swap */
6466 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6467 			update_max_tr(tr, current, smp_processor_id());
6468 		else
6469 			update_max_tr_single(tr, current, iter->cpu_file);
6470 		local_irq_enable();
6471 		break;
6472 	default:
6473 		if (tr->allocated_snapshot) {
6474 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6475 				tracing_reset_online_cpus(&tr->max_buffer);
6476 			else
6477 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6478 		}
6479 		break;
6480 	}
6481 
6482 	if (ret >= 0) {
6483 		*ppos += cnt;
6484 		ret = cnt;
6485 	}
6486 out:
6487 	mutex_unlock(&trace_types_lock);
6488 	return ret;
6489 }
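/*
 * For illustration (usual tracefs mount assumed): the value written to the
 * snapshot file selects the action taken in the switch above:
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed and take a snapshot
 *	# echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 *	# echo 2 > /sys/kernel/tracing/snapshot   # any value > 1: clear the snapshot buffer
 */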
6490 
6491 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6492 {
6493 	struct seq_file *m = file->private_data;
6494 	int ret;
6495 
6496 	ret = tracing_release(inode, file);
6497 
6498 	if (file->f_mode & FMODE_READ)
6499 		return ret;
6500 
6501 	/* If write only, the seq_file is just a stub */
6502 	if (m)
6503 		kfree(m->private);
6504 	kfree(m);
6505 
6506 	return 0;
6507 }
6508 
6509 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6510 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6511 				    size_t count, loff_t *ppos);
6512 static int tracing_buffers_release(struct inode *inode, struct file *file);
6513 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6514 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6515 
6516 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6517 {
6518 	struct ftrace_buffer_info *info;
6519 	int ret;
6520 
6521 	ret = tracing_buffers_open(inode, filp);
6522 	if (ret < 0)
6523 		return ret;
6524 
6525 	info = filp->private_data;
6526 
6527 	if (info->iter.trace->use_max_tr) {
6528 		tracing_buffers_release(inode, filp);
6529 		return -EBUSY;
6530 	}
6531 
6532 	info->iter.snapshot = true;
6533 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6534 
6535 	return ret;
6536 }
6537 
6538 #endif /* CONFIG_TRACER_SNAPSHOT */
6539 
6540 
6541 static const struct file_operations tracing_thresh_fops = {
6542 	.open		= tracing_open_generic,
6543 	.read		= tracing_thresh_read,
6544 	.write		= tracing_thresh_write,
6545 	.llseek		= generic_file_llseek,
6546 };
6547 
6548 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6549 static const struct file_operations tracing_max_lat_fops = {
6550 	.open		= tracing_open_generic,
6551 	.read		= tracing_max_lat_read,
6552 	.write		= tracing_max_lat_write,
6553 	.llseek		= generic_file_llseek,
6554 };
6555 #endif
6556 
6557 static const struct file_operations set_tracer_fops = {
6558 	.open		= tracing_open_generic,
6559 	.read		= tracing_set_trace_read,
6560 	.write		= tracing_set_trace_write,
6561 	.llseek		= generic_file_llseek,
6562 };
6563 
6564 static const struct file_operations tracing_pipe_fops = {
6565 	.open		= tracing_open_pipe,
6566 	.poll		= tracing_poll_pipe,
6567 	.read		= tracing_read_pipe,
6568 	.splice_read	= tracing_splice_read_pipe,
6569 	.release	= tracing_release_pipe,
6570 	.llseek		= no_llseek,
6571 };
6572 
6573 static const struct file_operations tracing_entries_fops = {
6574 	.open		= tracing_open_generic_tr,
6575 	.read		= tracing_entries_read,
6576 	.write		= tracing_entries_write,
6577 	.llseek		= generic_file_llseek,
6578 	.release	= tracing_release_generic_tr,
6579 };
6580 
6581 static const struct file_operations tracing_total_entries_fops = {
6582 	.open		= tracing_open_generic_tr,
6583 	.read		= tracing_total_entries_read,
6584 	.llseek		= generic_file_llseek,
6585 	.release	= tracing_release_generic_tr,
6586 };
6587 
6588 static const struct file_operations tracing_free_buffer_fops = {
6589 	.open		= tracing_open_generic_tr,
6590 	.write		= tracing_free_buffer_write,
6591 	.release	= tracing_free_buffer_release,
6592 };
6593 
6594 static const struct file_operations tracing_mark_fops = {
6595 	.open		= tracing_open_generic_tr,
6596 	.write		= tracing_mark_write,
6597 	.llseek		= generic_file_llseek,
6598 	.release	= tracing_release_generic_tr,
6599 };
6600 
6601 static const struct file_operations tracing_mark_raw_fops = {
6602 	.open		= tracing_open_generic_tr,
6603 	.write		= tracing_mark_raw_write,
6604 	.llseek		= generic_file_llseek,
6605 	.release	= tracing_release_generic_tr,
6606 };
6607 
6608 static const struct file_operations trace_clock_fops = {
6609 	.open		= tracing_clock_open,
6610 	.read		= seq_read,
6611 	.llseek		= seq_lseek,
6612 	.release	= tracing_single_release_tr,
6613 	.write		= tracing_clock_write,
6614 };
6615 
6616 static const struct file_operations trace_time_stamp_mode_fops = {
6617 	.open		= tracing_time_stamp_mode_open,
6618 	.read		= seq_read,
6619 	.llseek		= seq_lseek,
6620 	.release	= tracing_single_release_tr,
6621 };
6622 
6623 #ifdef CONFIG_TRACER_SNAPSHOT
6624 static const struct file_operations snapshot_fops = {
6625 	.open		= tracing_snapshot_open,
6626 	.read		= seq_read,
6627 	.write		= tracing_snapshot_write,
6628 	.llseek		= tracing_lseek,
6629 	.release	= tracing_snapshot_release,
6630 };
6631 
6632 static const struct file_operations snapshot_raw_fops = {
6633 	.open		= snapshot_raw_open,
6634 	.read		= tracing_buffers_read,
6635 	.release	= tracing_buffers_release,
6636 	.splice_read	= tracing_buffers_splice_read,
6637 	.llseek		= no_llseek,
6638 };
6639 
6640 #endif /* CONFIG_TRACER_SNAPSHOT */
6641 
6642 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6643 {
6644 	struct trace_array *tr = inode->i_private;
6645 	struct ftrace_buffer_info *info;
6646 	int ret;
6647 
6648 	if (tracing_disabled)
6649 		return -ENODEV;
6650 
6651 	if (trace_array_get(tr) < 0)
6652 		return -ENODEV;
6653 
6654 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6655 	if (!info) {
6656 		trace_array_put(tr);
6657 		return -ENOMEM;
6658 	}
6659 
6660 	mutex_lock(&trace_types_lock);
6661 
6662 	info->iter.tr		= tr;
6663 	info->iter.cpu_file	= tracing_get_cpu(inode);
6664 	info->iter.trace	= tr->current_trace;
6665 	info->iter.trace_buffer = &tr->trace_buffer;
6666 	info->spare		= NULL;
6667 	/* Force reading ring buffer for first read */
6668 	info->read		= (unsigned int)-1;
6669 
6670 	filp->private_data = info;
6671 
6672 	tr->current_trace->ref++;
6673 
6674 	mutex_unlock(&trace_types_lock);
6675 
6676 	ret = nonseekable_open(inode, filp);
6677 	if (ret < 0)
6678 		trace_array_put(tr);
6679 
6680 	return ret;
6681 }
6682 
6683 static __poll_t
6684 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6685 {
6686 	struct ftrace_buffer_info *info = filp->private_data;
6687 	struct trace_iterator *iter = &info->iter;
6688 
6689 	return trace_poll(iter, filp, poll_table);
6690 }
6691 
6692 static ssize_t
6693 tracing_buffers_read(struct file *filp, char __user *ubuf,
6694 		     size_t count, loff_t *ppos)
6695 {
6696 	struct ftrace_buffer_info *info = filp->private_data;
6697 	struct trace_iterator *iter = &info->iter;
6698 	ssize_t ret = 0;
6699 	ssize_t size;
6700 
6701 	if (!count)
6702 		return 0;
6703 
6704 #ifdef CONFIG_TRACER_MAX_TRACE
6705 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6706 		return -EBUSY;
6707 #endif
6708 
6709 	if (!info->spare) {
6710 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6711 							  iter->cpu_file);
6712 		if (IS_ERR(info->spare)) {
6713 			ret = PTR_ERR(info->spare);
6714 			info->spare = NULL;
6715 		} else {
6716 			info->spare_cpu = iter->cpu_file;
6717 		}
6718 	}
6719 	if (!info->spare)
6720 		return ret;
6721 
6722 	/* Do we have previous read data to read? */
6723 	if (info->read < PAGE_SIZE)
6724 		goto read;
6725 
6726  again:
6727 	trace_access_lock(iter->cpu_file);
6728 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6729 				    &info->spare,
6730 				    count,
6731 				    iter->cpu_file, 0);
6732 	trace_access_unlock(iter->cpu_file);
6733 
6734 	if (ret < 0) {
6735 		if (trace_empty(iter)) {
6736 			if ((filp->f_flags & O_NONBLOCK))
6737 				return -EAGAIN;
6738 
6739 			ret = wait_on_pipe(iter, false);
6740 			if (ret)
6741 				return ret;
6742 
6743 			goto again;
6744 		}
6745 		return 0;
6746 	}
6747 
6748 	info->read = 0;
6749  read:
6750 	size = PAGE_SIZE - info->read;
6751 	if (size > count)
6752 		size = count;
6753 
6754 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6755 	if (ret == size)
6756 		return -EFAULT;
6757 
6758 	size -= ret;
6759 
6760 	*ppos += size;
6761 	info->read += size;
6762 
6763 	return size;
6764 }
6765 
6766 static int tracing_buffers_release(struct inode *inode, struct file *file)
6767 {
6768 	struct ftrace_buffer_info *info = file->private_data;
6769 	struct trace_iterator *iter = &info->iter;
6770 
6771 	mutex_lock(&trace_types_lock);
6772 
6773 	iter->tr->current_trace->ref--;
6774 
6775 	__trace_array_put(iter->tr);
6776 
6777 	if (info->spare)
6778 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6779 					   info->spare_cpu, info->spare);
6780 	kfree(info);
6781 
6782 	mutex_unlock(&trace_types_lock);
6783 
6784 	return 0;
6785 }
6786 
6787 struct buffer_ref {
6788 	struct ring_buffer	*buffer;
6789 	void			*page;
6790 	int			cpu;
6791 	int			ref;
6792 };
6793 
6794 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6795 				    struct pipe_buffer *buf)
6796 {
6797 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6798 
6799 	if (--ref->ref)
6800 		return;
6801 
6802 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6803 	kfree(ref);
6804 	buf->private = 0;
6805 }
6806 
6807 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6808 				struct pipe_buffer *buf)
6809 {
6810 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6811 
6812 	ref->ref++;
6813 }
6814 
6815 /* Pipe buffer operations for a buffer. */
6816 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6817 	.can_merge		= 0,
6818 	.confirm		= generic_pipe_buf_confirm,
6819 	.release		= buffer_pipe_buf_release,
6820 	.steal			= generic_pipe_buf_steal,
6821 	.get			= buffer_pipe_buf_get,
6822 };
6823 
6824 /*
6825  * Callback from splice_to_pipe(); releases any pages still held in the
6826  * spd in case we errored out while filling the pipe.
6827  */
6828 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6829 {
6830 	struct buffer_ref *ref =
6831 		(struct buffer_ref *)spd->partial[i].private;
6832 
6833 	if (--ref->ref)
6834 		return;
6835 
6836 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6837 	kfree(ref);
6838 	spd->partial[i].private = 0;
6839 }
6840 
6841 static ssize_t
6842 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6843 			    struct pipe_inode_info *pipe, size_t len,
6844 			    unsigned int flags)
6845 {
6846 	struct ftrace_buffer_info *info = file->private_data;
6847 	struct trace_iterator *iter = &info->iter;
6848 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6849 	struct page *pages_def[PIPE_DEF_BUFFERS];
6850 	struct splice_pipe_desc spd = {
6851 		.pages		= pages_def,
6852 		.partial	= partial_def,
6853 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6854 		.ops		= &buffer_pipe_buf_ops,
6855 		.spd_release	= buffer_spd_release,
6856 	};
6857 	struct buffer_ref *ref;
6858 	int entries, i;
6859 	ssize_t ret = 0;
6860 
6861 #ifdef CONFIG_TRACER_MAX_TRACE
6862 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6863 		return -EBUSY;
6864 #endif
6865 
6866 	if (*ppos & (PAGE_SIZE - 1))
6867 		return -EINVAL;
6868 
6869 	if (len & (PAGE_SIZE - 1)) {
6870 		if (len < PAGE_SIZE)
6871 			return -EINVAL;
6872 		len &= PAGE_MASK;
6873 	}
6874 
6875 	if (splice_grow_spd(pipe, &spd))
6876 		return -ENOMEM;
6877 
6878  again:
6879 	trace_access_lock(iter->cpu_file);
6880 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6881 
6882 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6883 		struct page *page;
6884 		int r;
6885 
6886 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6887 		if (!ref) {
6888 			ret = -ENOMEM;
6889 			break;
6890 		}
6891 
6892 		ref->ref = 1;
6893 		ref->buffer = iter->trace_buffer->buffer;
6894 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6895 		if (IS_ERR(ref->page)) {
6896 			ret = PTR_ERR(ref->page);
6897 			ref->page = NULL;
6898 			kfree(ref);
6899 			break;
6900 		}
6901 		ref->cpu = iter->cpu_file;
6902 
6903 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6904 					  len, iter->cpu_file, 1);
6905 		if (r < 0) {
6906 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6907 						   ref->page);
6908 			kfree(ref);
6909 			break;
6910 		}
6911 
6912 		page = virt_to_page(ref->page);
6913 
6914 		spd.pages[i] = page;
6915 		spd.partial[i].len = PAGE_SIZE;
6916 		spd.partial[i].offset = 0;
6917 		spd.partial[i].private = (unsigned long)ref;
6918 		spd.nr_pages++;
6919 		*ppos += PAGE_SIZE;
6920 
6921 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6922 	}
6923 
6924 	trace_access_unlock(iter->cpu_file);
6925 	spd.nr_pages = i;
6926 
6927 	/* did we read anything? */
6928 	if (!spd.nr_pages) {
6929 		if (ret)
6930 			goto out;
6931 
6932 		ret = -EAGAIN;
6933 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6934 			goto out;
6935 
6936 		ret = wait_on_pipe(iter, true);
6937 		if (ret)
6938 			goto out;
6939 
6940 		goto again;
6941 	}
6942 
6943 	ret = splice_to_pipe(pipe, &spd);
6944 out:
6945 	splice_shrink_spd(&spd);
6946 
6947 	return ret;
6948 }
6949 
6950 static const struct file_operations tracing_buffers_fops = {
6951 	.open		= tracing_buffers_open,
6952 	.read		= tracing_buffers_read,
6953 	.poll		= tracing_buffers_poll,
6954 	.release	= tracing_buffers_release,
6955 	.splice_read	= tracing_buffers_splice_read,
6956 	.llseek		= no_llseek,
6957 };
6958 
6959 static ssize_t
6960 tracing_stats_read(struct file *filp, char __user *ubuf,
6961 		   size_t count, loff_t *ppos)
6962 {
6963 	struct inode *inode = file_inode(filp);
6964 	struct trace_array *tr = inode->i_private;
6965 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6966 	int cpu = tracing_get_cpu(inode);
6967 	struct trace_seq *s;
6968 	unsigned long cnt;
6969 	unsigned long long t;
6970 	unsigned long usec_rem;
6971 
6972 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6973 	if (!s)
6974 		return -ENOMEM;
6975 
6976 	trace_seq_init(s);
6977 
6978 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6979 	trace_seq_printf(s, "entries: %ld\n", cnt);
6980 
6981 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6982 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6983 
6984 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6985 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6986 
6987 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6988 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6989 
6990 	if (trace_clocks[tr->clock_id].in_ns) {
6991 		/* local or global for trace_clock */
6992 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6993 		usec_rem = do_div(t, USEC_PER_SEC);
6994 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6995 								t, usec_rem);
6996 
6997 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6998 		usec_rem = do_div(t, USEC_PER_SEC);
6999 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7000 	} else {
7001 		/* counter or tsc mode for trace_clock */
7002 		trace_seq_printf(s, "oldest event ts: %llu\n",
7003 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7004 
7005 		trace_seq_printf(s, "now ts: %llu\n",
7006 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7007 	}
7008 
7009 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7010 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7011 
7012 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7013 	trace_seq_printf(s, "read events: %ld\n", cnt);
7014 
7015 	count = simple_read_from_buffer(ubuf, count, ppos,
7016 					s->buffer, trace_seq_used(s));
7017 
7018 	kfree(s);
7019 
7020 	return count;
7021 }
7022 
7023 static const struct file_operations tracing_stats_fops = {
7024 	.open		= tracing_open_generic_tr,
7025 	.read		= tracing_stats_read,
7026 	.llseek		= generic_file_llseek,
7027 	.release	= tracing_release_generic_tr,
7028 };
7029 
7030 #ifdef CONFIG_DYNAMIC_FTRACE
7031 
7032 static ssize_t
7033 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7034 		  size_t cnt, loff_t *ppos)
7035 {
7036 	unsigned long *p = filp->private_data;
7037 	char buf[64]; /* Not too big for a shallow stack */
7038 	int r;
7039 
7040 	r = scnprintf(buf, 63, "%ld", *p);
7041 	buf[r++] = '\n';
7042 
7043 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7044 }
7045 
7046 static const struct file_operations tracing_dyn_info_fops = {
7047 	.open		= tracing_open_generic,
7048 	.read		= tracing_read_dyn_info,
7049 	.llseek		= generic_file_llseek,
7050 };
7051 #endif /* CONFIG_DYNAMIC_FTRACE */
7052 
7053 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7054 static void
7055 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7056 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7057 		void *data)
7058 {
7059 	tracing_snapshot_instance(tr);
7060 }
7061 
7062 static void
7063 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7064 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7065 		      void *data)
7066 {
7067 	struct ftrace_func_mapper *mapper = data;
7068 	long *count = NULL;
7069 
7070 	if (mapper)
7071 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7072 
7073 	if (count) {
7074 
7075 		if (*count <= 0)
7076 			return;
7077 
7078 		(*count)--;
7079 	}
7080 
7081 	tracing_snapshot_instance(tr);
7082 }
7083 
7084 static int
7085 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7086 		      struct ftrace_probe_ops *ops, void *data)
7087 {
7088 	struct ftrace_func_mapper *mapper = data;
7089 	long *count = NULL;
7090 
7091 	seq_printf(m, "%ps:", (void *)ip);
7092 
7093 	seq_puts(m, "snapshot");
7094 
7095 	if (mapper)
7096 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7097 
7098 	if (count)
7099 		seq_printf(m, ":count=%ld\n", *count);
7100 	else
7101 		seq_puts(m, ":unlimited\n");
7102 
7103 	return 0;
7104 }
7105 
7106 static int
7107 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7108 		     unsigned long ip, void *init_data, void **data)
7109 {
7110 	struct ftrace_func_mapper *mapper = *data;
7111 
7112 	if (!mapper) {
7113 		mapper = allocate_ftrace_func_mapper();
7114 		if (!mapper)
7115 			return -ENOMEM;
7116 		*data = mapper;
7117 	}
7118 
7119 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7120 }
7121 
7122 static void
7123 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7124 		     unsigned long ip, void *data)
7125 {
7126 	struct ftrace_func_mapper *mapper = data;
7127 
7128 	if (!ip) {
7129 		if (!mapper)
7130 			return;
7131 		free_ftrace_func_mapper(mapper, NULL);
7132 		return;
7133 	}
7134 
7135 	ftrace_func_mapper_remove_ip(mapper, ip);
7136 }
7137 
7138 static struct ftrace_probe_ops snapshot_probe_ops = {
7139 	.func			= ftrace_snapshot,
7140 	.print			= ftrace_snapshot_print,
7141 };
7142 
7143 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7144 	.func			= ftrace_count_snapshot,
7145 	.print			= ftrace_snapshot_print,
7146 	.init			= ftrace_snapshot_init,
7147 	.free			= ftrace_snapshot_free,
7148 };
7149 
7150 static int
7151 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7152 			       char *glob, char *cmd, char *param, int enable)
7153 {
7154 	struct ftrace_probe_ops *ops;
7155 	void *count = (void *)-1;
7156 	char *number;
7157 	int ret;
7158 
7159 	if (!tr)
7160 		return -ENODEV;
7161 
7162 	/* hash funcs only work with set_ftrace_filter */
7163 	if (!enable)
7164 		return -EINVAL;
7165 
7166 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7167 
7168 	if (glob[0] == '!')
7169 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7170 
7171 	if (!param)
7172 		goto out_reg;
7173 
7174 	number = strsep(&param, ":");
7175 
7176 	if (!strlen(number))
7177 		goto out_reg;
7178 
7179 	/*
7180 	 * We use the callback data field (which is a pointer)
7181 	 * as our counter.
7182 	 */
7183 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7184 	if (ret)
7185 		return ret;
7186 
7187  out_reg:
7188 	ret = tracing_alloc_snapshot_instance(tr);
7189 	if (ret < 0)
7190 		goto out;
7191 
7192 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7193 
7194  out:
7195 	return ret < 0 ? ret : 0;
7196 }
7197 
7198 static struct ftrace_func_command ftrace_snapshot_cmd = {
7199 	.name			= "snapshot",
7200 	.func			= ftrace_trace_snapshot_callback,
7201 };
7202 
7203 static __init int register_snapshot_cmd(void)
7204 {
7205 	return register_ftrace_command(&ftrace_snapshot_cmd);
7206 }
7207 #else
7208 static inline __init int register_snapshot_cmd(void) { return 0; }
7209 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7210 
7211 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7212 {
7213 	if (WARN_ON(!tr->dir))
7214 		return ERR_PTR(-ENODEV);
7215 
7216 	/* Top directory uses NULL as the parent */
7217 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7218 		return NULL;
7219 
7220 	/* All sub buffers have a descriptor */
7221 	return tr->dir;
7222 }
7223 
7224 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7225 {
7226 	struct dentry *d_tracer;
7227 
7228 	if (tr->percpu_dir)
7229 		return tr->percpu_dir;
7230 
7231 	d_tracer = tracing_get_dentry(tr);
7232 	if (IS_ERR(d_tracer))
7233 		return NULL;
7234 
7235 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7236 
7237 	WARN_ONCE(!tr->percpu_dir,
7238 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7239 
7240 	return tr->percpu_dir;
7241 }
7242 
7243 static struct dentry *
7244 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7245 		      void *data, long cpu, const struct file_operations *fops)
7246 {
7247 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7248 
7249 	if (ret) /* See tracing_get_cpu() */
7250 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7251 	return ret;
7252 }
7253 
7254 static void
7255 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7256 {
7257 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7258 	struct dentry *d_cpu;
7259 	char cpu_dir[30]; /* 30 characters should be more than enough */
7260 
7261 	if (!d_percpu)
7262 		return;
7263 
7264 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7265 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7266 	if (!d_cpu) {
7267 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7268 		return;
7269 	}
7270 
7271 	/* per cpu trace_pipe */
7272 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7273 				tr, cpu, &tracing_pipe_fops);
7274 
7275 	/* per cpu trace */
7276 	trace_create_cpu_file("trace", 0644, d_cpu,
7277 				tr, cpu, &tracing_fops);
7278 
7279 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7280 				tr, cpu, &tracing_buffers_fops);
7281 
7282 	trace_create_cpu_file("stats", 0444, d_cpu,
7283 				tr, cpu, &tracing_stats_fops);
7284 
7285 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7286 				tr, cpu, &tracing_entries_fops);
7287 
7288 #ifdef CONFIG_TRACER_SNAPSHOT
7289 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7290 				tr, cpu, &snapshot_fops);
7291 
7292 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7293 				tr, cpu, &snapshot_raw_fops);
7294 #endif
7295 }
7296 
7297 #ifdef CONFIG_FTRACE_SELFTEST
7298 /* Let selftest have access to static functions in this file */
7299 #include "trace_selftest.c"
7300 #endif
7301 
7302 static ssize_t
7303 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7304 			loff_t *ppos)
7305 {
7306 	struct trace_option_dentry *topt = filp->private_data;
7307 	char *buf;
7308 
7309 	if (topt->flags->val & topt->opt->bit)
7310 		buf = "1\n";
7311 	else
7312 		buf = "0\n";
7313 
7314 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7315 }
7316 
7317 static ssize_t
7318 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7319 			 loff_t *ppos)
7320 {
7321 	struct trace_option_dentry *topt = filp->private_data;
7322 	unsigned long val;
7323 	int ret;
7324 
7325 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7326 	if (ret)
7327 		return ret;
7328 
7329 	if (val != 0 && val != 1)
7330 		return -EINVAL;
7331 
7332 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7333 		mutex_lock(&trace_types_lock);
7334 		ret = __set_tracer_option(topt->tr, topt->flags,
7335 					  topt->opt, !val);
7336 		mutex_unlock(&trace_types_lock);
7337 		if (ret)
7338 			return ret;
7339 	}
7340 
7341 	*ppos += cnt;
7342 
7343 	return cnt;
7344 }
7345 
7346 
7347 static const struct file_operations trace_options_fops = {
7348 	.open = tracing_open_generic,
7349 	.read = trace_options_read,
7350 	.write = trace_options_write,
7351 	.llseek	= generic_file_llseek,
7352 };
7353 
7354 /*
7355  * In order to pass in both the trace_array descriptor and the index of
7356  * the flag that a trace option file represents, the trace_array has a
7357  * character array trace_flags_index[], where each element holds the index
7358  * of the bit for the flag it represents: index[0] == 0, index[1] == 1, etc.
7359  * The address of the element for a given flag is passed to that option
7360  * file's read/write callbacks.
7361  *
7362  * In order to extract both the index and the trace_array descriptor,
7363  * get_tr_index() uses the following algorithm.
7364  *
7365  *   idx = *ptr;
7366  *
7367  * The pointer passed in is the address of one element of that array, and
7368  * the element's value is its own position (remember index[1] == 1).
7369  *
7370  * Then, to get the trace_array descriptor, subtracting that index from
7371  * the pointer gets us to the start of the index array itself.
7372  *
7373  *   ptr - idx == &index[0]
7374  *
7375  * Then a simple container_of() from that pointer gets us to the
7376  * trace_array descriptor.
7377  */
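/*
 * Worked example: if a callback is handed &tr->trace_flags_index[3], then
 * *data is 3, data - 3 is &tr->trace_flags_index[0], and container_of() on
 * that address recovers the enclosing trace_array.
 */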
7378 static void get_tr_index(void *data, struct trace_array **ptr,
7379 			 unsigned int *pindex)
7380 {
7381 	*pindex = *(unsigned char *)data;
7382 
7383 	*ptr = container_of(data - *pindex, struct trace_array,
7384 			    trace_flags_index);
7385 }
7386 
7387 static ssize_t
7388 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7389 			loff_t *ppos)
7390 {
7391 	void *tr_index = filp->private_data;
7392 	struct trace_array *tr;
7393 	unsigned int index;
7394 	char *buf;
7395 
7396 	get_tr_index(tr_index, &tr, &index);
7397 
7398 	if (tr->trace_flags & (1 << index))
7399 		buf = "1\n";
7400 	else
7401 		buf = "0\n";
7402 
7403 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7404 }
7405 
7406 static ssize_t
7407 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7408 			 loff_t *ppos)
7409 {
7410 	void *tr_index = filp->private_data;
7411 	struct trace_array *tr;
7412 	unsigned int index;
7413 	unsigned long val;
7414 	int ret;
7415 
7416 	get_tr_index(tr_index, &tr, &index);
7417 
7418 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7419 	if (ret)
7420 		return ret;
7421 
7422 	if (val != 0 && val != 1)
7423 		return -EINVAL;
7424 
7425 	mutex_lock(&trace_types_lock);
7426 	ret = set_tracer_flag(tr, 1 << index, val);
7427 	mutex_unlock(&trace_types_lock);
7428 
7429 	if (ret < 0)
7430 		return ret;
7431 
7432 	*ppos += cnt;
7433 
7434 	return cnt;
7435 }
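/*
 * For illustration (usual tracefs mount assumed): each core trace flag is
 * exposed as a file under options/ and toggled through the handlers above,
 * e.g.:
 *
 *	# echo 1 > /sys/kernel/tracing/options/sym-offset
 *	# echo 0 > /sys/kernel/tracing/options/sym-offset
 */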
7436 
7437 static const struct file_operations trace_options_core_fops = {
7438 	.open = tracing_open_generic,
7439 	.read = trace_options_core_read,
7440 	.write = trace_options_core_write,
7441 	.llseek = generic_file_llseek,
7442 };
7443 
7444 struct dentry *trace_create_file(const char *name,
7445 				 umode_t mode,
7446 				 struct dentry *parent,
7447 				 void *data,
7448 				 const struct file_operations *fops)
7449 {
7450 	struct dentry *ret;
7451 
7452 	ret = tracefs_create_file(name, mode, parent, data, fops);
7453 	if (!ret)
7454 		pr_warn("Could not create tracefs '%s' entry\n", name);
7455 
7456 	return ret;
7457 }
7458 
7459 
7460 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7461 {
7462 	struct dentry *d_tracer;
7463 
7464 	if (tr->options)
7465 		return tr->options;
7466 
7467 	d_tracer = tracing_get_dentry(tr);
7468 	if (IS_ERR(d_tracer))
7469 		return NULL;
7470 
7471 	tr->options = tracefs_create_dir("options", d_tracer);
7472 	if (!tr->options) {
7473 		pr_warn("Could not create tracefs directory 'options'\n");
7474 		return NULL;
7475 	}
7476 
7477 	return tr->options;
7478 }
7479 
7480 static void
7481 create_trace_option_file(struct trace_array *tr,
7482 			 struct trace_option_dentry *topt,
7483 			 struct tracer_flags *flags,
7484 			 struct tracer_opt *opt)
7485 {
7486 	struct dentry *t_options;
7487 
7488 	t_options = trace_options_init_dentry(tr);
7489 	if (!t_options)
7490 		return;
7491 
7492 	topt->flags = flags;
7493 	topt->opt = opt;
7494 	topt->tr = tr;
7495 
7496 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7497 				    &trace_options_fops);
7498 
7499 }
7500 
7501 static void
7502 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7503 {
7504 	struct trace_option_dentry *topts;
7505 	struct trace_options *tr_topts;
7506 	struct tracer_flags *flags;
7507 	struct tracer_opt *opts;
7508 	int cnt;
7509 	int i;
7510 
7511 	if (!tracer)
7512 		return;
7513 
7514 	flags = tracer->flags;
7515 
7516 	if (!flags || !flags->opts)
7517 		return;
7518 
7519 	/*
7520 	 * If this is an instance, only create flags for tracers
7521 	 * the instance may have.
7522 	 */
7523 	if (!trace_ok_for_array(tracer, tr))
7524 		return;
7525 
7526 	for (i = 0; i < tr->nr_topts; i++) {
7527 		/* Make sure there are no duplicate flags. */
7528 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7529 			return;
7530 	}
7531 
7532 	opts = flags->opts;
7533 
7534 	for (cnt = 0; opts[cnt].name; cnt++)
7535 		;
7536 
7537 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7538 	if (!topts)
7539 		return;
7540 
7541 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7542 			    GFP_KERNEL);
7543 	if (!tr_topts) {
7544 		kfree(topts);
7545 		return;
7546 	}
7547 
7548 	tr->topts = tr_topts;
7549 	tr->topts[tr->nr_topts].tracer = tracer;
7550 	tr->topts[tr->nr_topts].topts = topts;
7551 	tr->nr_topts++;
7552 
7553 	for (cnt = 0; opts[cnt].name; cnt++) {
7554 		create_trace_option_file(tr, &topts[cnt], flags,
7555 					 &opts[cnt]);
7556 		WARN_ONCE(topts[cnt].entry == NULL,
7557 			  "Failed to create trace option: %s",
7558 			  opts[cnt].name);
7559 	}
7560 }
7561 
7562 static struct dentry *
7563 create_trace_option_core_file(struct trace_array *tr,
7564 			      const char *option, long index)
7565 {
7566 	struct dentry *t_options;
7567 
7568 	t_options = trace_options_init_dentry(tr);
7569 	if (!t_options)
7570 		return NULL;
7571 
7572 	return trace_create_file(option, 0644, t_options,
7573 				 (void *)&tr->trace_flags_index[index],
7574 				 &trace_options_core_fops);
7575 }
7576 
7577 static void create_trace_options_dir(struct trace_array *tr)
7578 {
7579 	struct dentry *t_options;
7580 	bool top_level = tr == &global_trace;
7581 	int i;
7582 
7583 	t_options = trace_options_init_dentry(tr);
7584 	if (!t_options)
7585 		return;
7586 
7587 	for (i = 0; trace_options[i]; i++) {
7588 		if (top_level ||
7589 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7590 			create_trace_option_core_file(tr, trace_options[i], i);
7591 	}
7592 }
7593 
7594 static ssize_t
7595 rb_simple_read(struct file *filp, char __user *ubuf,
7596 	       size_t cnt, loff_t *ppos)
7597 {
7598 	struct trace_array *tr = filp->private_data;
7599 	char buf[64];
7600 	int r;
7601 
7602 	r = tracer_tracing_is_on(tr);
7603 	r = sprintf(buf, "%d\n", r);
7604 
7605 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7606 }
7607 
7608 static ssize_t
7609 rb_simple_write(struct file *filp, const char __user *ubuf,
7610 		size_t cnt, loff_t *ppos)
7611 {
7612 	struct trace_array *tr = filp->private_data;
7613 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7614 	unsigned long val;
7615 	int ret;
7616 
7617 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7618 	if (ret)
7619 		return ret;
7620 
7621 	if (buffer) {
7622 		mutex_lock(&trace_types_lock);
7623 		if (val) {
7624 			tracer_tracing_on(tr);
7625 			if (tr->current_trace->start)
7626 				tr->current_trace->start(tr);
7627 		} else {
7628 			tracer_tracing_off(tr);
7629 			if (tr->current_trace->stop)
7630 				tr->current_trace->stop(tr);
7631 		}
7632 		mutex_unlock(&trace_types_lock);
7633 	}
7634 
7635 	(*ppos)++;
7636 
7637 	return cnt;
7638 }
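/*
 * For illustration (usual tracefs mount assumed): these handlers back the
 * tracing_on file, which pauses and resumes recording into the ring buffer
 * without tearing down the current tracer:
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on   # stop recording
 *	# echo 1 > /sys/kernel/tracing/tracing_on   # resume recording
 */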
7639 
7640 static const struct file_operations rb_simple_fops = {
7641 	.open		= tracing_open_generic_tr,
7642 	.read		= rb_simple_read,
7643 	.write		= rb_simple_write,
7644 	.release	= tracing_release_generic_tr,
7645 	.llseek		= default_llseek,
7646 };
7647 
7648 struct dentry *trace_instance_dir;
7649 
7650 static void
7651 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7652 
7653 static int
7654 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7655 {
7656 	enum ring_buffer_flags rb_flags;
7657 
7658 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7659 
7660 	buf->tr = tr;
7661 
7662 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7663 	if (!buf->buffer)
7664 		return -ENOMEM;
7665 
7666 	buf->data = alloc_percpu(struct trace_array_cpu);
7667 	if (!buf->data) {
7668 		ring_buffer_free(buf->buffer);
7669 		buf->buffer = NULL;
7670 		return -ENOMEM;
7671 	}
7672 
7673 	/* Allocate the first page for all buffers */
7674 	set_buffer_entries(&tr->trace_buffer,
7675 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7676 
7677 	return 0;
7678 }
7679 
7680 static int allocate_trace_buffers(struct trace_array *tr, int size)
7681 {
7682 	int ret;
7683 
7684 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7685 	if (ret)
7686 		return ret;
7687 
7688 #ifdef CONFIG_TRACER_MAX_TRACE
7689 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7690 				    allocate_snapshot ? size : 1);
7691 	if (WARN_ON(ret)) {
7692 		ring_buffer_free(tr->trace_buffer.buffer);
7693 		tr->trace_buffer.buffer = NULL;
7694 		free_percpu(tr->trace_buffer.data);
7695 		tr->trace_buffer.data = NULL;
7696 		return -ENOMEM;
7697 	}
7698 	tr->allocated_snapshot = allocate_snapshot;
7699 
7700 	/*
7701 	 * Only the top level trace array gets its snapshot allocated
7702 	 * from the kernel command line.
7703 	 */
7704 	allocate_snapshot = false;
7705 #endif
7706 	return 0;
7707 }
7708 
7709 static void free_trace_buffer(struct trace_buffer *buf)
7710 {
7711 	if (buf->buffer) {
7712 		ring_buffer_free(buf->buffer);
7713 		buf->buffer = NULL;
7714 		free_percpu(buf->data);
7715 		buf->data = NULL;
7716 	}
7717 }
7718 
7719 static void free_trace_buffers(struct trace_array *tr)
7720 {
7721 	if (!tr)
7722 		return;
7723 
7724 	free_trace_buffer(&tr->trace_buffer);
7725 
7726 #ifdef CONFIG_TRACER_MAX_TRACE
7727 	free_trace_buffer(&tr->max_buffer);
7728 #endif
7729 }
7730 
7731 static void init_trace_flags_index(struct trace_array *tr)
7732 {
7733 	int i;
7734 
7735 	/* Used by the trace options files */
7736 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7737 		tr->trace_flags_index[i] = i;
7738 }
7739 
7740 static void __update_tracer_options(struct trace_array *tr)
7741 {
7742 	struct tracer *t;
7743 
7744 	for (t = trace_types; t; t = t->next)
7745 		add_tracer_options(tr, t);
7746 }
7747 
7748 static void update_tracer_options(struct trace_array *tr)
7749 {
7750 	mutex_lock(&trace_types_lock);
7751 	__update_tracer_options(tr);
7752 	mutex_unlock(&trace_types_lock);
7753 }
7754 
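/*
 * mkdir callback for the "instances" directory (see
 * create_trace_instances() below). Creating a directory there, e.g.
 * "mkdir instances/foo" in the tracefs mount (typically
 * /sys/kernel/tracing), allocates a new trace_array with its own
 * buffers, flags and tracefs control files.
 */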
7755 static int instance_mkdir(const char *name)
7756 {
7757 	struct trace_array *tr;
7758 	int ret;
7759 
7760 	mutex_lock(&event_mutex);
7761 	mutex_lock(&trace_types_lock);
7762 
7763 	ret = -EEXIST;
7764 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7765 		if (tr->name && strcmp(tr->name, name) == 0)
7766 			goto out_unlock;
7767 	}
7768 
7769 	ret = -ENOMEM;
7770 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7771 	if (!tr)
7772 		goto out_unlock;
7773 
7774 	tr->name = kstrdup(name, GFP_KERNEL);
7775 	if (!tr->name)
7776 		goto out_free_tr;
7777 
7778 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7779 		goto out_free_tr;
7780 
7781 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7782 
7783 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7784 
7785 	raw_spin_lock_init(&tr->start_lock);
7786 
7787 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7788 
7789 	tr->current_trace = &nop_trace;
7790 
7791 	INIT_LIST_HEAD(&tr->systems);
7792 	INIT_LIST_HEAD(&tr->events);
7793 	INIT_LIST_HEAD(&tr->hist_vars);
7794 
7795 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7796 		goto out_free_tr;
7797 
7798 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7799 	if (!tr->dir)
7800 		goto out_free_tr;
7801 
7802 	ret = event_trace_add_tracer(tr->dir, tr);
7803 	if (ret) {
7804 		tracefs_remove_recursive(tr->dir);
7805 		goto out_free_tr;
7806 	}
7807 
7808 	ftrace_init_trace_array(tr);
7809 
7810 	init_tracer_tracefs(tr, tr->dir);
7811 	init_trace_flags_index(tr);
7812 	__update_tracer_options(tr);
7813 
7814 	list_add(&tr->list, &ftrace_trace_arrays);
7815 
7816 	mutex_unlock(&trace_types_lock);
7817 	mutex_unlock(&event_mutex);
7818 
7819 	return 0;
7820 
7821  out_free_tr:
7822 	free_trace_buffers(tr);
7823 	free_cpumask_var(tr->tracing_cpumask);
7824 	kfree(tr->name);
7825 	kfree(tr);
7826 
7827  out_unlock:
7828 	mutex_unlock(&trace_types_lock);
7829 	mutex_unlock(&event_mutex);
7830 
7831 	return ret;
7832 
7833 }
7834 
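/*
 * rmdir callback for the "instances" directory. Tears down the
 * trace_array created by instance_mkdir(): the tracer is set back to
 * nop, the event and function files are removed and the ring buffers
 * are freed. Fails with -EBUSY while the instance or its current
 * tracer is still referenced.
 */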
7835 static int instance_rmdir(const char *name)
7836 {
7837 	struct trace_array *tr;
7838 	int found = 0;
7839 	int ret;
7840 	int i;
7841 
7842 	mutex_lock(&event_mutex);
7843 	mutex_lock(&trace_types_lock);
7844 
7845 	ret = -ENODEV;
7846 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7847 		if (tr->name && strcmp(tr->name, name) == 0) {
7848 			found = 1;
7849 			break;
7850 		}
7851 	}
7852 	if (!found)
7853 		goto out_unlock;
7854 
7855 	ret = -EBUSY;
7856 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7857 		goto out_unlock;
7858 
7859 	list_del(&tr->list);
7860 
7861 	/* Disable all the flags that were enabled coming in */
7862 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7863 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7864 			set_tracer_flag(tr, 1 << i, 0);
7865 	}
7866 
7867 	tracing_set_nop(tr);
7868 	clear_ftrace_function_probes(tr);
7869 	event_trace_del_tracer(tr);
7870 	ftrace_clear_pids(tr);
7871 	ftrace_destroy_function_files(tr);
7872 	tracefs_remove_recursive(tr->dir);
7873 	free_trace_buffers(tr);
7874 
7875 	for (i = 0; i < tr->nr_topts; i++) {
7876 		kfree(tr->topts[i].topts);
7877 	}
7878 	kfree(tr->topts);
7879 
7880 	free_cpumask_var(tr->tracing_cpumask);
7881 	kfree(tr->name);
7882 	kfree(tr);
7883 
7884 	ret = 0;
7885 
7886  out_unlock:
7887 	mutex_unlock(&trace_types_lock);
7888 	mutex_unlock(&event_mutex);
7889 
7890 	return ret;
7891 }
7892 
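/*
 * Create the "instances" directory. tracefs forwards mkdir and rmdir
 * in that directory to instance_mkdir() and instance_rmdir() above,
 * which is how trace arrays are created and removed at run time.
 */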
7893 static __init void create_trace_instances(struct dentry *d_tracer)
7894 {
7895 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7896 							 instance_mkdir,
7897 							 instance_rmdir);
7898 	if (WARN_ON(!trace_instance_dir))
7899 		return;
7900 }
7901 
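/*
 * Populate a tracing directory with the standard per-instance control
 * files (trace, trace_pipe, tracing_on, buffer_size_kb, ...). Used for
 * both the top level tracing directory and every instance created
 * under "instances".
 */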
7902 static void
7903 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7904 {
7905 	struct trace_event_file *file;
7906 	int cpu;
7907 
7908 	trace_create_file("available_tracers", 0444, d_tracer,
7909 			tr, &show_traces_fops);
7910 
7911 	trace_create_file("current_tracer", 0644, d_tracer,
7912 			tr, &set_tracer_fops);
7913 
7914 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7915 			  tr, &tracing_cpumask_fops);
7916 
7917 	trace_create_file("trace_options", 0644, d_tracer,
7918 			  tr, &tracing_iter_fops);
7919 
7920 	trace_create_file("trace", 0644, d_tracer,
7921 			  tr, &tracing_fops);
7922 
7923 	trace_create_file("trace_pipe", 0444, d_tracer,
7924 			  tr, &tracing_pipe_fops);
7925 
7926 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7927 			  tr, &tracing_entries_fops);
7928 
7929 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7930 			  tr, &tracing_total_entries_fops);
7931 
7932 	trace_create_file("free_buffer", 0200, d_tracer,
7933 			  tr, &tracing_free_buffer_fops);
7934 
7935 	trace_create_file("trace_marker", 0220, d_tracer,
7936 			  tr, &tracing_mark_fops);
7937 
7938 	file = __find_event_file(tr, "ftrace", "print");
7939 	if (file && file->dir)
7940 		trace_create_file("trigger", 0644, file->dir, file,
7941 				  &event_trigger_fops);
7942 	tr->trace_marker_file = file;
7943 
7944 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7945 			  tr, &tracing_mark_raw_fops);
7946 
7947 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7948 			  &trace_clock_fops);
7949 
7950 	trace_create_file("tracing_on", 0644, d_tracer,
7951 			  tr, &rb_simple_fops);
7952 
7953 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7954 			  &trace_time_stamp_mode_fops);
7955 
7956 	create_trace_options_dir(tr);
7957 
7958 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7959 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7960 			&tr->max_latency, &tracing_max_lat_fops);
7961 #endif
7962 
7963 	if (ftrace_create_function_files(tr, d_tracer))
7964 		WARN(1, "Could not allocate function filter files");
7965 
7966 #ifdef CONFIG_TRACER_SNAPSHOT
7967 	trace_create_file("snapshot", 0644, d_tracer,
7968 			  tr, &snapshot_fops);
7969 #endif
7970 
7971 	for_each_tracing_cpu(cpu)
7972 		tracing_init_tracefs_percpu(tr, cpu);
7973 
7974 	ftrace_init_tracefs(tr, d_tracer);
7975 }
7976 
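/*
 * Automount callback for the debugfs "tracing" directory created in
 * tracing_init_dentry() below. When that directory is traversed,
 * debugfs calls back here to get a tracefs mount to place on top of it.
 */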
7977 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7978 {
7979 	struct vfsmount *mnt;
7980 	struct file_system_type *type;
7981 
7982 	/*
7983 	 * To maintain backward compatibility for tools that mount
7984 	 * debugfs to get to the tracing facility, tracefs is automatically
7985 	 * mounted to the debugfs/tracing directory.
7986 	 */
7987 	type = get_fs_type("tracefs");
7988 	if (!type)
7989 		return NULL;
7990 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7991 	put_filesystem(type);
7992 	if (IS_ERR(mnt))
7993 		return NULL;
7994 	mntget(mnt);
7995 
7996 	return mnt;
7997 }
7998 
7999 /**
8000  * tracing_init_dentry - initialize top level trace array
8001  *
8002  * This is called when creating files or directories in the tracing
8003  * directory. It is called via fs_initcall() by the boot-up code and is
8004  * expected to return the dentry of the top level tracing directory.
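 *
 * For reference, the usage below mirrors tracer_init_tracefs() in this
 * file:
 *
 *	struct dentry *d_tracer;
 *
 *	d_tracer = tracing_init_dentry();
 *	if (IS_ERR(d_tracer))
 *		return 0;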
8005  */
8006 struct dentry *tracing_init_dentry(void)
8007 {
8008 	struct trace_array *tr = &global_trace;
8009 
8010 	/* The top level trace array uses NULL as parent */
8011 	if (tr->dir)
8012 		return NULL;
8013 
8014 	if (WARN_ON(!tracefs_initialized()) ||
8015 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
8016 		 WARN_ON(!debugfs_initialized())))
8017 		return ERR_PTR(-ENODEV);
8018 
8019 	/*
8020 	 * As there may still be users that expect the tracing
8021 	 * files to exist in debugfs/tracing, we must automount
8022 	 * the tracefs file system there, so older tools still
8023 	 * work with the newer kernel.
8024 	 */
8025 	tr->dir = debugfs_create_automount("tracing", NULL,
8026 					   trace_automount, NULL);
8027 	if (!tr->dir) {
8028 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
8029 		return ERR_PTR(-ENOMEM);
8030 	}
8031 
8032 	return NULL;
8033 }
8034 
8035 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8036 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8037 
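/*
 * Register the eval maps (enum/sizeof to string translations) that are
 * built into the kernel image. The section boundaries are provided by
 * the linker through __start_ftrace_eval_maps and
 * __stop_ftrace_eval_maps above.
 */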
8038 static void __init trace_eval_init(void)
8039 {
8040 	int len;
8041 
8042 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8043 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8044 }
8045 
8046 #ifdef CONFIG_MODULES
8047 static void trace_module_add_evals(struct module *mod)
8048 {
8049 	if (!mod->num_trace_evals)
8050 		return;
8051 
8052 	/*
8053 	 * Modules with bad taint do not have events created; do not
8054 	 * bother with their eval maps (enums) either.
8055 	 */
8056 	if (trace_module_has_bad_taint(mod))
8057 		return;
8058 
8059 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8060 }
8061 
8062 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
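/*
 * Each module's eval maps are saved in trace_eval_maps as one
 * contiguous block, bracketed by a "head" item that records the owning
 * module and a "tail" item that links to the next block. Walk the
 * chain block by block, unlink the block owned by the departing module
 * and free it.
 */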
8063 static void trace_module_remove_evals(struct module *mod)
8064 {
8065 	union trace_eval_map_item *map;
8066 	union trace_eval_map_item **last = &trace_eval_maps;
8067 
8068 	if (!mod->num_trace_evals)
8069 		return;
8070 
8071 	mutex_lock(&trace_eval_mutex);
8072 
8073 	map = trace_eval_maps;
8074 
8075 	while (map) {
8076 		if (map->head.mod == mod)
8077 			break;
8078 		map = trace_eval_jmp_to_tail(map);
8079 		last = &map->tail.next;
8080 		map = map->tail.next;
8081 	}
8082 	if (!map)
8083 		goto out;
8084 
8085 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8086 	kfree(map);
8087  out:
8088 	mutex_unlock(&trace_eval_mutex);
8089 }
8090 #else
8091 static inline void trace_module_remove_evals(struct module *mod) { }
8092 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8093 
8094 static int trace_module_notify(struct notifier_block *self,
8095 			       unsigned long val, void *data)
8096 {
8097 	struct module *mod = data;
8098 
8099 	switch (val) {
8100 	case MODULE_STATE_COMING:
8101 		trace_module_add_evals(mod);
8102 		break;
8103 	case MODULE_STATE_GOING:
8104 		trace_module_remove_evals(mod);
8105 		break;
8106 	}
8107 
8108 	return 0;
8109 }
8110 
8111 static struct notifier_block trace_module_nb = {
8112 	.notifier_call = trace_module_notify,
8113 	.priority = 0,
8114 };
8115 #endif /* CONFIG_MODULES */
8116 
8117 static __init int tracer_init_tracefs(void)
8118 {
8119 	struct dentry *d_tracer;
8120 
8121 	trace_access_lock_init();
8122 
8123 	d_tracer = tracing_init_dentry();
8124 	if (IS_ERR(d_tracer))
8125 		return 0;
8126 
8127 	event_trace_init();
8128 
8129 	init_tracer_tracefs(&global_trace, d_tracer);
8130 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8131 
8132 	trace_create_file("tracing_thresh", 0644, d_tracer,
8133 			&global_trace, &tracing_thresh_fops);
8134 
8135 	trace_create_file("README", 0444, d_tracer,
8136 			NULL, &tracing_readme_fops);
8137 
8138 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8139 			NULL, &tracing_saved_cmdlines_fops);
8140 
8141 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8142 			  NULL, &tracing_saved_cmdlines_size_fops);
8143 
8144 	trace_create_file("saved_tgids", 0444, d_tracer,
8145 			NULL, &tracing_saved_tgids_fops);
8146 
8147 	trace_eval_init();
8148 
8149 	trace_create_eval_file(d_tracer);
8150 
8151 #ifdef CONFIG_MODULES
8152 	register_module_notifier(&trace_module_nb);
8153 #endif
8154 
8155 #ifdef CONFIG_DYNAMIC_FTRACE
8156 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8157 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8158 #endif
8159 
8160 	create_trace_instances(d_tracer);
8161 
8162 	update_tracer_options(&global_trace);
8163 
8164 	return 0;
8165 }
8166 
8167 static int trace_panic_handler(struct notifier_block *this,
8168 			       unsigned long event, void *unused)
8169 {
8170 	if (ftrace_dump_on_oops)
8171 		ftrace_dump(ftrace_dump_on_oops);
8172 	return NOTIFY_OK;
8173 }
8174 
8175 static struct notifier_block trace_panic_notifier = {
8176 	.notifier_call  = trace_panic_handler,
8177 	.next           = NULL,
8178 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8179 };
8180 
8181 static int trace_die_handler(struct notifier_block *self,
8182 			     unsigned long val,
8183 			     void *data)
8184 {
8185 	switch (val) {
8186 	case DIE_OOPS:
8187 		if (ftrace_dump_on_oops)
8188 			ftrace_dump(ftrace_dump_on_oops);
8189 		break;
8190 	default:
8191 		break;
8192 	}
8193 	return NOTIFY_OK;
8194 }
8195 
8196 static struct notifier_block trace_die_notifier = {
8197 	.notifier_call = trace_die_handler,
8198 	.priority = 200
8199 };
8200 
8201 /*
8202  * printk is set to a max of 1024; we really don't need it that big.
8203  * Nothing should be printing 1000 characters anyway.
8204  */
8205 #define TRACE_MAX_PRINT		1000
8206 
8207 /*
8208  * Define here KERN_TRACE so that we have one place to modify
8209  * it if we decide to change what log level the ftrace dump
8210  * should be at.
8211  */
8212 #define KERN_TRACE		KERN_EMERG
8213 
8214 void
8215 trace_printk_seq(struct trace_seq *s)
8216 {
8217 	/* Probably should print a warning here. */
8218 	if (s->seq.len >= TRACE_MAX_PRINT)
8219 		s->seq.len = TRACE_MAX_PRINT;
8220 
8221 	/*
8222 	 * More paranoid code. Although the buffer size is set to
8223 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8224 	 * an extra layer of protection.
8225 	 */
8226 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8227 		s->seq.len = s->seq.size - 1;
8228 
8229 	/* Should be zero terminated, but we are paranoid. */
8230 	s->buffer[s->seq.len] = 0;
8231 
8232 	printk(KERN_TRACE "%s", s->buffer);
8233 
8234 	trace_seq_init(s);
8235 }
8236 
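/*
 * Set up an iterator that walks the global trace buffer directly,
 * without going through the normal file based setup. ftrace_dump()
 * below uses this to print the buffers at oops time.
 */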
8237 void trace_init_global_iter(struct trace_iterator *iter)
8238 {
8239 	iter->tr = &global_trace;
8240 	iter->trace = iter->tr->current_trace;
8241 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8242 	iter->trace_buffer = &global_trace.trace_buffer;
8243 
8244 	if (iter->trace && iter->trace->open)
8245 		iter->trace->open(iter);
8246 
8247 	/* Annotate start of buffers if we had overruns */
8248 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8249 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8250 
8251 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8252 	if (trace_clocks[iter->tr->clock_id].in_ns)
8253 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8254 }
8255 
8256 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8257 {
8258 	/* use static because iter can be a bit big for the stack */
8259 	static struct trace_iterator iter;
8260 	static atomic_t dump_running;
8261 	struct trace_array *tr = &global_trace;
8262 	unsigned int old_userobj;
8263 	unsigned long flags;
8264 	int cnt = 0, cpu;
8265 
8266 	/* Only allow one dump user at a time. */
8267 	if (atomic_inc_return(&dump_running) != 1) {
8268 		atomic_dec(&dump_running);
8269 		return;
8270 	}
8271 
8272 	/*
8273 	 * Always turn off tracing when we dump.
8274 	 * We don't need to show trace output of what happens
8275 	 * between multiple crashes.
8276 	 *
8277 	 * If the user does a sysrq-z, then they can re-enable
8278 	 * tracing with echo 1 > tracing_on.
8279 	 */
8280 	tracing_off();
8281 
8282 	local_irq_save(flags);
8283 
8284 	/* Simulate the iterator */
8285 	trace_init_global_iter(&iter);
8286 
8287 	for_each_tracing_cpu(cpu) {
8288 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8289 	}
8290 
8291 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8292 
8293 	/* don't look at user memory in panic mode */
8294 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8295 
8296 	switch (oops_dump_mode) {
8297 	case DUMP_ALL:
8298 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8299 		break;
8300 	case DUMP_ORIG:
8301 		iter.cpu_file = raw_smp_processor_id();
8302 		break;
8303 	case DUMP_NONE:
8304 		goto out_enable;
8305 	default:
8306 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8307 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8308 	}
8309 
8310 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8311 
8312 	/* Did function tracer already get disabled? */
8313 	if (ftrace_is_dead()) {
8314 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8315 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8316 	}
8317 
8318 	/*
8319 	 * We need to stop all tracing on all CPUs to read
8320 	 * the next buffer. This is a bit expensive, but it is
8321 	 * not done often. We fill in all that we can read,
8322 	 * and then release the locks again.
8323 	 */
8324 
8325 	while (!trace_empty(&iter)) {
8326 
8327 		if (!cnt)
8328 			printk(KERN_TRACE "---------------------------------\n");
8329 
8330 		cnt++;
8331 
8332 		/* reset all but tr, trace, and overruns */
8333 		memset(&iter.seq, 0,
8334 		       sizeof(struct trace_iterator) -
8335 		       offsetof(struct trace_iterator, seq));
8336 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8337 		iter.pos = -1;
8338 
8339 		if (trace_find_next_entry_inc(&iter) != NULL) {
8340 			int ret;
8341 
8342 			ret = print_trace_line(&iter);
8343 			if (ret != TRACE_TYPE_NO_CONSUME)
8344 				trace_consume(&iter);
8345 		}
8346 		touch_nmi_watchdog();
8347 
8348 		trace_printk_seq(&iter.seq);
8349 	}
8350 
8351 	if (!cnt)
8352 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8353 	else
8354 		printk(KERN_TRACE "---------------------------------\n");
8355 
8356  out_enable:
8357 	tr->trace_flags |= old_userobj;
8358 
8359 	for_each_tracing_cpu(cpu) {
8360 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8361 	}
8362 	atomic_dec(&dump_running);
8363 	local_irq_restore(flags);
8364 }
8365 EXPORT_SYMBOL_GPL(ftrace_dump);
8366 
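/*
 * Split @buf into whitespace separated arguments and hand them to
 * @createfn. Helper for the command style tracefs files (e.g. the
 * dynamic kprobe/uprobe event interfaces).
 */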
8367 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8368 {
8369 	char **argv;
8370 	int argc, ret;
8371 
8372 	argc = 0;
8373 	ret = 0;
8374 	argv = argv_split(GFP_KERNEL, buf, &argc);
8375 	if (!argv)
8376 		return -ENOMEM;
8377 
8378 	if (argc)
8379 		ret = createfn(argc, argv);
8380 
8381 	argv_free(argv);
8382 
8383 	return ret;
8384 }
8385 
8386 #define WRITE_BUFSIZE  4096
8387 
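/*
 * Read a command string from user space in WRITE_BUFSIZE chunks, split
 * it into newline separated commands, strip '#' comments and run each
 * command through trace_run_command(). Returns the number of bytes
 * consumed or a negative error code.
 */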
8388 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8389 				size_t count, loff_t *ppos,
8390 				int (*createfn)(int, char **))
8391 {
8392 	char *kbuf, *buf, *tmp;
8393 	int ret = 0;
8394 	size_t done = 0;
8395 	size_t size;
8396 
8397 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8398 	if (!kbuf)
8399 		return -ENOMEM;
8400 
8401 	while (done < count) {
8402 		size = count - done;
8403 
8404 		if (size >= WRITE_BUFSIZE)
8405 			size = WRITE_BUFSIZE - 1;
8406 
8407 		if (copy_from_user(kbuf, buffer + done, size)) {
8408 			ret = -EFAULT;
8409 			goto out;
8410 		}
8411 		kbuf[size] = '\0';
8412 		buf = kbuf;
8413 		do {
8414 			tmp = strchr(buf, '\n');
8415 			if (tmp) {
8416 				*tmp = '\0';
8417 				size = tmp - buf + 1;
8418 			} else {
8419 				size = strlen(buf);
8420 				if (done + size < count) {
8421 					if (buf != kbuf)
8422 						break;
8423 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8424 					pr_warn("Line length is too long: Should be less than %d\n",
8425 						WRITE_BUFSIZE - 2);
8426 					ret = -EINVAL;
8427 					goto out;
8428 				}
8429 			}
8430 			done += size;
8431 
8432 			/* Remove comments */
8433 			tmp = strchr(buf, '#');
8434 
8435 			if (tmp)
8436 				*tmp = '\0';
8437 
8438 			ret = trace_run_command(buf, createfn);
8439 			if (ret)
8440 				goto out;
8441 			buf += size;
8442 
8443 		} while (done < count);
8444 	}
8445 	ret = done;
8446 
8447 out:
8448 	kfree(kbuf);
8449 
8450 	return ret;
8451 }
8452 
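/*
 * Main boot-time setup of the tracing core: allocate the cpumasks and
 * ring buffers of the global trace array, install the nop tracer as
 * the bootstrap current_trace, and register the panic and die
 * notifiers so the buffers can be dumped on a crash. Called from
 * early_trace_init() below.
 */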
8453 __init static int tracer_alloc_buffers(void)
8454 {
8455 	int ring_buf_size;
8456 	int ret = -ENOMEM;
8457 
8458 	/*
8459 	 * Make sure we don't accidentally add more trace options
8460 	 * than we have bits for.
8461 	 */
8462 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8463 
8464 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8465 		goto out;
8466 
8467 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8468 		goto out_free_buffer_mask;
8469 
8470 	/* Only allocate trace_printk buffers if a trace_printk exists */
8471 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8472 		/* Must be called before global_trace.buffer is allocated */
8473 		trace_printk_init_buffers();
8474 
8475 	/* To save memory, keep the ring buffer size to its minimum */
8476 	if (ring_buffer_expanded)
8477 		ring_buf_size = trace_buf_size;
8478 	else
8479 		ring_buf_size = 1;
8480 
8481 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8482 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8483 
8484 	raw_spin_lock_init(&global_trace.start_lock);
8485 
8486 	/*
8487 	 * The prepare callbacks allocate some memory for the ring buffer. We
8488 	 * don't free the buffer if the CPU goes down. If we were to free
8489 	 * the buffer, then the user would lose any trace that was in the
8490 	 * buffer. The memory will be removed once the "instance" is removed.
8491 	 */
8492 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8493 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8494 				      NULL);
8495 	if (ret < 0)
8496 		goto out_free_cpumask;
8497 	/* Used for event triggers */
8498 	ret = -ENOMEM;
8499 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8500 	if (!temp_buffer)
8501 		goto out_rm_hp_state;
8502 
8503 	if (trace_create_savedcmd() < 0)
8504 		goto out_free_temp_buffer;
8505 
8506 	/* TODO: make the number of buffers hot pluggable with CPUs */
8507 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8508 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8509 		WARN_ON(1);
8510 		goto out_free_savedcmd;
8511 	}
8512 
8513 	if (global_trace.buffer_disabled)
8514 		tracing_off();
8515 
8516 	if (trace_boot_clock) {
8517 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8518 		if (ret < 0)
8519 			pr_warn("Trace clock %s not defined, going back to default\n",
8520 				trace_boot_clock);
8521 	}
8522 
8523 	/*
8524 	 * register_tracer() might reference current_trace, so it
8525 	 * needs to be set before we register anything. This is
8526 	 * just a bootstrap of current_trace anyway.
8527 	 */
8528 	global_trace.current_trace = &nop_trace;
8529 
8530 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8531 
8532 	ftrace_init_global_array_ops(&global_trace);
8533 
8534 	init_trace_flags_index(&global_trace);
8535 
8536 	register_tracer(&nop_trace);
8537 
8538 	/* Function tracing may start here (via kernel command line) */
8539 	init_function_trace();
8540 
8541 	/* All seems OK, enable tracing */
8542 	tracing_disabled = 0;
8543 
8544 	atomic_notifier_chain_register(&panic_notifier_list,
8545 				       &trace_panic_notifier);
8546 
8547 	register_die_notifier(&trace_die_notifier);
8548 
8549 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8550 
8551 	INIT_LIST_HEAD(&global_trace.systems);
8552 	INIT_LIST_HEAD(&global_trace.events);
8553 	INIT_LIST_HEAD(&global_trace.hist_vars);
8554 	list_add(&global_trace.list, &ftrace_trace_arrays);
8555 
8556 	apply_trace_boot_options();
8557 
8558 	register_snapshot_cmd();
8559 
8560 	return 0;
8561 
8562 out_free_savedcmd:
8563 	free_saved_cmdlines_buffer(savedcmd);
8564 out_free_temp_buffer:
8565 	ring_buffer_free(temp_buffer);
8566 out_rm_hp_state:
8567 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8568 out_free_cpumask:
8569 	free_cpumask_var(global_trace.tracing_cpumask);
8570 out_free_buffer_mask:
8571 	free_cpumask_var(tracing_buffer_mask);
8572 out:
8573 	return ret;
8574 }
8575 
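/*
 * Called early from start_kernel(). Sets up the tracepoint_printk
 * iterator when "tp_printk" was given on the command line and
 * allocates the trace buffers so tracing can be used during early
 * boot.
 */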
8576 void __init early_trace_init(void)
8577 {
8578 	if (tracepoint_printk) {
8579 		tracepoint_print_iter =
8580 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8581 		if (WARN_ON(!tracepoint_print_iter))
8582 			tracepoint_printk = 0;
8583 		else
8584 			static_key_enable(&tracepoint_printk_key.key);
8585 	}
8586 	tracer_alloc_buffers();
8587 }
8588 
8589 void __init trace_init(void)
8590 {
8591 	trace_event_init();
8592 }
8593 
8594 __init static int clear_boot_tracer(void)
8595 {
8596 	/*
8597 	 * The buffer holding the default boot-up tracer name lives in an
8598 	 * init section. This function is called at late_initcall time; if
8599 	 * the boot tracer was never found and registered, clear the pointer
8600 	 * so that a later registration does not access the buffer that is
8601 	 * about to be freed.
8602 	 */
8603 	if (!default_bootup_tracer)
8604 		return 0;
8605 
8606 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8607 	       default_bootup_tracer);
8608 	default_bootup_tracer = NULL;
8609 
8610 	return 0;
8611 }
8612 
8613 fs_initcall(tracer_init_tracefs);
8614 late_initcall_sync(clear_boot_tracer);
8615 
8616 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8617 __init static int tracing_set_default_clock(void)
8618 {
8619 	/* sched_clock_stable() is determined in late_initcall */
8620 	if (!trace_boot_clock && !sched_clock_stable()) {
8621 		printk(KERN_WARNING
8622 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8623 		       "If you want to keep using the local clock, then add:\n"
8624 		       "  \"trace_clock=local\"\n"
8625 		       "on the kernel command line\n");
8626 		tracing_set_clock(&global_trace, "global");
8627 	}
8628 
8629 	return 0;
8630 }
8631 late_initcall_sync(tracing_set_default_clock);
8632 #endif
8633