xref: /linux-6.15/kernel/trace/trace.c (revision 54b3498d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
6  * Copyright (C) 2008 Ingo Molnar <[email protected]>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <[email protected]>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include "trace.h"
54 #include "trace_output.h"
55 
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61 
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occurred
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputing it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136 
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138 
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141 
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145 	struct module			*mod;
146 	unsigned long			length;
147 };
148 
149 union trace_eval_map_item;
150 
151 struct trace_eval_map_tail {
152 	/*
153 	 * "end" is first and points to NULL as it must be different
154 	 * than "mod" or "eval_string"
155 	 */
156 	union trace_eval_map_item	*next;
157 	const char			*end;	/* points to NULL */
158 };
159 
160 static DEFINE_MUTEX(trace_eval_mutex);
161 
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170 	struct trace_eval_map		map;
171 	struct trace_eval_map_head	head;
172 	struct trace_eval_map_tail	tail;
173 };
174 
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177 
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180 				   struct trace_buffer *buffer,
181 				   unsigned int trace_ctx);
182 
183 #define MAX_TRACER_SIZE		100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 
189 static int __init set_cmdline_ftrace(char *str)
190 {
191 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192 	default_bootup_tracer = bootup_tracer_buf;
193 	/* We are using ftrace early, expand it */
194 	ring_buffer_expanded = true;
195 	return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198 
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
202 		ftrace_dump_on_oops = DUMP_ALL;
203 		return 1;
204 	}
205 
206 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207 		ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210 
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214 
215 static int __init stop_trace_on_warning(char *str)
216 {
217 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218 		__disable_trace_on_warning = 1;
219 	return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222 
223 static int __init boot_alloc_snapshot(char *str)
224 {
225 	allocate_snapshot = true;
226 	/* We also need the main ring buffer expanded */
227 	ring_buffer_expanded = true;
228 	return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231 
232 
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234 
235 static int __init set_trace_boot_options(char *str)
236 {
237 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 	return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241 
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244 
245 static int __init set_trace_boot_clock(char *str)
246 {
247 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248 	trace_boot_clock = trace_boot_clock_buf;
249 	return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252 
253 static int __init set_tracepoint_printk(char *str)
254 {
255 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256 		tracepoint_printk = 1;
257 	return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260 
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263 	tracepoint_printk_stop_on_boot = true;
264 	return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267 
268 unsigned long long ns2usecs(u64 nsec)
269 {
270 	nsec += 500;
271 	do_div(nsec, 1000);
272 	return nsec;
273 }
274 
275 static void
276 trace_process_export(struct trace_export *export,
277 	       struct ring_buffer_event *event, int flag)
278 {
279 	struct trace_entry *entry;
280 	unsigned int size = 0;
281 
282 	if (export->flags & flag) {
283 		entry = ring_buffer_event_data(event);
284 		size = ring_buffer_event_length(event);
285 		export->write(export, entry, size);
286 	}
287 }
288 
289 static DEFINE_MUTEX(ftrace_export_lock);
290 
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292 
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296 
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299 	if (export->flags & TRACE_EXPORT_FUNCTION)
300 		static_branch_inc(&trace_function_exports_enabled);
301 
302 	if (export->flags & TRACE_EXPORT_EVENT)
303 		static_branch_inc(&trace_event_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_MARKER)
306 		static_branch_inc(&trace_marker_exports_enabled);
307 }
308 
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311 	if (export->flags & TRACE_EXPORT_FUNCTION)
312 		static_branch_dec(&trace_function_exports_enabled);
313 
314 	if (export->flags & TRACE_EXPORT_EVENT)
315 		static_branch_dec(&trace_event_exports_enabled);
316 
317 	if (export->flags & TRACE_EXPORT_MARKER)
318 		static_branch_dec(&trace_marker_exports_enabled);
319 }
320 
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323 	struct trace_export *export;
324 
325 	preempt_disable_notrace();
326 
327 	export = rcu_dereference_raw_check(ftrace_exports_list);
328 	while (export) {
329 		trace_process_export(export, event, flag);
330 		export = rcu_dereference_raw_check(export->next);
331 	}
332 
333 	preempt_enable_notrace();
334 }
335 
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339 	rcu_assign_pointer(export->next, *list);
340 	/*
341 	 * We are entering export into the list but another
342 	 * CPU might be walking that list. We need to make sure
343 	 * the export->next pointer is valid before another CPU sees
344 	 * the export pointer included into the list.
345 	 */
346 	rcu_assign_pointer(*list, export);
347 }
348 
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352 	struct trace_export **p;
353 
354 	for (p = list; *p != NULL; p = &(*p)->next)
355 		if (*p == export)
356 			break;
357 
358 	if (*p != export)
359 		return -1;
360 
361 	rcu_assign_pointer(*p, (*p)->next);
362 
363 	return 0;
364 }
365 
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369 	ftrace_exports_enable(export);
370 
371 	add_trace_export(list, export);
372 }
373 
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377 	int ret;
378 
379 	ret = rm_trace_export(list, export);
380 	ftrace_exports_disable(export);
381 
382 	return ret;
383 }
384 
385 int register_ftrace_export(struct trace_export *export)
386 {
387 	if (WARN_ON_ONCE(!export->write))
388 		return -1;
389 
390 	mutex_lock(&ftrace_export_lock);
391 
392 	add_ftrace_export(&ftrace_exports_list, export);
393 
394 	mutex_unlock(&ftrace_export_lock);
395 
396 	return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399 
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402 	int ret;
403 
404 	mutex_lock(&ftrace_export_lock);
405 
406 	ret = rm_ftrace_export(&ftrace_exports_list, export);
407 
408 	mutex_unlock(&ftrace_export_lock);
409 
410 	return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
413 
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS						\
416 	(FUNCTION_DEFAULT_FLAGS |					\
417 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
418 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
419 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
420 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
421 	 TRACE_ITER_HASH_PTR)
422 
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
425 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426 
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430 
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436 	.trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438 
439 LIST_HEAD(ftrace_trace_arrays);
440 
441 int trace_array_get(struct trace_array *this_tr)
442 {
443 	struct trace_array *tr;
444 	int ret = -ENODEV;
445 
446 	mutex_lock(&trace_types_lock);
447 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448 		if (tr == this_tr) {
449 			tr->ref++;
450 			ret = 0;
451 			break;
452 		}
453 	}
454 	mutex_unlock(&trace_types_lock);
455 
456 	return ret;
457 }
458 
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461 	WARN_ON(!this_tr->ref);
462 	this_tr->ref--;
463 }
464 
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476 	if (!this_tr)
477 		return;
478 
479 	mutex_lock(&trace_types_lock);
480 	__trace_array_put(this_tr);
481 	mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484 
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487 	int ret;
488 
489 	ret = security_locked_down(LOCKDOWN_TRACEFS);
490 	if (ret)
491 		return ret;
492 
493 	if (tracing_disabled)
494 		return -ENODEV;
495 
496 	if (tr && trace_array_get(tr) < 0)
497 		return -ENODEV;
498 
499 	return 0;
500 }
501 
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503 			      struct trace_buffer *buffer,
504 			      struct ring_buffer_event *event)
505 {
506 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507 	    !filter_match_preds(call->filter, rec)) {
508 		__trace_event_discard_commit(buffer, event);
509 		return 1;
510 	}
511 
512 	return 0;
513 }
514 
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517 	vfree(pid_list->pids);
518 	kfree(pid_list);
519 }
520 
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531 	/*
532 	 * If pid_max changed after filtered_pids was created, we
533 	 * by default ignore all pids greater than the previous pid_max.
534 	 */
535 	if (search_pid >= filtered_pids->pid_max)
536 		return false;
537 
538 	return test_bit(search_pid, filtered_pids->pids);
539 }
540 
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553 		       struct trace_pid_list *filtered_no_pids,
554 		       struct task_struct *task)
555 {
556 	/*
557 	 * If filtered_no_pids is not empty, and the task's pid is listed
558 	 * in filtered_no_pids, then return true.
559 	 * Otherwise, if filtered_pids is empty, that means we can
560 	 * trace all tasks. If it has content, then only trace pids
561 	 * within filtered_pids.
562 	 */
563 
564 	return (filtered_pids &&
565 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
566 		(filtered_no_pids &&
567 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569 
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583 				  struct task_struct *self,
584 				  struct task_struct *task)
585 {
586 	if (!pid_list)
587 		return;
588 
589 	/* For forks, we only add if the forking task is listed */
590 	if (self) {
591 		if (!trace_find_filtered_pid(pid_list, self->pid))
592 			return;
593 	}
594 
595 	/* Sorry, but we don't support pid_max changing after setting */
596 	if (task->pid >= pid_list->pid_max)
597 		return;
598 
599 	/* "self" is set for forks, and NULL for exits */
600 	if (self)
601 		set_bit(task->pid, pid_list->pids);
602 	else
603 		clear_bit(task->pid, pid_list->pids);
604 }
605 
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620 	unsigned long pid = (unsigned long)v;
621 
622 	(*pos)++;
623 
624 	/* pid already is +1 of the actual previous bit */
625 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626 
627 	/* Return pid + 1 to allow zero to be represented */
628 	if (pid < pid_list->pid_max)
629 		return (void *)(pid + 1);
630 
631 	return NULL;
632 }
633 
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647 	unsigned long pid;
648 	loff_t l = 0;
649 
650 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651 	if (pid >= pid_list->pid_max)
652 		return NULL;
653 
654 	/* Return pid + 1 so that zero can be the exit value */
655 	for (pid++; pid && l < *pos;
656 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657 		;
658 	return (void *)pid;
659 }
660 
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671 	unsigned long pid = (unsigned long)v - 1;
672 
673 	seq_printf(m, "%lu\n", pid);
674 	return 0;
675 }
676 
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE		127
679 
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681 		    struct trace_pid_list **new_pid_list,
682 		    const char __user *ubuf, size_t cnt)
683 {
684 	struct trace_pid_list *pid_list;
685 	struct trace_parser parser;
686 	unsigned long val;
687 	int nr_pids = 0;
688 	ssize_t read = 0;
689 	ssize_t ret = 0;
690 	loff_t pos;
691 	pid_t pid;
692 
693 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694 		return -ENOMEM;
695 
696 	/*
697 	 * Always recreate a new array. The write is an all or nothing
698 	 * operation. Always create a new array when adding new pids by
699 	 * the user. If the operation fails, then the current list is
700 	 * not modified.
701 	 */
702 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703 	if (!pid_list) {
704 		trace_parser_put(&parser);
705 		return -ENOMEM;
706 	}
707 
708 	pid_list->pid_max = READ_ONCE(pid_max);
709 
710 	/* Only truncating will shrink pid_max */
711 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712 		pid_list->pid_max = filtered_pids->pid_max;
713 
714 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715 	if (!pid_list->pids) {
716 		trace_parser_put(&parser);
717 		kfree(pid_list);
718 		return -ENOMEM;
719 	}
720 
721 	if (filtered_pids) {
722 		/* copy the current bits to the new max */
723 		for_each_set_bit(pid, filtered_pids->pids,
724 				 filtered_pids->pid_max) {
725 			set_bit(pid, pid_list->pids);
726 			nr_pids++;
727 		}
728 	}
729 
730 	while (cnt > 0) {
731 
732 		pos = 0;
733 
734 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
735 		if (ret < 0 || !trace_parser_loaded(&parser))
736 			break;
737 
738 		read += ret;
739 		ubuf += ret;
740 		cnt -= ret;
741 
742 		ret = -EINVAL;
743 		if (kstrtoul(parser.buffer, 0, &val))
744 			break;
745 		if (val >= pid_list->pid_max)
746 			break;
747 
748 		pid = (pid_t)val;
749 
750 		set_bit(pid, pid_list->pids);
751 		nr_pids++;
752 
753 		trace_parser_clear(&parser);
754 		ret = 0;
755 	}
756 	trace_parser_put(&parser);
757 
758 	if (ret < 0) {
759 		trace_free_pid_list(pid_list);
760 		return ret;
761 	}
762 
763 	if (!nr_pids) {
764 		/* Cleared the list of pids */
765 		trace_free_pid_list(pid_list);
766 		read = ret;
767 		pid_list = NULL;
768 	}
769 
770 	*new_pid_list = pid_list;
771 
772 	return read;
773 }
774 
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777 	u64 ts;
778 
779 	/* Early boot up does not have a buffer yet */
780 	if (!buf->buffer)
781 		return trace_clock_local();
782 
783 	ts = ring_buffer_time_stamp(buf->buffer);
784 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785 
786 	return ts;
787 }
788 
789 u64 ftrace_now(int cpu)
790 {
791 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793 
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" to be used in fast paths such as for
799  * the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805 	/*
806 	 * For quick access (irqsoff uses this in fast path), just
807 	 * return the mirror variable of the state of the ring buffer.
808 	 * It's a little racy, but we don't really care.
809 	 */
810 	smp_rmb();
811 	return !global_trace.buffer_disabled;
812 }
813 
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low number of 16384.
820  * If the dump on oops happens, it will be much appreciated
821  * to not have to wait for all that output. Anyway this can be
822  * boot time and run time configurable.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
825 
826 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827 
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer		*trace_types __read_mostly;
830 
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835 
836 /*
837  * serialize the access of the ring buffer
838  *
839  * ring buffer serializes readers, but it is low level protection.
840  * The validity of the events (which returns by ring_buffer_peek() ..etc)
841  * are not protected by ring buffer.
842  *
843  * The content of events may become garbage if we allow other process consumes
844  * these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not reader page) in ring buffer, and this page will be rewritten
847  *      by events producer.
848  *   B) The page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to system.
850  *
851  * These primitives allow multi process access to different cpu ring buffer
852  * concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multi read-only access are also serialized.
856  */
857 
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861 
862 static inline void trace_access_lock(int cpu)
863 {
864 	if (cpu == RING_BUFFER_ALL_CPUS) {
865 		/* gain it for accessing the whole ring buffer. */
866 		down_write(&all_cpu_access_lock);
867 	} else {
868 		/* gain it for accessing a cpu ring buffer. */
869 
870 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871 		down_read(&all_cpu_access_lock);
872 
873 		/* Secondly block other access to this @cpu ring buffer. */
874 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
875 	}
876 }
877 
878 static inline void trace_access_unlock(int cpu)
879 {
880 	if (cpu == RING_BUFFER_ALL_CPUS) {
881 		up_write(&all_cpu_access_lock);
882 	} else {
883 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884 		up_read(&all_cpu_access_lock);
885 	}
886 }
887 
888 static inline void trace_access_lock_init(void)
889 {
890 	int cpu;
891 
892 	for_each_possible_cpu(cpu)
893 		mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895 
896 #else
897 
898 static DEFINE_MUTEX(access_lock);
899 
900 static inline void trace_access_lock(int cpu)
901 {
902 	(void)cpu;
903 	mutex_lock(&access_lock);
904 }
905 
906 static inline void trace_access_unlock(int cpu)
907 {
908 	(void)cpu;
909 	mutex_unlock(&access_lock);
910 }
911 
912 static inline void trace_access_lock_init(void)
913 {
914 }
915 
916 #endif
917 
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920 				 unsigned int trace_ctx,
921 				 int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923 				      struct trace_buffer *buffer,
924 				      unsigned int trace_ctx,
925 				      int skip, struct pt_regs *regs);
926 
927 #else
928 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
929 					unsigned int trace_ctx,
930 					int skip, struct pt_regs *regs)
931 {
932 }
933 static inline void ftrace_trace_stack(struct trace_array *tr,
934 				      struct trace_buffer *buffer,
935 				      unsigned long trace_ctx,
936 				      int skip, struct pt_regs *regs)
937 {
938 }
939 
940 #endif
941 
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944 		  int type, unsigned int trace_ctx)
945 {
946 	struct trace_entry *ent = ring_buffer_event_data(event);
947 
948 	tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950 
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953 			  int type,
954 			  unsigned long len,
955 			  unsigned int trace_ctx)
956 {
957 	struct ring_buffer_event *event;
958 
959 	event = ring_buffer_lock_reserve(buffer, len);
960 	if (event != NULL)
961 		trace_event_setup(event, type, trace_ctx);
962 
963 	return event;
964 }
965 
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968 	if (tr->array_buffer.buffer)
969 		ring_buffer_record_on(tr->array_buffer.buffer);
970 	/*
971 	 * This flag is looked at when buffers haven't been allocated
972 	 * yet, or by some tracers (like irqsoff), that just want to
973 	 * know if the ring buffer has been disabled, but it can handle
974 	 * races of where it gets disabled but we still do a record.
975 	 * As the check is in the fast path of the tracers, it is more
976 	 * important to be fast than accurate.
977 	 */
978 	tr->buffer_disabled = 0;
979 	/* Make the flag seen by readers */
980 	smp_wmb();
981 }
982 
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991 	tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994 
995 
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999 	__this_cpu_write(trace_taskinfo_save, true);
1000 
1001 	/* If this is the temp buffer, we need to commit fully */
1002 	if (this_cpu_read(trace_buffered_event) == event) {
1003 		/* Length is in event->array[0] */
1004 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005 		/* Release the temp buffer */
1006 		this_cpu_dec(trace_buffered_event_cnt);
1007 	} else
1008 		ring_buffer_unlock_commit(buffer, event);
1009 }
1010 
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:	   The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019 	struct ring_buffer_event *event;
1020 	struct trace_buffer *buffer;
1021 	struct print_entry *entry;
1022 	unsigned int trace_ctx;
1023 	int alloc;
1024 
1025 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026 		return 0;
1027 
1028 	if (unlikely(tracing_selftest_running || tracing_disabled))
1029 		return 0;
1030 
1031 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032 
1033 	trace_ctx = tracing_gen_ctx();
1034 	buffer = global_trace.array_buffer.buffer;
1035 	ring_buffer_nest_start(buffer);
1036 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037 					    trace_ctx);
1038 	if (!event) {
1039 		size = 0;
1040 		goto out;
1041 	}
1042 
1043 	entry = ring_buffer_event_data(event);
1044 	entry->ip = ip;
1045 
1046 	memcpy(&entry->buf, str, size);
1047 
1048 	/* Add a newline if necessary */
1049 	if (entry->buf[size - 1] != '\n') {
1050 		entry->buf[size] = '\n';
1051 		entry->buf[size + 1] = '\0';
1052 	} else
1053 		entry->buf[size] = '\0';
1054 
1055 	__buffer_unlock_commit(buffer, event);
1056 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058 	ring_buffer_nest_end(buffer);
1059 	return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
1062 
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:	   The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070 	struct ring_buffer_event *event;
1071 	struct trace_buffer *buffer;
1072 	struct bputs_entry *entry;
1073 	unsigned int trace_ctx;
1074 	int size = sizeof(struct bputs_entry);
1075 	int ret = 0;
1076 
1077 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078 		return 0;
1079 
1080 	if (unlikely(tracing_selftest_running || tracing_disabled))
1081 		return 0;
1082 
1083 	trace_ctx = tracing_gen_ctx();
1084 	buffer = global_trace.array_buffer.buffer;
1085 
1086 	ring_buffer_nest_start(buffer);
1087 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088 					    trace_ctx);
1089 	if (!event)
1090 		goto out;
1091 
1092 	entry = ring_buffer_event_data(event);
1093 	entry->ip			= ip;
1094 	entry->str			= str;
1095 
1096 	__buffer_unlock_commit(buffer, event);
1097 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098 
1099 	ret = 1;
1100  out:
1101 	ring_buffer_nest_end(buffer);
1102 	return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105 
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108 					   void *cond_data)
1109 {
1110 	struct tracer *tracer = tr->current_trace;
1111 	unsigned long flags;
1112 
1113 	if (in_nmi()) {
1114 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1116 		return;
1117 	}
1118 
1119 	if (!tr->allocated_snapshot) {
1120 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121 		internal_trace_puts("*** stopping trace here!   ***\n");
1122 		tracing_off();
1123 		return;
1124 	}
1125 
1126 	/* Note, snapshot can not be used when the tracer uses it */
1127 	if (tracer->use_max_tr) {
1128 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130 		return;
1131 	}
1132 
1133 	local_irq_save(flags);
1134 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1135 	local_irq_restore(flags);
1136 }
1137 
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140 	tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142 
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot with either
1151  * a tracing_snapshot_alloc(), or by doing it manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, it will stop tracing.
1155  * Basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159 	struct trace_array *tr = &global_trace;
1160 
1161 	tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164 
1165 /**
1166  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167  * @tr:		The tracing instance to snapshot
1168  * @cond_data:	The data to be tested conditionally, and possibly saved
1169  *
1170  * This is the same as tracing_snapshot() except that the snapshot is
1171  * conditional - the snapshot will only happen if the
1172  * cond_snapshot.update() implementation receiving the cond_data
1173  * returns true, which means that the trace array's cond_snapshot
1174  * update() operation used the cond_data to determine whether the
1175  * snapshot should be taken, and if it was, presumably saved it along
1176  * with the snapshot.
1177  */
1178 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180 	tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183 
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:		The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already done.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200 	void *cond_data = NULL;
1201 
1202 	arch_spin_lock(&tr->max_lock);
1203 
1204 	if (tr->cond_snapshot)
1205 		cond_data = tr->cond_snapshot->cond_data;
1206 
1207 	arch_spin_unlock(&tr->max_lock);
1208 
1209 	return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212 
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214 					struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216 
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219 	int ret;
1220 
1221 	if (!tr->allocated_snapshot) {
1222 
1223 		/* allocate spare buffer */
1224 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226 		if (ret < 0)
1227 			return ret;
1228 
1229 		tr->allocated_snapshot = true;
1230 	}
1231 
1232 	return 0;
1233 }
1234 
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237 	/*
1238 	 * We don't free the ring buffer. instead, resize it because
1239 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1240 	 * we want preserve it.
1241 	 */
1242 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243 	set_buffer_entries(&tr->max_buffer, 1);
1244 	tracing_reset_online_cpus(&tr->max_buffer);
1245 	tr->allocated_snapshot = false;
1246 }
1247 
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260 	struct trace_array *tr = &global_trace;
1261 	int ret;
1262 
1263 	ret = tracing_alloc_snapshot_instance(tr);
1264 	WARN_ON(ret < 0);
1265 
1266 	return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269 
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283 	int ret;
1284 
1285 	ret = tracing_alloc_snapshot();
1286 	if (ret < 0)
1287 		return;
1288 
1289 	tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292 
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:		The tracing instance
1296  * @cond_data:	User data to associate with the snapshot
1297  * @update:	Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307 				 cond_update_fn_t update)
1308 {
1309 	struct cond_snapshot *cond_snapshot;
1310 	int ret = 0;
1311 
1312 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313 	if (!cond_snapshot)
1314 		return -ENOMEM;
1315 
1316 	cond_snapshot->cond_data = cond_data;
1317 	cond_snapshot->update = update;
1318 
1319 	mutex_lock(&trace_types_lock);
1320 
1321 	ret = tracing_alloc_snapshot_instance(tr);
1322 	if (ret)
1323 		goto fail_unlock;
1324 
1325 	if (tr->current_trace->use_max_tr) {
1326 		ret = -EBUSY;
1327 		goto fail_unlock;
1328 	}
1329 
1330 	/*
1331 	 * The cond_snapshot can only change to NULL without the
1332 	 * trace_types_lock. We don't care if we race with it going
1333 	 * to NULL, but we want to make sure that it's not set to
1334 	 * something other than NULL when we get here, which we can
1335 	 * do safely with only holding the trace_types_lock and not
1336 	 * having to take the max_lock.
1337 	 */
1338 	if (tr->cond_snapshot) {
1339 		ret = -EBUSY;
1340 		goto fail_unlock;
1341 	}
1342 
1343 	arch_spin_lock(&tr->max_lock);
1344 	tr->cond_snapshot = cond_snapshot;
1345 	arch_spin_unlock(&tr->max_lock);
1346 
1347 	mutex_unlock(&trace_types_lock);
1348 
1349 	return ret;
1350 
1351  fail_unlock:
1352 	mutex_unlock(&trace_types_lock);
1353 	kfree(cond_snapshot);
1354 	return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1357 
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:		The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370 	int ret = 0;
1371 
1372 	arch_spin_lock(&tr->max_lock);
1373 
1374 	if (!tr->cond_snapshot)
1375 		ret = -EINVAL;
1376 	else {
1377 		kfree(tr->cond_snapshot);
1378 		tr->cond_snapshot = NULL;
1379 	}
1380 
1381 	arch_spin_unlock(&tr->max_lock);
1382 
1383 	return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400 	return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405 	/* Give warning */
1406 	tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411 	return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416 	return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421 	return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425 
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428 	if (tr->array_buffer.buffer)
1429 		ring_buffer_record_off(tr->array_buffer.buffer);
1430 	/*
1431 	 * This flag is looked at when buffers haven't been allocated
1432 	 * yet, or by some tracers (like irqsoff), that just want to
1433 	 * know if the ring buffer has been disabled, but it can handle
1434 	 * races of where it gets disabled but we still do a record.
1435 	 * As the check is in the fast path of the tracers, it is more
1436 	 * important to be fast than accurate.
1437 	 */
1438 	tr->buffer_disabled = 1;
1439 	/* Make the flag seen by readers */
1440 	smp_wmb();
1441 }
1442 
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453 	tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456 
1457 void disable_trace_on_warning(void)
1458 {
1459 	if (__disable_trace_on_warning) {
1460 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461 			"Disabling tracing due to warning\n");
1462 		tracing_off();
1463 	}
1464 }
1465 
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474 	if (tr->array_buffer.buffer)
1475 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476 	return !tr->buffer_disabled;
1477 }
1478 
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484 	return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487 
1488 static int __init set_buf_size(char *str)
1489 {
1490 	unsigned long buf_size;
1491 
1492 	if (!str)
1493 		return 0;
1494 	buf_size = memparse(str, &str);
1495 	/* nr_entries can not be zero */
1496 	if (buf_size == 0)
1497 		return 0;
1498 	trace_buf_size = buf_size;
1499 	return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502 
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 	unsigned long threshold;
1506 	int ret;
1507 
1508 	if (!str)
1509 		return 0;
1510 	ret = kstrtoul(str, 0, &threshold);
1511 	if (ret < 0)
1512 		return 0;
1513 	tracing_thresh = threshold * 1000;
1514 	return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517 
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520 	return nsecs / 1000;
1521 }
1522 
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531 
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534 	TRACE_FLAGS
1535 	NULL
1536 };
1537 
1538 static struct {
1539 	u64 (*func)(void);
1540 	const char *name;
1541 	int in_ns;		/* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543 	{ trace_clock_local,		"local",	1 },
1544 	{ trace_clock_global,		"global",	1 },
1545 	{ trace_clock_counter,		"counter",	0 },
1546 	{ trace_clock_jiffies,		"uptime",	0 },
1547 	{ trace_clock,			"perf",		1 },
1548 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1549 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1550 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1551 	ARCH_TRACE_CLOCKS
1552 };
1553 
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 	if (trace_clocks[tr->clock_id].in_ns)
1557 		return true;
1558 
1559 	return false;
1560 }
1561 
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567 	memset(parser, 0, sizeof(*parser));
1568 
1569 	parser->buffer = kmalloc(size, GFP_KERNEL);
1570 	if (!parser->buffer)
1571 		return 1;
1572 
1573 	parser->size = size;
1574 	return 0;
1575 }
1576 
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582 	kfree(parser->buffer);
1583 	parser->buffer = NULL;
1584 }
1585 
1586 /*
1587  * trace_get_user - reads the user input string separated by  space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 	size_t cnt, loff_t *ppos)
1599 {
1600 	char ch;
1601 	size_t read = 0;
1602 	ssize_t ret;
1603 
1604 	if (!*ppos)
1605 		trace_parser_clear(parser);
1606 
1607 	ret = get_user(ch, ubuf++);
1608 	if (ret)
1609 		goto out;
1610 
1611 	read++;
1612 	cnt--;
1613 
1614 	/*
1615 	 * The parser is not finished with the last write,
1616 	 * continue reading the user input without skipping spaces.
1617 	 */
1618 	if (!parser->cont) {
1619 		/* skip white space */
1620 		while (cnt && isspace(ch)) {
1621 			ret = get_user(ch, ubuf++);
1622 			if (ret)
1623 				goto out;
1624 			read++;
1625 			cnt--;
1626 		}
1627 
1628 		parser->idx = 0;
1629 
1630 		/* only spaces were written */
1631 		if (isspace(ch) || !ch) {
1632 			*ppos += read;
1633 			ret = read;
1634 			goto out;
1635 		}
1636 	}
1637 
1638 	/* read the non-space input */
1639 	while (cnt && !isspace(ch) && ch) {
1640 		if (parser->idx < parser->size - 1)
1641 			parser->buffer[parser->idx++] = ch;
1642 		else {
1643 			ret = -EINVAL;
1644 			goto out;
1645 		}
1646 		ret = get_user(ch, ubuf++);
1647 		if (ret)
1648 			goto out;
1649 		read++;
1650 		cnt--;
1651 	}
1652 
1653 	/* We either got finished input or we have to wait for another call. */
1654 	if (isspace(ch) || !ch) {
1655 		parser->buffer[parser->idx] = 0;
1656 		parser->cont = false;
1657 	} else if (parser->idx < parser->size - 1) {
1658 		parser->cont = true;
1659 		parser->buffer[parser->idx++] = ch;
1660 		/* Make sure the parsed string always terminates with '\0'. */
1661 		parser->buffer[parser->idx] = 0;
1662 	} else {
1663 		ret = -EINVAL;
1664 		goto out;
1665 	}
1666 
1667 	*ppos += read;
1668 	ret = read;
1669 
1670 out:
1671 	return ret;
1672 }
1673 
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677 	int len;
1678 
1679 	if (trace_seq_used(s) <= s->seq.readpos)
1680 		return -EBUSY;
1681 
1682 	len = trace_seq_used(s) - s->seq.readpos;
1683 	if (cnt > len)
1684 		cnt = len;
1685 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686 
1687 	s->seq.readpos += cnt;
1688 	return cnt;
1689 }
1690 
1691 unsigned long __read_mostly	tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693 
1694 #ifdef LATENCY_FS_NOTIFY
1695 
1696 static struct workqueue_struct *fsnotify_wq;
1697 
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700 	struct trace_array *tr = container_of(work, struct trace_array,
1701 					      fsnotify_work);
1702 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704 
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707 	struct trace_array *tr = container_of(iwork, struct trace_array,
1708 					      fsnotify_irqwork);
1709 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711 
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713 				     struct dentry *d_tracer)
1714 {
1715 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718 					      d_tracer, &tr->max_latency,
1719 					      &tracing_max_lat_fops);
1720 }
1721 
1722 __init static int latency_fsnotify_init(void)
1723 {
1724 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1726 	if (!fsnotify_wq) {
1727 		pr_err("Unable to allocate tr_max_lat_wq\n");
1728 		return -ENOMEM;
1729 	}
1730 	return 0;
1731 }
1732 
1733 late_initcall_sync(latency_fsnotify_init);
1734 
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737 	if (!fsnotify_wq)
1738 		return;
1739 	/*
1740 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741 	 * possible that we are called from __schedule() or do_idle(), which
1742 	 * could cause a deadlock.
1743 	 */
1744 	irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746 
1747 /*
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749  *  defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752 
1753 #define trace_create_maxlat_file(tr, d_tracer)				\
1754 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1755 			  &tr->max_latency, &tracing_max_lat_fops)
1756 
1757 #endif
1758 
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768 	struct array_buffer *trace_buf = &tr->array_buffer;
1769 	struct array_buffer *max_buf = &tr->max_buffer;
1770 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772 
1773 	max_buf->cpu = cpu;
1774 	max_buf->time_start = data->preempt_timestamp;
1775 
1776 	max_data->saved_latency = tr->max_latency;
1777 	max_data->critical_start = data->critical_start;
1778 	max_data->critical_end = data->critical_end;
1779 
1780 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781 	max_data->pid = tsk->pid;
1782 	/*
1783 	 * If tsk == current, then use current_uid(), as that does not use
1784 	 * RCU. The irq tracer can be called out of RCU scope.
1785 	 */
1786 	if (tsk == current)
1787 		max_data->uid = current_uid();
1788 	else
1789 		max_data->uid = task_uid(tsk);
1790 
1791 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792 	max_data->policy = tsk->policy;
1793 	max_data->rt_priority = tsk->rt_priority;
1794 
1795 	/* record this tasks comm */
1796 	tracing_record_cmdline(tsk);
1797 	latency_fsnotify(tr);
1798 }
1799 
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: tracer
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812 	      void *cond_data)
1813 {
1814 	if (tr->stop_count)
1815 		return;
1816 
1817 	WARN_ON_ONCE(!irqs_disabled());
1818 
1819 	if (!tr->allocated_snapshot) {
1820 		/* Only the nop tracer should hit this when disabling */
1821 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822 		return;
1823 	}
1824 
1825 	arch_spin_lock(&tr->max_lock);
1826 
1827 	/* Inherit the recordable setting from array_buffer */
1828 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829 		ring_buffer_record_on(tr->max_buffer.buffer);
1830 	else
1831 		ring_buffer_record_off(tr->max_buffer.buffer);
1832 
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835 		goto out_unlock;
1836 #endif
1837 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838 
1839 	__update_max_tr(tr, tsk, cpu);
1840 
1841  out_unlock:
1842 	arch_spin_unlock(&tr->max_lock);
1843 }
1844 
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: tracer
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856 	int ret;
1857 
1858 	if (tr->stop_count)
1859 		return;
1860 
1861 	WARN_ON_ONCE(!irqs_disabled());
1862 	if (!tr->allocated_snapshot) {
1863 		/* Only the nop tracer should hit this when disabling */
1864 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865 		return;
1866 	}
1867 
1868 	arch_spin_lock(&tr->max_lock);
1869 
1870 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871 
1872 	if (ret == -EBUSY) {
1873 		/*
1874 		 * We failed to swap the buffer due to a commit taking
1875 		 * place on this CPU. We fail to record, but we reset
1876 		 * the max trace buffer (no one writes directly to it)
1877 		 * and flag that it failed.
1878 		 */
1879 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880 			"Failed to swap buffers due to commit in progress\n");
1881 	}
1882 
1883 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884 
1885 	__update_max_tr(tr, tsk, cpu);
1886 	arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889 
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892 	/* Iterators are static, they should be filled or empty */
1893 	if (trace_buffer_iter(iter, iter->cpu_file))
1894 		return 0;
1895 
1896 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897 				full);
1898 }
1899 
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
1901 static bool selftests_can_run;
1902 
1903 struct trace_selftests {
1904 	struct list_head		list;
1905 	struct tracer			*type;
1906 };
1907 
1908 static LIST_HEAD(postponed_selftests);
1909 
1910 static int save_selftest(struct tracer *type)
1911 {
1912 	struct trace_selftests *selftest;
1913 
1914 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915 	if (!selftest)
1916 		return -ENOMEM;
1917 
1918 	selftest->type = type;
1919 	list_add(&selftest->list, &postponed_selftests);
1920 	return 0;
1921 }
1922 
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925 	struct trace_array *tr = &global_trace;
1926 	struct tracer *saved_tracer = tr->current_trace;
1927 	int ret;
1928 
1929 	if (!type->selftest || tracing_selftest_disabled)
1930 		return 0;
1931 
1932 	/*
1933 	 * If a tracer registers early in boot up (before scheduling is
1934 	 * initialized and such), then do not run its selftests yet.
1935 	 * Instead, run it a little later in the boot process.
1936 	 */
1937 	if (!selftests_can_run)
1938 		return save_selftest(type);
1939 
1940 	if (!tracing_is_on()) {
1941 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942 			type->name);
1943 		return 0;
1944 	}
1945 
1946 	/*
1947 	 * Run a selftest on this tracer.
1948 	 * Here we reset the trace buffer, and set the current
1949 	 * tracer to be this tracer. The tracer can then run some
1950 	 * internal tracing to verify that everything is in order.
1951 	 * If we fail, we do not register this tracer.
1952 	 */
1953 	tracing_reset_online_cpus(&tr->array_buffer);
1954 
1955 	tr->current_trace = type;
1956 
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958 	if (type->use_max_tr) {
1959 		/* If we expanded the buffers, make sure the max is expanded too */
1960 		if (ring_buffer_expanded)
1961 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962 					   RING_BUFFER_ALL_CPUS);
1963 		tr->allocated_snapshot = true;
1964 	}
1965 #endif
1966 
1967 	/* the test is responsible for initializing and enabling */
1968 	pr_info("Testing tracer %s: ", type->name);
1969 	ret = type->selftest(type, tr);
1970 	/* the test is responsible for resetting too */
1971 	tr->current_trace = saved_tracer;
1972 	if (ret) {
1973 		printk(KERN_CONT "FAILED!\n");
1974 		/* Add the warning after printing 'FAILED' */
1975 		WARN_ON(1);
1976 		return -1;
1977 	}
1978 	/* Only reset on passing, to avoid touching corrupted buffers */
1979 	tracing_reset_online_cpus(&tr->array_buffer);
1980 
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982 	if (type->use_max_tr) {
1983 		tr->allocated_snapshot = false;
1984 
1985 		/* Shrink the max buffer again */
1986 		if (ring_buffer_expanded)
1987 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1988 					   RING_BUFFER_ALL_CPUS);
1989 	}
1990 #endif
1991 
1992 	printk(KERN_CONT "PASSED\n");
1993 	return 0;
1994 }
1995 
1996 static __init int init_trace_selftests(void)
1997 {
1998 	struct trace_selftests *p, *n;
1999 	struct tracer *t, **last;
2000 	int ret;
2001 
2002 	selftests_can_run = true;
2003 
2004 	mutex_lock(&trace_types_lock);
2005 
2006 	if (list_empty(&postponed_selftests))
2007 		goto out;
2008 
2009 	pr_info("Running postponed tracer tests:\n");
2010 
2011 	tracing_selftest_running = true;
2012 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013 		/* This loop can take minutes when sanitizers are enabled, so
2014 		 * lets make sure we allow RCU processing.
2015 		 */
2016 		cond_resched();
2017 		ret = run_tracer_selftest(p->type);
2018 		/* If the test fails, then warn and remove from available_tracers */
2019 		if (ret < 0) {
2020 			WARN(1, "tracer: %s failed selftest, disabling\n",
2021 			     p->type->name);
2022 			last = &trace_types;
2023 			for (t = trace_types; t; t = t->next) {
2024 				if (t == p->type) {
2025 					*last = t->next;
2026 					break;
2027 				}
2028 				last = &t->next;
2029 			}
2030 		}
2031 		list_del(&p->list);
2032 		kfree(p);
2033 	}
2034 	tracing_selftest_running = false;
2035 
2036  out:
2037 	mutex_unlock(&trace_types_lock);
2038 
2039 	return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
2043 static inline int run_tracer_selftest(struct tracer *type)
2044 {
2045 	return 0;
2046 }
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048 
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050 
2051 static void __init apply_trace_boot_options(void);
2052 
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061 	struct tracer *t;
2062 	int ret = 0;
2063 
2064 	if (!type->name) {
2065 		pr_info("Tracer must have a name\n");
2066 		return -1;
2067 	}
2068 
2069 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071 		return -1;
2072 	}
2073 
2074 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075 		pr_warn("Can not register tracer %s due to lockdown\n",
2076 			   type->name);
2077 		return -EPERM;
2078 	}
2079 
2080 	mutex_lock(&trace_types_lock);
2081 
2082 	tracing_selftest_running = true;
2083 
2084 	for (t = trace_types; t; t = t->next) {
2085 		if (strcmp(type->name, t->name) == 0) {
2086 			/* already found */
2087 			pr_info("Tracer %s already registered\n",
2088 				type->name);
2089 			ret = -1;
2090 			goto out;
2091 		}
2092 	}
2093 
2094 	if (!type->set_flag)
2095 		type->set_flag = &dummy_set_flag;
2096 	if (!type->flags) {
2097 		/*allocate a dummy tracer_flags*/
2098 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099 		if (!type->flags) {
2100 			ret = -ENOMEM;
2101 			goto out;
2102 		}
2103 		type->flags->val = 0;
2104 		type->flags->opts = dummy_tracer_opt;
2105 	} else
2106 		if (!type->flags->opts)
2107 			type->flags->opts = dummy_tracer_opt;
2108 
2109 	/* store the tracer for __set_tracer_option */
2110 	type->flags->trace = type;
2111 
2112 	ret = run_tracer_selftest(type);
2113 	if (ret < 0)
2114 		goto out;
2115 
2116 	type->next = trace_types;
2117 	trace_types = type;
2118 	add_tracer_options(&global_trace, type);
2119 
2120  out:
2121 	tracing_selftest_running = false;
2122 	mutex_unlock(&trace_types_lock);
2123 
2124 	if (ret || !default_bootup_tracer)
2125 		goto out_unlock;
2126 
2127 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128 		goto out_unlock;
2129 
2130 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131 	/* Do we want this tracer to start on bootup? */
2132 	tracing_set_tracer(&global_trace, type->name);
2133 	default_bootup_tracer = NULL;
2134 
2135 	apply_trace_boot_options();
2136 
2137 	/* disable other selftests, since this will break it. */
2138 	disable_tracing_selftest("running a tracer");
2139 
2140  out_unlock:
2141 	return ret;
2142 }
2143 
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146 	struct trace_buffer *buffer = buf->buffer;
2147 
2148 	if (!buffer)
2149 		return;
2150 
2151 	ring_buffer_record_disable(buffer);
2152 
2153 	/* Make sure all commits have finished */
2154 	synchronize_rcu();
2155 	ring_buffer_reset_cpu(buffer, cpu);
2156 
2157 	ring_buffer_record_enable(buffer);
2158 }
2159 
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162 	struct trace_buffer *buffer = buf->buffer;
2163 
2164 	if (!buffer)
2165 		return;
2166 
2167 	ring_buffer_record_disable(buffer);
2168 
2169 	/* Make sure all commits have finished */
2170 	synchronize_rcu();
2171 
2172 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173 
2174 	ring_buffer_reset_online_cpus(buffer);
2175 
2176 	ring_buffer_record_enable(buffer);
2177 }
2178 
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182 	struct trace_array *tr;
2183 
2184 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185 		if (!tr->clear_trace)
2186 			continue;
2187 		tr->clear_trace = false;
2188 		tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190 		tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192 	}
2193 }
2194 
2195 /*
2196  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197  * is the tgid last observed corresponding to pid=i.
2198  */
2199 static int *tgid_map;
2200 
2201 /* The maximum valid index into tgid_map. */
2202 static size_t tgid_map_max;
2203 
2204 #define SAVED_CMDLINES_DEFAULT 128
2205 #define NO_CMDLINE_MAP UINT_MAX
2206 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2207 struct saved_cmdlines_buffer {
2208 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209 	unsigned *map_cmdline_to_pid;
2210 	unsigned cmdline_num;
2211 	int cmdline_idx;
2212 	char *saved_cmdlines;
2213 };
2214 static struct saved_cmdlines_buffer *savedcmd;
2215 
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220 
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225 
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227 				    struct saved_cmdlines_buffer *s)
2228 {
2229 	s->map_cmdline_to_pid = kmalloc_array(val,
2230 					      sizeof(*s->map_cmdline_to_pid),
2231 					      GFP_KERNEL);
2232 	if (!s->map_cmdline_to_pid)
2233 		return -ENOMEM;
2234 
2235 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236 	if (!s->saved_cmdlines) {
2237 		kfree(s->map_cmdline_to_pid);
2238 		return -ENOMEM;
2239 	}
2240 
2241 	s->cmdline_idx = 0;
2242 	s->cmdline_num = val;
2243 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244 	       sizeof(s->map_pid_to_cmdline));
2245 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246 	       val * sizeof(*s->map_cmdline_to_pid));
2247 
2248 	return 0;
2249 }
2250 
2251 static int trace_create_savedcmd(void)
2252 {
2253 	int ret;
2254 
2255 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256 	if (!savedcmd)
2257 		return -ENOMEM;
2258 
2259 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260 	if (ret < 0) {
2261 		kfree(savedcmd);
2262 		savedcmd = NULL;
2263 		return -ENOMEM;
2264 	}
2265 
2266 	return 0;
2267 }
2268 
2269 int is_tracing_stopped(void)
2270 {
2271 	return global_trace.stop_count;
2272 }
2273 
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282 	struct trace_buffer *buffer;
2283 	unsigned long flags;
2284 
2285 	if (tracing_disabled)
2286 		return;
2287 
2288 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289 	if (--global_trace.stop_count) {
2290 		if (global_trace.stop_count < 0) {
2291 			/* Someone screwed up their debugging */
2292 			WARN_ON_ONCE(1);
2293 			global_trace.stop_count = 0;
2294 		}
2295 		goto out;
2296 	}
2297 
2298 	/* Prevent the buffers from switching */
2299 	arch_spin_lock(&global_trace.max_lock);
2300 
2301 	buffer = global_trace.array_buffer.buffer;
2302 	if (buffer)
2303 		ring_buffer_record_enable(buffer);
2304 
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306 	buffer = global_trace.max_buffer.buffer;
2307 	if (buffer)
2308 		ring_buffer_record_enable(buffer);
2309 #endif
2310 
2311 	arch_spin_unlock(&global_trace.max_lock);
2312 
2313  out:
2314 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316 
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319 	struct trace_buffer *buffer;
2320 	unsigned long flags;
2321 
2322 	if (tracing_disabled)
2323 		return;
2324 
2325 	/* If global, we need to also start the max tracer */
2326 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327 		return tracing_start();
2328 
2329 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2330 
2331 	if (--tr->stop_count) {
2332 		if (tr->stop_count < 0) {
2333 			/* Someone screwed up their debugging */
2334 			WARN_ON_ONCE(1);
2335 			tr->stop_count = 0;
2336 		}
2337 		goto out;
2338 	}
2339 
2340 	buffer = tr->array_buffer.buffer;
2341 	if (buffer)
2342 		ring_buffer_record_enable(buffer);
2343 
2344  out:
2345 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347 
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Light weight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356 	struct trace_buffer *buffer;
2357 	unsigned long flags;
2358 
2359 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360 	if (global_trace.stop_count++)
2361 		goto out;
2362 
2363 	/* Prevent the buffers from switching */
2364 	arch_spin_lock(&global_trace.max_lock);
2365 
2366 	buffer = global_trace.array_buffer.buffer;
2367 	if (buffer)
2368 		ring_buffer_record_disable(buffer);
2369 
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371 	buffer = global_trace.max_buffer.buffer;
2372 	if (buffer)
2373 		ring_buffer_record_disable(buffer);
2374 #endif
2375 
2376 	arch_spin_unlock(&global_trace.max_lock);
2377 
2378  out:
2379 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
2381 
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384 	struct trace_buffer *buffer;
2385 	unsigned long flags;
2386 
2387 	/* If global, we need to also stop the max tracer */
2388 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389 		return tracing_stop();
2390 
2391 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2392 	if (tr->stop_count++)
2393 		goto out;
2394 
2395 	buffer = tr->array_buffer.buffer;
2396 	if (buffer)
2397 		ring_buffer_record_disable(buffer);
2398 
2399  out:
2400 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402 
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405 	unsigned tpid, idx;
2406 
2407 	/* treat recording of idle task as a success */
2408 	if (!tsk->pid)
2409 		return 1;
2410 
2411 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412 
2413 	/*
2414 	 * It's not the end of the world if we don't get
2415 	 * the lock, but we also don't want to spin
2416 	 * nor do we want to disable interrupts,
2417 	 * so if we miss here, then better luck next time.
2418 	 */
2419 	if (!arch_spin_trylock(&trace_cmdline_lock))
2420 		return 0;
2421 
2422 	idx = savedcmd->map_pid_to_cmdline[tpid];
2423 	if (idx == NO_CMDLINE_MAP) {
2424 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425 
2426 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2427 		savedcmd->cmdline_idx = idx;
2428 	}
2429 
2430 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431 	set_cmdline(idx, tsk->comm);
2432 
2433 	arch_spin_unlock(&trace_cmdline_lock);
2434 
2435 	return 1;
2436 }
2437 
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440 	unsigned map;
2441 	int tpid;
2442 
2443 	if (!pid) {
2444 		strcpy(comm, "<idle>");
2445 		return;
2446 	}
2447 
2448 	if (WARN_ON_ONCE(pid < 0)) {
2449 		strcpy(comm, "<XXX>");
2450 		return;
2451 	}
2452 
2453 	tpid = pid & (PID_MAX_DEFAULT - 1);
2454 	map = savedcmd->map_pid_to_cmdline[tpid];
2455 	if (map != NO_CMDLINE_MAP) {
2456 		tpid = savedcmd->map_cmdline_to_pid[map];
2457 		if (tpid == pid) {
2458 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459 			return;
2460 		}
2461 	}
2462 	strcpy(comm, "<...>");
2463 }
2464 
2465 void trace_find_cmdline(int pid, char comm[])
2466 {
2467 	preempt_disable();
2468 	arch_spin_lock(&trace_cmdline_lock);
2469 
2470 	__trace_find_cmdline(pid, comm);
2471 
2472 	arch_spin_unlock(&trace_cmdline_lock);
2473 	preempt_enable();
2474 }
2475 
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478 	/*
2479 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480 	 * if we observe a non-NULL tgid_map then we also observe the correct
2481 	 * tgid_map_max.
2482 	 */
2483 	int *map = smp_load_acquire(&tgid_map);
2484 
2485 	if (unlikely(!map || pid > tgid_map_max))
2486 		return NULL;
2487 
2488 	return &map[pid];
2489 }
2490 
2491 int trace_find_tgid(int pid)
2492 {
2493 	int *ptr = trace_find_tgid_ptr(pid);
2494 
2495 	return ptr ? *ptr : 0;
2496 }
2497 
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500 	int *ptr;
2501 
2502 	/* treat recording of idle task as a success */
2503 	if (!tsk->pid)
2504 		return 1;
2505 
2506 	ptr = trace_find_tgid_ptr(tsk->pid);
2507 	if (!ptr)
2508 		return 0;
2509 
2510 	*ptr = tsk->tgid;
2511 	return 1;
2512 }
2513 
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517 		return true;
2518 	if (!__this_cpu_read(trace_taskinfo_save))
2519 		return true;
2520 	return false;
2521 }
2522 
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532 	bool done;
2533 
2534 	if (tracing_record_taskinfo_skip(flags))
2535 		return;
2536 
2537 	/*
2538 	 * Record as much task information as possible. If some fail, continue
2539 	 * to try to record the others.
2540 	 */
2541 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543 
2544 	/* If recording any information failed, retry again soon. */
2545 	if (!done)
2546 		return;
2547 
2548 	__this_cpu_write(trace_taskinfo_save, false);
2549 }
2550 
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560 					  struct task_struct *next, int flags)
2561 {
2562 	bool done;
2563 
2564 	if (tracing_record_taskinfo_skip(flags))
2565 		return;
2566 
2567 	/*
2568 	 * Record as much task information as possible. If some fail, continue
2569 	 * to try to record the others.
2570 	 */
2571 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575 
2576 	/* If recording any information failed, retry again soon. */
2577 	if (!done)
2578 		return;
2579 
2580 	__this_cpu_write(trace_taskinfo_save, false);
2581 }
2582 
2583 /* Helpers to record a specific task information */
2584 void tracing_record_cmdline(struct task_struct *task)
2585 {
2586 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587 }
2588 
2589 void tracing_record_tgid(struct task_struct *task)
2590 {
2591 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592 }
2593 
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601 	return trace_seq_has_overflowed(s) ?
2602 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
2605 
2606 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607 {
2608 	unsigned int trace_flags = irqs_status;
2609 	unsigned int pc;
2610 
2611 	pc = preempt_count();
2612 
2613 	if (pc & NMI_MASK)
2614 		trace_flags |= TRACE_FLAG_NMI;
2615 	if (pc & HARDIRQ_MASK)
2616 		trace_flags |= TRACE_FLAG_HARDIRQ;
2617 	if (in_serving_softirq())
2618 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2619 
2620 	if (tif_need_resched())
2621 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622 	if (test_preempt_need_resched())
2623 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624 	return (trace_flags << 16) | (pc & 0xff);
2625 }
2626 
2627 struct ring_buffer_event *
2628 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2629 			  int type,
2630 			  unsigned long len,
2631 			  unsigned int trace_ctx)
2632 {
2633 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2634 }
2635 
2636 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638 static int trace_buffered_event_ref;
2639 
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. The discard of the ring buffer
2646  * is not as fast as committing, and is much slower than copying
2647  * a commit.
2648  *
2649  * When an event is to be filtered, allocate per cpu buffers to
2650  * write the event data into, and if the event is filtered and discarded
2651  * it is simply dropped, otherwise, the entire data is to be committed
2652  * in one shot.
2653  */
2654 void trace_buffered_event_enable(void)
2655 {
2656 	struct ring_buffer_event *event;
2657 	struct page *page;
2658 	int cpu;
2659 
2660 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661 
2662 	if (trace_buffered_event_ref++)
2663 		return;
2664 
2665 	for_each_tracing_cpu(cpu) {
2666 		page = alloc_pages_node(cpu_to_node(cpu),
2667 					GFP_KERNEL | __GFP_NORETRY, 0);
2668 		if (!page)
2669 			goto failed;
2670 
2671 		event = page_address(page);
2672 		memset(event, 0, sizeof(*event));
2673 
2674 		per_cpu(trace_buffered_event, cpu) = event;
2675 
2676 		preempt_disable();
2677 		if (cpu == smp_processor_id() &&
2678 		    __this_cpu_read(trace_buffered_event) !=
2679 		    per_cpu(trace_buffered_event, cpu))
2680 			WARN_ON_ONCE(1);
2681 		preempt_enable();
2682 	}
2683 
2684 	return;
2685  failed:
2686 	trace_buffered_event_disable();
2687 }
2688 
2689 static void enable_trace_buffered_event(void *data)
2690 {
2691 	/* Probably not needed, but do it anyway */
2692 	smp_rmb();
2693 	this_cpu_dec(trace_buffered_event_cnt);
2694 }
2695 
2696 static void disable_trace_buffered_event(void *data)
2697 {
2698 	this_cpu_inc(trace_buffered_event_cnt);
2699 }
2700 
2701 /**
2702  * trace_buffered_event_disable - disable buffering events
2703  *
2704  * When a filter is removed, it is faster to not use the buffered
2705  * events, and to commit directly into the ring buffer. Free up
2706  * the temp buffers when there are no more users. This requires
2707  * special synchronization with current events.
2708  */
2709 void trace_buffered_event_disable(void)
2710 {
2711 	int cpu;
2712 
2713 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714 
2715 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716 		return;
2717 
2718 	if (--trace_buffered_event_ref)
2719 		return;
2720 
2721 	preempt_disable();
2722 	/* For each CPU, set the buffer as used. */
2723 	smp_call_function_many(tracing_buffer_mask,
2724 			       disable_trace_buffered_event, NULL, 1);
2725 	preempt_enable();
2726 
2727 	/* Wait for all current users to finish */
2728 	synchronize_rcu();
2729 
2730 	for_each_tracing_cpu(cpu) {
2731 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732 		per_cpu(trace_buffered_event, cpu) = NULL;
2733 	}
2734 	/*
2735 	 * Make sure trace_buffered_event is NULL before clearing
2736 	 * trace_buffered_event_cnt.
2737 	 */
2738 	smp_wmb();
2739 
2740 	preempt_disable();
2741 	/* Do the work on each cpu */
2742 	smp_call_function_many(tracing_buffer_mask,
2743 			       enable_trace_buffered_event, NULL, 1);
2744 	preempt_enable();
2745 }
2746 
2747 static struct trace_buffer *temp_buffer;
2748 
2749 struct ring_buffer_event *
2750 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751 			  struct trace_event_file *trace_file,
2752 			  int type, unsigned long len,
2753 			  unsigned int trace_ctx)
2754 {
2755 	struct ring_buffer_event *entry;
2756 	struct trace_array *tr = trace_file->tr;
2757 	int val;
2758 
2759 	*current_rb = tr->array_buffer.buffer;
2760 
2761 	if (!tr->no_filter_buffering_ref &&
2762 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763 	    (entry = this_cpu_read(trace_buffered_event))) {
2764 		/*
2765 		 * Filtering is on, so try to use the per cpu buffer first.
2766 		 * This buffer will simulate a ring_buffer_event,
2767 		 * where the type_len is zero and the array[0] will
2768 		 * hold the full length.
2769 		 * (see include/linux/ring-buffer.h for details on
2770 		 *  how the ring_buffer_event is structured).
2771 		 *
2772 		 * Using a temp buffer during filtering and copying it
2773 		 * on a matched filter is quicker than writing directly
2774 		 * into the ring buffer and then discarding it when
2775 		 * it doesn't match. That is because the discard
2776 		 * requires several atomic operations to get right.
2777 		 * Copying on match and doing nothing on a failed match
2778 		 * is still quicker than no copy on match, but having
2779 		 * to discard out of the ring buffer on a failed match.
2780 		 */
2781 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782 
2783 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2784 
2785 		/*
2786 		 * Preemption is disabled, but interrupts and NMIs
2787 		 * can still come in now. If that happens after
2788 		 * the above increment, then it will have to go
2789 		 * back to the old method of allocating the event
2790 		 * on the ring buffer, and if the filter fails, it
2791 		 * will have to call ring_buffer_discard_commit()
2792 		 * to remove it.
2793 		 *
2794 		 * Need to also check the unlikely case that the
2795 		 * length is bigger than the temp buffer size.
2796 		 * If that happens, then the reserve is pretty much
2797 		 * guaranteed to fail, as the ring buffer currently
2798 		 * only allows events less than a page. But that may
2799 		 * change in the future, so let the ring buffer reserve
2800 		 * handle the failure in that case.
2801 		 */
2802 		if (val == 1 && likely(len <= max_len)) {
2803 			trace_event_setup(entry, type, trace_ctx);
2804 			entry->array[0] = len;
2805 			return entry;
2806 		}
2807 		this_cpu_dec(trace_buffered_event_cnt);
2808 	}
2809 
2810 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811 					    trace_ctx);
2812 	/*
2813 	 * If tracing is off, but we have triggers enabled
2814 	 * we still need to look at the event data. Use the temp_buffer
2815 	 * to store the trace event for the trigger to use. It's recursive
2816 	 * safe and will not be recorded anywhere.
2817 	 */
2818 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819 		*current_rb = temp_buffer;
2820 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821 						    trace_ctx);
2822 	}
2823 	return entry;
2824 }
2825 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826 
2827 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828 static DEFINE_MUTEX(tracepoint_printk_mutex);
2829 
2830 static void output_printk(struct trace_event_buffer *fbuffer)
2831 {
2832 	struct trace_event_call *event_call;
2833 	struct trace_event_file *file;
2834 	struct trace_event *event;
2835 	unsigned long flags;
2836 	struct trace_iterator *iter = tracepoint_print_iter;
2837 
2838 	/* We should never get here if iter is NULL */
2839 	if (WARN_ON_ONCE(!iter))
2840 		return;
2841 
2842 	event_call = fbuffer->trace_file->event_call;
2843 	if (!event_call || !event_call->event.funcs ||
2844 	    !event_call->event.funcs->trace)
2845 		return;
2846 
2847 	file = fbuffer->trace_file;
2848 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850 	     !filter_match_preds(file->filter, fbuffer->entry)))
2851 		return;
2852 
2853 	event = &fbuffer->trace_file->event_call->event;
2854 
2855 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856 	trace_seq_init(&iter->seq);
2857 	iter->ent = fbuffer->entry;
2858 	event_call->event.funcs->trace(iter, 0, event);
2859 	trace_seq_putc(&iter->seq, 0);
2860 	printk("%s", iter->seq.buffer);
2861 
2862 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863 }
2864 
2865 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866 			     void *buffer, size_t *lenp,
2867 			     loff_t *ppos)
2868 {
2869 	int save_tracepoint_printk;
2870 	int ret;
2871 
2872 	mutex_lock(&tracepoint_printk_mutex);
2873 	save_tracepoint_printk = tracepoint_printk;
2874 
2875 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876 
2877 	/*
2878 	 * This will force exiting early, as tracepoint_printk
2879 	 * is always zero when tracepoint_printk_iter is not allocated
2880 	 */
2881 	if (!tracepoint_print_iter)
2882 		tracepoint_printk = 0;
2883 
2884 	if (save_tracepoint_printk == tracepoint_printk)
2885 		goto out;
2886 
2887 	if (tracepoint_printk)
2888 		static_key_enable(&tracepoint_printk_key.key);
2889 	else
2890 		static_key_disable(&tracepoint_printk_key.key);
2891 
2892  out:
2893 	mutex_unlock(&tracepoint_printk_mutex);
2894 
2895 	return ret;
2896 }
2897 
2898 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899 {
2900 	enum event_trigger_type tt = ETT_NONE;
2901 	struct trace_event_file *file = fbuffer->trace_file;
2902 
2903 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2904 			fbuffer->entry, &tt))
2905 		goto discard;
2906 
2907 	if (static_key_false(&tracepoint_printk_key.key))
2908 		output_printk(fbuffer);
2909 
2910 	if (static_branch_unlikely(&trace_event_exports_enabled))
2911 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2912 
2913 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2914 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2915 
2916 discard:
2917 	if (tt)
2918 		event_triggers_post_call(file, tt);
2919 
2920 }
2921 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2922 
2923 /*
2924  * Skip 3:
2925  *
2926  *   trace_buffer_unlock_commit_regs()
2927  *   trace_event_buffer_commit()
2928  *   trace_event_raw_event_xxx()
2929  */
2930 # define STACK_SKIP 3
2931 
2932 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2933 				     struct trace_buffer *buffer,
2934 				     struct ring_buffer_event *event,
2935 				     unsigned int trace_ctx,
2936 				     struct pt_regs *regs)
2937 {
2938 	__buffer_unlock_commit(buffer, event);
2939 
2940 	/*
2941 	 * If regs is not set, then skip the necessary functions.
2942 	 * Note, we can still get here via blktrace, wakeup tracer
2943 	 * and mmiotrace, but that's ok if they lose a function or
2944 	 * two. They are not that meaningful.
2945 	 */
2946 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2947 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2948 }
2949 
2950 /*
2951  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2952  */
2953 void
2954 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2955 				   struct ring_buffer_event *event)
2956 {
2957 	__buffer_unlock_commit(buffer, event);
2958 }
2959 
2960 void
2961 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2962 	       parent_ip, unsigned int trace_ctx)
2963 {
2964 	struct trace_event_call *call = &event_function;
2965 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2966 	struct ring_buffer_event *event;
2967 	struct ftrace_entry *entry;
2968 
2969 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2970 					    trace_ctx);
2971 	if (!event)
2972 		return;
2973 	entry	= ring_buffer_event_data(event);
2974 	entry->ip			= ip;
2975 	entry->parent_ip		= parent_ip;
2976 
2977 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2978 		if (static_branch_unlikely(&trace_function_exports_enabled))
2979 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2980 		__buffer_unlock_commit(buffer, event);
2981 	}
2982 }
2983 
2984 #ifdef CONFIG_STACKTRACE
2985 
2986 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2987 #define FTRACE_KSTACK_NESTING	4
2988 
2989 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2990 
2991 struct ftrace_stack {
2992 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2993 };
2994 
2995 
2996 struct ftrace_stacks {
2997 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2998 };
2999 
3000 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3001 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3002 
3003 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3004 				 unsigned int trace_ctx,
3005 				 int skip, struct pt_regs *regs)
3006 {
3007 	struct trace_event_call *call = &event_kernel_stack;
3008 	struct ring_buffer_event *event;
3009 	unsigned int size, nr_entries;
3010 	struct ftrace_stack *fstack;
3011 	struct stack_entry *entry;
3012 	int stackidx;
3013 
3014 	/*
3015 	 * Add one, for this function and the call to save_stack_trace()
3016 	 * If regs is set, then these functions will not be in the way.
3017 	 */
3018 #ifndef CONFIG_UNWINDER_ORC
3019 	if (!regs)
3020 		skip++;
3021 #endif
3022 
3023 	preempt_disable_notrace();
3024 
3025 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3026 
3027 	/* This should never happen. If it does, yell once and skip */
3028 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3029 		goto out;
3030 
3031 	/*
3032 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3033 	 * interrupt will either see the value pre increment or post
3034 	 * increment. If the interrupt happens pre increment it will have
3035 	 * restored the counter when it returns.  We just need a barrier to
3036 	 * keep gcc from moving things around.
3037 	 */
3038 	barrier();
3039 
3040 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3041 	size = ARRAY_SIZE(fstack->calls);
3042 
3043 	if (regs) {
3044 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3045 						   size, skip);
3046 	} else {
3047 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3048 	}
3049 
3050 	size = nr_entries * sizeof(unsigned long);
3051 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3052 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3053 				    trace_ctx);
3054 	if (!event)
3055 		goto out;
3056 	entry = ring_buffer_event_data(event);
3057 
3058 	memcpy(&entry->caller, fstack->calls, size);
3059 	entry->size = nr_entries;
3060 
3061 	if (!call_filter_check_discard(call, entry, buffer, event))
3062 		__buffer_unlock_commit(buffer, event);
3063 
3064  out:
3065 	/* Again, don't let gcc optimize things here */
3066 	barrier();
3067 	__this_cpu_dec(ftrace_stack_reserve);
3068 	preempt_enable_notrace();
3069 
3070 }
3071 
3072 static inline void ftrace_trace_stack(struct trace_array *tr,
3073 				      struct trace_buffer *buffer,
3074 				      unsigned int trace_ctx,
3075 				      int skip, struct pt_regs *regs)
3076 {
3077 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3078 		return;
3079 
3080 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3081 }
3082 
3083 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3084 		   int skip)
3085 {
3086 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3087 
3088 	if (rcu_is_watching()) {
3089 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090 		return;
3091 	}
3092 
3093 	/*
3094 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3095 	 * but if the above rcu_is_watching() failed, then the NMI
3096 	 * triggered someplace critical, and rcu_irq_enter() should
3097 	 * not be called from NMI.
3098 	 */
3099 	if (unlikely(in_nmi()))
3100 		return;
3101 
3102 	rcu_irq_enter_irqson();
3103 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104 	rcu_irq_exit_irqson();
3105 }
3106 
3107 /**
3108  * trace_dump_stack - record a stack back trace in the trace buffer
3109  * @skip: Number of functions to skip (helper handlers)
3110  */
3111 void trace_dump_stack(int skip)
3112 {
3113 	if (tracing_disabled || tracing_selftest_running)
3114 		return;
3115 
3116 #ifndef CONFIG_UNWINDER_ORC
3117 	/* Skip 1 to skip this function. */
3118 	skip++;
3119 #endif
3120 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3121 			     tracing_gen_ctx(), skip, NULL);
3122 }
3123 EXPORT_SYMBOL_GPL(trace_dump_stack);
3124 
3125 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3126 static DEFINE_PER_CPU(int, user_stack_count);
3127 
3128 static void
3129 ftrace_trace_userstack(struct trace_array *tr,
3130 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3131 {
3132 	struct trace_event_call *call = &event_user_stack;
3133 	struct ring_buffer_event *event;
3134 	struct userstack_entry *entry;
3135 
3136 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3137 		return;
3138 
3139 	/*
3140 	 * NMIs can not handle page faults, even with fix ups.
3141 	 * The save user stack can (and often does) fault.
3142 	 */
3143 	if (unlikely(in_nmi()))
3144 		return;
3145 
3146 	/*
3147 	 * prevent recursion, since the user stack tracing may
3148 	 * trigger other kernel events.
3149 	 */
3150 	preempt_disable();
3151 	if (__this_cpu_read(user_stack_count))
3152 		goto out;
3153 
3154 	__this_cpu_inc(user_stack_count);
3155 
3156 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3157 					    sizeof(*entry), trace_ctx);
3158 	if (!event)
3159 		goto out_drop_count;
3160 	entry	= ring_buffer_event_data(event);
3161 
3162 	entry->tgid		= current->tgid;
3163 	memset(&entry->caller, 0, sizeof(entry->caller));
3164 
3165 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3166 	if (!call_filter_check_discard(call, entry, buffer, event))
3167 		__buffer_unlock_commit(buffer, event);
3168 
3169  out_drop_count:
3170 	__this_cpu_dec(user_stack_count);
3171  out:
3172 	preempt_enable();
3173 }
3174 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3175 static void ftrace_trace_userstack(struct trace_array *tr,
3176 				   struct trace_buffer *buffer,
3177 				   unsigned int trace_ctx)
3178 {
3179 }
3180 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3181 
3182 #endif /* CONFIG_STACKTRACE */
3183 
3184 static inline void
3185 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3186 			  unsigned long long delta)
3187 {
3188 	entry->bottom_delta_ts = delta & U32_MAX;
3189 	entry->top_delta_ts = (delta >> 32);
3190 }
3191 
3192 void trace_last_func_repeats(struct trace_array *tr,
3193 			     struct trace_func_repeats *last_info,
3194 			     unsigned int trace_ctx)
3195 {
3196 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3197 	struct func_repeats_entry *entry;
3198 	struct ring_buffer_event *event;
3199 	u64 delta;
3200 
3201 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3202 					    sizeof(*entry), trace_ctx);
3203 	if (!event)
3204 		return;
3205 
3206 	delta = ring_buffer_event_time_stamp(buffer, event) -
3207 		last_info->ts_last_call;
3208 
3209 	entry = ring_buffer_event_data(event);
3210 	entry->ip = last_info->ip;
3211 	entry->parent_ip = last_info->parent_ip;
3212 	entry->count = last_info->count;
3213 	func_repeats_set_delta_ts(entry, delta);
3214 
3215 	__buffer_unlock_commit(buffer, event);
3216 }
3217 
3218 /* created for use with alloc_percpu */
3219 struct trace_buffer_struct {
3220 	int nesting;
3221 	char buffer[4][TRACE_BUF_SIZE];
3222 };
3223 
3224 static struct trace_buffer_struct *trace_percpu_buffer;
3225 
3226 /*
3227  * This allows for lockless recording.  If we're nested too deeply, then
3228  * this returns NULL.
3229  */
3230 static char *get_trace_buf(void)
3231 {
3232 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3233 
3234 	if (!buffer || buffer->nesting >= 4)
3235 		return NULL;
3236 
3237 	buffer->nesting++;
3238 
3239 	/* Interrupts must see nesting incremented before we use the buffer */
3240 	barrier();
3241 	return &buffer->buffer[buffer->nesting - 1][0];
3242 }
3243 
3244 static void put_trace_buf(void)
3245 {
3246 	/* Don't let the decrement of nesting leak before this */
3247 	barrier();
3248 	this_cpu_dec(trace_percpu_buffer->nesting);
3249 }
3250 
3251 static int alloc_percpu_trace_buffer(void)
3252 {
3253 	struct trace_buffer_struct *buffers;
3254 
3255 	if (trace_percpu_buffer)
3256 		return 0;
3257 
3258 	buffers = alloc_percpu(struct trace_buffer_struct);
3259 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3260 		return -ENOMEM;
3261 
3262 	trace_percpu_buffer = buffers;
3263 	return 0;
3264 }
3265 
3266 static int buffers_allocated;
3267 
3268 void trace_printk_init_buffers(void)
3269 {
3270 	if (buffers_allocated)
3271 		return;
3272 
3273 	if (alloc_percpu_trace_buffer())
3274 		return;
3275 
3276 	/* trace_printk() is for debug use only. Don't use it in production. */
3277 
3278 	pr_warn("\n");
3279 	pr_warn("**********************************************************\n");
3280 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3281 	pr_warn("**                                                      **\n");
3282 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3283 	pr_warn("**                                                      **\n");
3284 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3285 	pr_warn("** unsafe for production use.                           **\n");
3286 	pr_warn("**                                                      **\n");
3287 	pr_warn("** If you see this message and you are not debugging    **\n");
3288 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3289 	pr_warn("**                                                      **\n");
3290 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3291 	pr_warn("**********************************************************\n");
3292 
3293 	/* Expand the buffers to set size */
3294 	tracing_update_buffers();
3295 
3296 	buffers_allocated = 1;
3297 
3298 	/*
3299 	 * trace_printk_init_buffers() can be called by modules.
3300 	 * If that happens, then we need to start cmdline recording
3301 	 * directly here. If the global_trace.buffer is already
3302 	 * allocated here, then this was called by module code.
3303 	 */
3304 	if (global_trace.array_buffer.buffer)
3305 		tracing_start_cmdline_record();
3306 }
3307 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3308 
3309 void trace_printk_start_comm(void)
3310 {
3311 	/* Start tracing comms if trace printk is set */
3312 	if (!buffers_allocated)
3313 		return;
3314 	tracing_start_cmdline_record();
3315 }
3316 
3317 static void trace_printk_start_stop_comm(int enabled)
3318 {
3319 	if (!buffers_allocated)
3320 		return;
3321 
3322 	if (enabled)
3323 		tracing_start_cmdline_record();
3324 	else
3325 		tracing_stop_cmdline_record();
3326 }
3327 
3328 /**
3329  * trace_vbprintk - write binary msg to tracing buffer
3330  * @ip:    The address of the caller
3331  * @fmt:   The string format to write to the buffer
3332  * @args:  Arguments for @fmt
3333  */
3334 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3335 {
3336 	struct trace_event_call *call = &event_bprint;
3337 	struct ring_buffer_event *event;
3338 	struct trace_buffer *buffer;
3339 	struct trace_array *tr = &global_trace;
3340 	struct bprint_entry *entry;
3341 	unsigned int trace_ctx;
3342 	char *tbuffer;
3343 	int len = 0, size;
3344 
3345 	if (unlikely(tracing_selftest_running || tracing_disabled))
3346 		return 0;
3347 
3348 	/* Don't pollute graph traces with trace_vprintk internals */
3349 	pause_graph_tracing();
3350 
3351 	trace_ctx = tracing_gen_ctx();
3352 	preempt_disable_notrace();
3353 
3354 	tbuffer = get_trace_buf();
3355 	if (!tbuffer) {
3356 		len = 0;
3357 		goto out_nobuffer;
3358 	}
3359 
3360 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3361 
3362 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3363 		goto out_put;
3364 
3365 	size = sizeof(*entry) + sizeof(u32) * len;
3366 	buffer = tr->array_buffer.buffer;
3367 	ring_buffer_nest_start(buffer);
3368 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3369 					    trace_ctx);
3370 	if (!event)
3371 		goto out;
3372 	entry = ring_buffer_event_data(event);
3373 	entry->ip			= ip;
3374 	entry->fmt			= fmt;
3375 
3376 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3377 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3378 		__buffer_unlock_commit(buffer, event);
3379 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3380 	}
3381 
3382 out:
3383 	ring_buffer_nest_end(buffer);
3384 out_put:
3385 	put_trace_buf();
3386 
3387 out_nobuffer:
3388 	preempt_enable_notrace();
3389 	unpause_graph_tracing();
3390 
3391 	return len;
3392 }
3393 EXPORT_SYMBOL_GPL(trace_vbprintk);
3394 
3395 __printf(3, 0)
3396 static int
3397 __trace_array_vprintk(struct trace_buffer *buffer,
3398 		      unsigned long ip, const char *fmt, va_list args)
3399 {
3400 	struct trace_event_call *call = &event_print;
3401 	struct ring_buffer_event *event;
3402 	int len = 0, size;
3403 	struct print_entry *entry;
3404 	unsigned int trace_ctx;
3405 	char *tbuffer;
3406 
3407 	if (tracing_disabled || tracing_selftest_running)
3408 		return 0;
3409 
3410 	/* Don't pollute graph traces with trace_vprintk internals */
3411 	pause_graph_tracing();
3412 
3413 	trace_ctx = tracing_gen_ctx();
3414 	preempt_disable_notrace();
3415 
3416 
3417 	tbuffer = get_trace_buf();
3418 	if (!tbuffer) {
3419 		len = 0;
3420 		goto out_nobuffer;
3421 	}
3422 
3423 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3424 
3425 	size = sizeof(*entry) + len + 1;
3426 	ring_buffer_nest_start(buffer);
3427 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3428 					    trace_ctx);
3429 	if (!event)
3430 		goto out;
3431 	entry = ring_buffer_event_data(event);
3432 	entry->ip = ip;
3433 
3434 	memcpy(&entry->buf, tbuffer, len + 1);
3435 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3436 		__buffer_unlock_commit(buffer, event);
3437 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3438 	}
3439 
3440 out:
3441 	ring_buffer_nest_end(buffer);
3442 	put_trace_buf();
3443 
3444 out_nobuffer:
3445 	preempt_enable_notrace();
3446 	unpause_graph_tracing();
3447 
3448 	return len;
3449 }
3450 
3451 __printf(3, 0)
3452 int trace_array_vprintk(struct trace_array *tr,
3453 			unsigned long ip, const char *fmt, va_list args)
3454 {
3455 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3456 }
3457 
3458 /**
3459  * trace_array_printk - Print a message to a specific instance
3460  * @tr: The instance trace_array descriptor
3461  * @ip: The instruction pointer that this is called from.
3462  * @fmt: The format to print (printf format)
3463  *
3464  * If a subsystem sets up its own instance, they have the right to
3465  * printk strings into their tracing instance buffer using this
3466  * function. Note, this function will not write into the top level
3467  * buffer (use trace_printk() for that), as writing into the top level
3468  * buffer should only have events that can be individually disabled.
3469  * trace_printk() is only used for debugging a kernel, and should not
3470  * be ever incorporated in normal use.
3471  *
3472  * trace_array_printk() can be used, as it will not add noise to the
3473  * top level tracing buffer.
3474  *
3475  * Note, trace_array_init_printk() must be called on @tr before this
3476  * can be used.
3477  */
3478 __printf(3, 0)
3479 int trace_array_printk(struct trace_array *tr,
3480 		       unsigned long ip, const char *fmt, ...)
3481 {
3482 	int ret;
3483 	va_list ap;
3484 
3485 	if (!tr)
3486 		return -ENOENT;
3487 
3488 	/* This is only allowed for created instances */
3489 	if (tr == &global_trace)
3490 		return 0;
3491 
3492 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3493 		return 0;
3494 
3495 	va_start(ap, fmt);
3496 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3497 	va_end(ap);
3498 	return ret;
3499 }
3500 EXPORT_SYMBOL_GPL(trace_array_printk);
3501 
3502 /**
3503  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3504  * @tr: The trace array to initialize the buffers for
3505  *
3506  * As trace_array_printk() only writes into instances, they are OK to
3507  * have in the kernel (unlike trace_printk()). This needs to be called
3508  * before trace_array_printk() can be used on a trace_array.
3509  */
3510 int trace_array_init_printk(struct trace_array *tr)
3511 {
3512 	if (!tr)
3513 		return -ENOENT;
3514 
3515 	/* This is only allowed for created instances */
3516 	if (tr == &global_trace)
3517 		return -EINVAL;
3518 
3519 	return alloc_percpu_trace_buffer();
3520 }
3521 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3522 
3523 __printf(3, 4)
3524 int trace_array_printk_buf(struct trace_buffer *buffer,
3525 			   unsigned long ip, const char *fmt, ...)
3526 {
3527 	int ret;
3528 	va_list ap;
3529 
3530 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3531 		return 0;
3532 
3533 	va_start(ap, fmt);
3534 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3535 	va_end(ap);
3536 	return ret;
3537 }
3538 
3539 __printf(2, 0)
3540 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3541 {
3542 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3543 }
3544 EXPORT_SYMBOL_GPL(trace_vprintk);
3545 
3546 static void trace_iterator_increment(struct trace_iterator *iter)
3547 {
3548 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3549 
3550 	iter->idx++;
3551 	if (buf_iter)
3552 		ring_buffer_iter_advance(buf_iter);
3553 }
3554 
3555 static struct trace_entry *
3556 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3557 		unsigned long *lost_events)
3558 {
3559 	struct ring_buffer_event *event;
3560 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3561 
3562 	if (buf_iter) {
3563 		event = ring_buffer_iter_peek(buf_iter, ts);
3564 		if (lost_events)
3565 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3566 				(unsigned long)-1 : 0;
3567 	} else {
3568 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3569 					 lost_events);
3570 	}
3571 
3572 	if (event) {
3573 		iter->ent_size = ring_buffer_event_length(event);
3574 		return ring_buffer_event_data(event);
3575 	}
3576 	iter->ent_size = 0;
3577 	return NULL;
3578 }
3579 
3580 static struct trace_entry *
3581 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3582 		  unsigned long *missing_events, u64 *ent_ts)
3583 {
3584 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3585 	struct trace_entry *ent, *next = NULL;
3586 	unsigned long lost_events = 0, next_lost = 0;
3587 	int cpu_file = iter->cpu_file;
3588 	u64 next_ts = 0, ts;
3589 	int next_cpu = -1;
3590 	int next_size = 0;
3591 	int cpu;
3592 
3593 	/*
3594 	 * If we are in a per_cpu trace file, don't bother by iterating over
3595 	 * all cpu and peek directly.
3596 	 */
3597 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3598 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3599 			return NULL;
3600 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3601 		if (ent_cpu)
3602 			*ent_cpu = cpu_file;
3603 
3604 		return ent;
3605 	}
3606 
3607 	for_each_tracing_cpu(cpu) {
3608 
3609 		if (ring_buffer_empty_cpu(buffer, cpu))
3610 			continue;
3611 
3612 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3613 
3614 		/*
3615 		 * Pick the entry with the smallest timestamp:
3616 		 */
3617 		if (ent && (!next || ts < next_ts)) {
3618 			next = ent;
3619 			next_cpu = cpu;
3620 			next_ts = ts;
3621 			next_lost = lost_events;
3622 			next_size = iter->ent_size;
3623 		}
3624 	}
3625 
3626 	iter->ent_size = next_size;
3627 
3628 	if (ent_cpu)
3629 		*ent_cpu = next_cpu;
3630 
3631 	if (ent_ts)
3632 		*ent_ts = next_ts;
3633 
3634 	if (missing_events)
3635 		*missing_events = next_lost;
3636 
3637 	return next;
3638 }
3639 
3640 #define STATIC_FMT_BUF_SIZE	128
3641 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3642 
3643 static char *trace_iter_expand_format(struct trace_iterator *iter)
3644 {
3645 	char *tmp;
3646 
3647 	/*
3648 	 * iter->tr is NULL when used with tp_printk, which makes
3649 	 * this get called where it is not safe to call krealloc().
3650 	 */
3651 	if (!iter->tr || iter->fmt == static_fmt_buf)
3652 		return NULL;
3653 
3654 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3655 		       GFP_KERNEL);
3656 	if (tmp) {
3657 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3658 		iter->fmt = tmp;
3659 	}
3660 
3661 	return tmp;
3662 }
3663 
3664 /* Returns true if the string is safe to dereference from an event */
3665 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3666 {
3667 	unsigned long addr = (unsigned long)str;
3668 	struct trace_event *trace_event;
3669 	struct trace_event_call *event;
3670 
3671 	/* OK if part of the event data */
3672 	if ((addr >= (unsigned long)iter->ent) &&
3673 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3674 		return true;
3675 
3676 	/* OK if part of the temp seq buffer */
3677 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3678 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3679 		return true;
3680 
3681 	/* Core rodata can not be freed */
3682 	if (is_kernel_rodata(addr))
3683 		return true;
3684 
3685 	if (trace_is_tracepoint_string(str))
3686 		return true;
3687 
3688 	/*
3689 	 * Now this could be a module event, referencing core module
3690 	 * data, which is OK.
3691 	 */
3692 	if (!iter->ent)
3693 		return false;
3694 
3695 	trace_event = ftrace_find_event(iter->ent->type);
3696 	if (!trace_event)
3697 		return false;
3698 
3699 	event = container_of(trace_event, struct trace_event_call, event);
3700 	if (!event->mod)
3701 		return false;
3702 
3703 	/* Would rather have rodata, but this will suffice */
3704 	if (within_module_core(addr, event->mod))
3705 		return true;
3706 
3707 	return false;
3708 }
3709 
3710 static const char *show_buffer(struct trace_seq *s)
3711 {
3712 	struct seq_buf *seq = &s->seq;
3713 
3714 	seq_buf_terminate(seq);
3715 
3716 	return seq->buffer;
3717 }
3718 
3719 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3720 
3721 static int test_can_verify_check(const char *fmt, ...)
3722 {
3723 	char buf[16];
3724 	va_list ap;
3725 	int ret;
3726 
3727 	/*
3728 	 * The verifier is dependent on vsnprintf() modifies the va_list
3729 	 * passed to it, where it is sent as a reference. Some architectures
3730 	 * (like x86_32) passes it by value, which means that vsnprintf()
3731 	 * does not modify the va_list passed to it, and the verifier
3732 	 * would then need to be able to understand all the values that
3733 	 * vsnprintf can use. If it is passed by value, then the verifier
3734 	 * is disabled.
3735 	 */
3736 	va_start(ap, fmt);
3737 	vsnprintf(buf, 16, "%d", ap);
3738 	ret = va_arg(ap, int);
3739 	va_end(ap);
3740 
3741 	return ret;
3742 }
3743 
3744 static void test_can_verify(void)
3745 {
3746 	if (!test_can_verify_check("%d %d", 0, 1)) {
3747 		pr_info("trace event string verifier disabled\n");
3748 		static_branch_inc(&trace_no_verify);
3749 	}
3750 }
3751 
3752 /**
3753  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3754  * @iter: The iterator that holds the seq buffer and the event being printed
3755  * @fmt: The format used to print the event
3756  * @ap: The va_list holding the data to print from @fmt.
3757  *
3758  * This writes the data into the @iter->seq buffer using the data from
3759  * @fmt and @ap. If the format has a %s, then the source of the string
3760  * is examined to make sure it is safe to print, otherwise it will
3761  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3762  * pointer.
3763  */
3764 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3765 			 va_list ap)
3766 {
3767 	const char *p = fmt;
3768 	const char *str;
3769 	int i, j;
3770 
3771 	if (WARN_ON_ONCE(!fmt))
3772 		return;
3773 
3774 	if (static_branch_unlikely(&trace_no_verify))
3775 		goto print;
3776 
3777 	/* Don't bother checking when doing a ftrace_dump() */
3778 	if (iter->fmt == static_fmt_buf)
3779 		goto print;
3780 
3781 	while (*p) {
3782 		bool star = false;
3783 		int len = 0;
3784 
3785 		j = 0;
3786 
3787 		/* We only care about %s and variants */
3788 		for (i = 0; p[i]; i++) {
3789 			if (i + 1 >= iter->fmt_size) {
3790 				/*
3791 				 * If we can't expand the copy buffer,
3792 				 * just print it.
3793 				 */
3794 				if (!trace_iter_expand_format(iter))
3795 					goto print;
3796 			}
3797 
3798 			if (p[i] == '\\' && p[i+1]) {
3799 				i++;
3800 				continue;
3801 			}
3802 			if (p[i] == '%') {
3803 				/* Need to test cases like %08.*s */
3804 				for (j = 1; p[i+j]; j++) {
3805 					if (isdigit(p[i+j]) ||
3806 					    p[i+j] == '.')
3807 						continue;
3808 					if (p[i+j] == '*') {
3809 						star = true;
3810 						continue;
3811 					}
3812 					break;
3813 				}
3814 				if (p[i+j] == 's')
3815 					break;
3816 				star = false;
3817 			}
3818 			j = 0;
3819 		}
3820 		/* If no %s found then just print normally */
3821 		if (!p[i])
3822 			break;
3823 
3824 		/* Copy up to the %s, and print that */
3825 		strncpy(iter->fmt, p, i);
3826 		iter->fmt[i] = '\0';
3827 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3828 
3829 		if (star)
3830 			len = va_arg(ap, int);
3831 
3832 		/* The ap now points to the string data of the %s */
3833 		str = va_arg(ap, const char *);
3834 
3835 		/*
3836 		 * If you hit this warning, it is likely that the
3837 		 * trace event in question used %s on a string that
3838 		 * was saved at the time of the event, but may not be
3839 		 * around when the trace is read. Use __string(),
3840 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3841 		 * instead. See samples/trace_events/trace-events-sample.h
3842 		 * for reference.
3843 		 */
3844 		if (WARN_ONCE(!trace_safe_str(iter, str),
3845 			      "fmt: '%s' current_buffer: '%s'",
3846 			      fmt, show_buffer(&iter->seq))) {
3847 			int ret;
3848 
3849 			/* Try to safely read the string */
3850 			if (star) {
3851 				if (len + 1 > iter->fmt_size)
3852 					len = iter->fmt_size - 1;
3853 				if (len < 0)
3854 					len = 0;
3855 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3856 				iter->fmt[len] = 0;
3857 				star = false;
3858 			} else {
3859 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3860 								  iter->fmt_size);
3861 			}
3862 			if (ret < 0)
3863 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3864 			else
3865 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3866 						 str, iter->fmt);
3867 			str = "[UNSAFE-MEMORY]";
3868 			strcpy(iter->fmt, "%s");
3869 		} else {
3870 			strncpy(iter->fmt, p + i, j + 1);
3871 			iter->fmt[j+1] = '\0';
3872 		}
3873 		if (star)
3874 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3875 		else
3876 			trace_seq_printf(&iter->seq, iter->fmt, str);
3877 
3878 		p += i + j + 1;
3879 	}
3880  print:
3881 	if (*p)
3882 		trace_seq_vprintf(&iter->seq, p, ap);
3883 }
3884 
3885 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3886 {
3887 	const char *p, *new_fmt;
3888 	char *q;
3889 
3890 	if (WARN_ON_ONCE(!fmt))
3891 		return fmt;
3892 
3893 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3894 		return fmt;
3895 
3896 	p = fmt;
3897 	new_fmt = q = iter->fmt;
3898 	while (*p) {
3899 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3900 			if (!trace_iter_expand_format(iter))
3901 				return fmt;
3902 
3903 			q += iter->fmt - new_fmt;
3904 			new_fmt = iter->fmt;
3905 		}
3906 
3907 		*q++ = *p++;
3908 
3909 		/* Replace %p with %px */
3910 		if (p[-1] == '%') {
3911 			if (p[0] == '%') {
3912 				*q++ = *p++;
3913 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3914 				*q++ = *p++;
3915 				*q++ = 'x';
3916 			}
3917 		}
3918 	}
3919 	*q = '\0';
3920 
3921 	return new_fmt;
3922 }
3923 
3924 #define STATIC_TEMP_BUF_SIZE	128
3925 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3926 
3927 /* Find the next real entry, without updating the iterator itself */
3928 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3929 					  int *ent_cpu, u64 *ent_ts)
3930 {
3931 	/* __find_next_entry will reset ent_size */
3932 	int ent_size = iter->ent_size;
3933 	struct trace_entry *entry;
3934 
3935 	/*
3936 	 * If called from ftrace_dump(), then the iter->temp buffer
3937 	 * will be the static_temp_buf and not created from kmalloc.
3938 	 * If the entry size is greater than the buffer, we can
3939 	 * not save it. Just return NULL in that case. This is only
3940 	 * used to add markers when two consecutive events' time
3941 	 * stamps have a large delta. See trace_print_lat_context()
3942 	 */
3943 	if (iter->temp == static_temp_buf &&
3944 	    STATIC_TEMP_BUF_SIZE < ent_size)
3945 		return NULL;
3946 
3947 	/*
3948 	 * The __find_next_entry() may call peek_next_entry(), which may
3949 	 * call ring_buffer_peek() that may make the contents of iter->ent
3950 	 * undefined. Need to copy iter->ent now.
3951 	 */
3952 	if (iter->ent && iter->ent != iter->temp) {
3953 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3954 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3955 			void *temp;
3956 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3957 			if (!temp)
3958 				return NULL;
3959 			kfree(iter->temp);
3960 			iter->temp = temp;
3961 			iter->temp_size = iter->ent_size;
3962 		}
3963 		memcpy(iter->temp, iter->ent, iter->ent_size);
3964 		iter->ent = iter->temp;
3965 	}
3966 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3967 	/* Put back the original ent_size */
3968 	iter->ent_size = ent_size;
3969 
3970 	return entry;
3971 }
3972 
3973 /* Find the next real entry, and increment the iterator to the next entry */
3974 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3975 {
3976 	iter->ent = __find_next_entry(iter, &iter->cpu,
3977 				      &iter->lost_events, &iter->ts);
3978 
3979 	if (iter->ent)
3980 		trace_iterator_increment(iter);
3981 
3982 	return iter->ent ? iter : NULL;
3983 }
3984 
3985 static void trace_consume(struct trace_iterator *iter)
3986 {
3987 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3988 			    &iter->lost_events);
3989 }
3990 
3991 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3992 {
3993 	struct trace_iterator *iter = m->private;
3994 	int i = (int)*pos;
3995 	void *ent;
3996 
3997 	WARN_ON_ONCE(iter->leftover);
3998 
3999 	(*pos)++;
4000 
4001 	/* can't go backwards */
4002 	if (iter->idx > i)
4003 		return NULL;
4004 
4005 	if (iter->idx < 0)
4006 		ent = trace_find_next_entry_inc(iter);
4007 	else
4008 		ent = iter;
4009 
4010 	while (ent && iter->idx < i)
4011 		ent = trace_find_next_entry_inc(iter);
4012 
4013 	iter->pos = *pos;
4014 
4015 	return ent;
4016 }
4017 
4018 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4019 {
4020 	struct ring_buffer_iter *buf_iter;
4021 	unsigned long entries = 0;
4022 	u64 ts;
4023 
4024 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4025 
4026 	buf_iter = trace_buffer_iter(iter, cpu);
4027 	if (!buf_iter)
4028 		return;
4029 
4030 	ring_buffer_iter_reset(buf_iter);
4031 
4032 	/*
4033 	 * We could have the case with the max latency tracers
4034 	 * that a reset never took place on a cpu. This is evident
4035 	 * by the timestamp being before the start of the buffer.
4036 	 */
4037 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4038 		if (ts >= iter->array_buffer->time_start)
4039 			break;
4040 		entries++;
4041 		ring_buffer_iter_advance(buf_iter);
4042 	}
4043 
4044 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4045 }
4046 
4047 /*
4048  * The current tracer is copied to avoid a global locking
4049  * all around.
4050  */
4051 static void *s_start(struct seq_file *m, loff_t *pos)
4052 {
4053 	struct trace_iterator *iter = m->private;
4054 	struct trace_array *tr = iter->tr;
4055 	int cpu_file = iter->cpu_file;
4056 	void *p = NULL;
4057 	loff_t l = 0;
4058 	int cpu;
4059 
4060 	/*
4061 	 * copy the tracer to avoid using a global lock all around.
4062 	 * iter->trace is a copy of current_trace, the pointer to the
4063 	 * name may be used instead of a strcmp(), as iter->trace->name
4064 	 * will point to the same string as current_trace->name.
4065 	 */
4066 	mutex_lock(&trace_types_lock);
4067 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4068 		*iter->trace = *tr->current_trace;
4069 	mutex_unlock(&trace_types_lock);
4070 
4071 #ifdef CONFIG_TRACER_MAX_TRACE
4072 	if (iter->snapshot && iter->trace->use_max_tr)
4073 		return ERR_PTR(-EBUSY);
4074 #endif
4075 
4076 	if (*pos != iter->pos) {
4077 		iter->ent = NULL;
4078 		iter->cpu = 0;
4079 		iter->idx = -1;
4080 
4081 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4082 			for_each_tracing_cpu(cpu)
4083 				tracing_iter_reset(iter, cpu);
4084 		} else
4085 			tracing_iter_reset(iter, cpu_file);
4086 
4087 		iter->leftover = 0;
4088 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4089 			;
4090 
4091 	} else {
4092 		/*
4093 		 * If we overflowed the seq_file before, then we want
4094 		 * to just reuse the trace_seq buffer again.
4095 		 */
4096 		if (iter->leftover)
4097 			p = iter;
4098 		else {
4099 			l = *pos - 1;
4100 			p = s_next(m, p, &l);
4101 		}
4102 	}
4103 
4104 	trace_event_read_lock();
4105 	trace_access_lock(cpu_file);
4106 	return p;
4107 }
4108 
4109 static void s_stop(struct seq_file *m, void *p)
4110 {
4111 	struct trace_iterator *iter = m->private;
4112 
4113 #ifdef CONFIG_TRACER_MAX_TRACE
4114 	if (iter->snapshot && iter->trace->use_max_tr)
4115 		return;
4116 #endif
4117 
4118 	trace_access_unlock(iter->cpu_file);
4119 	trace_event_read_unlock();
4120 }
4121 
4122 static void
4123 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4124 		      unsigned long *entries, int cpu)
4125 {
4126 	unsigned long count;
4127 
4128 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4129 	/*
4130 	 * If this buffer has skipped entries, then we hold all
4131 	 * entries for the trace and we need to ignore the
4132 	 * ones before the time stamp.
4133 	 */
4134 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4135 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4136 		/* total is the same as the entries */
4137 		*total = count;
4138 	} else
4139 		*total = count +
4140 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4141 	*entries = count;
4142 }
4143 
4144 static void
4145 get_total_entries(struct array_buffer *buf,
4146 		  unsigned long *total, unsigned long *entries)
4147 {
4148 	unsigned long t, e;
4149 	int cpu;
4150 
4151 	*total = 0;
4152 	*entries = 0;
4153 
4154 	for_each_tracing_cpu(cpu) {
4155 		get_total_entries_cpu(buf, &t, &e, cpu);
4156 		*total += t;
4157 		*entries += e;
4158 	}
4159 }
4160 
4161 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4162 {
4163 	unsigned long total, entries;
4164 
4165 	if (!tr)
4166 		tr = &global_trace;
4167 
4168 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4169 
4170 	return entries;
4171 }
4172 
4173 unsigned long trace_total_entries(struct trace_array *tr)
4174 {
4175 	unsigned long total, entries;
4176 
4177 	if (!tr)
4178 		tr = &global_trace;
4179 
4180 	get_total_entries(&tr->array_buffer, &total, &entries);
4181 
4182 	return entries;
4183 }
4184 
4185 static void print_lat_help_header(struct seq_file *m)
4186 {
4187 	seq_puts(m, "#                    _------=> CPU#            \n"
4188 		    "#                   / _-----=> irqs-off        \n"
4189 		    "#                  | / _----=> need-resched    \n"
4190 		    "#                  || / _---=> hardirq/softirq \n"
4191 		    "#                  ||| / _--=> preempt-depth   \n"
4192 		    "#                  |||| /     delay            \n"
4193 		    "#  cmd     pid     ||||| time  |   caller      \n"
4194 		    "#     \\   /        |||||  \\    |   /         \n");
4195 }
4196 
4197 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4198 {
4199 	unsigned long total;
4200 	unsigned long entries;
4201 
4202 	get_total_entries(buf, &total, &entries);
4203 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4204 		   entries, total, num_online_cpus());
4205 	seq_puts(m, "#\n");
4206 }
4207 
4208 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4209 				   unsigned int flags)
4210 {
4211 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4212 
4213 	print_event_info(buf, m);
4214 
4215 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4216 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4217 }
4218 
4219 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4220 				       unsigned int flags)
4221 {
4222 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4223 	const char *space = "            ";
4224 	int prec = tgid ? 12 : 2;
4225 
4226 	print_event_info(buf, m);
4227 
4228 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4229 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4230 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4231 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4232 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4233 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4234 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4235 }
4236 
4237 void
4238 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4239 {
4240 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4241 	struct array_buffer *buf = iter->array_buffer;
4242 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4243 	struct tracer *type = iter->trace;
4244 	unsigned long entries;
4245 	unsigned long total;
4246 	const char *name = "preemption";
4247 
4248 	name = type->name;
4249 
4250 	get_total_entries(buf, &total, &entries);
4251 
4252 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4253 		   name, UTS_RELEASE);
4254 	seq_puts(m, "# -----------------------------------"
4255 		 "---------------------------------\n");
4256 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4257 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4258 		   nsecs_to_usecs(data->saved_latency),
4259 		   entries,
4260 		   total,
4261 		   buf->cpu,
4262 #if defined(CONFIG_PREEMPT_NONE)
4263 		   "server",
4264 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4265 		   "desktop",
4266 #elif defined(CONFIG_PREEMPT)
4267 		   "preempt",
4268 #elif defined(CONFIG_PREEMPT_RT)
4269 		   "preempt_rt",
4270 #else
4271 		   "unknown",
4272 #endif
4273 		   /* These are reserved for later use */
4274 		   0, 0, 0, 0);
4275 #ifdef CONFIG_SMP
4276 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4277 #else
4278 	seq_puts(m, ")\n");
4279 #endif
4280 	seq_puts(m, "#    -----------------\n");
4281 	seq_printf(m, "#    | task: %.16s-%d "
4282 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4283 		   data->comm, data->pid,
4284 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4285 		   data->policy, data->rt_priority);
4286 	seq_puts(m, "#    -----------------\n");
4287 
4288 	if (data->critical_start) {
4289 		seq_puts(m, "#  => started at: ");
4290 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4291 		trace_print_seq(m, &iter->seq);
4292 		seq_puts(m, "\n#  => ended at:   ");
4293 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4294 		trace_print_seq(m, &iter->seq);
4295 		seq_puts(m, "\n#\n");
4296 	}
4297 
4298 	seq_puts(m, "#\n");
4299 }
4300 
4301 static void test_cpu_buff_start(struct trace_iterator *iter)
4302 {
4303 	struct trace_seq *s = &iter->seq;
4304 	struct trace_array *tr = iter->tr;
4305 
4306 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4307 		return;
4308 
4309 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4310 		return;
4311 
4312 	if (cpumask_available(iter->started) &&
4313 	    cpumask_test_cpu(iter->cpu, iter->started))
4314 		return;
4315 
4316 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4317 		return;
4318 
4319 	if (cpumask_available(iter->started))
4320 		cpumask_set_cpu(iter->cpu, iter->started);
4321 
4322 	/* Don't print started cpu buffer for the first entry of the trace */
4323 	if (iter->idx > 1)
4324 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4325 				iter->cpu);
4326 }
4327 
4328 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4329 {
4330 	struct trace_array *tr = iter->tr;
4331 	struct trace_seq *s = &iter->seq;
4332 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4333 	struct trace_entry *entry;
4334 	struct trace_event *event;
4335 
4336 	entry = iter->ent;
4337 
4338 	test_cpu_buff_start(iter);
4339 
4340 	event = ftrace_find_event(entry->type);
4341 
4342 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4343 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4344 			trace_print_lat_context(iter);
4345 		else
4346 			trace_print_context(iter);
4347 	}
4348 
4349 	if (trace_seq_has_overflowed(s))
4350 		return TRACE_TYPE_PARTIAL_LINE;
4351 
4352 	if (event)
4353 		return event->funcs->trace(iter, sym_flags, event);
4354 
4355 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4356 
4357 	return trace_handle_return(s);
4358 }
4359 
4360 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4361 {
4362 	struct trace_array *tr = iter->tr;
4363 	struct trace_seq *s = &iter->seq;
4364 	struct trace_entry *entry;
4365 	struct trace_event *event;
4366 
4367 	entry = iter->ent;
4368 
4369 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4370 		trace_seq_printf(s, "%d %d %llu ",
4371 				 entry->pid, iter->cpu, iter->ts);
4372 
4373 	if (trace_seq_has_overflowed(s))
4374 		return TRACE_TYPE_PARTIAL_LINE;
4375 
4376 	event = ftrace_find_event(entry->type);
4377 	if (event)
4378 		return event->funcs->raw(iter, 0, event);
4379 
4380 	trace_seq_printf(s, "%d ?\n", entry->type);
4381 
4382 	return trace_handle_return(s);
4383 }
4384 
4385 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4386 {
4387 	struct trace_array *tr = iter->tr;
4388 	struct trace_seq *s = &iter->seq;
4389 	unsigned char newline = '\n';
4390 	struct trace_entry *entry;
4391 	struct trace_event *event;
4392 
4393 	entry = iter->ent;
4394 
4395 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4397 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4398 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4399 		if (trace_seq_has_overflowed(s))
4400 			return TRACE_TYPE_PARTIAL_LINE;
4401 	}
4402 
4403 	event = ftrace_find_event(entry->type);
4404 	if (event) {
4405 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4406 		if (ret != TRACE_TYPE_HANDLED)
4407 			return ret;
4408 	}
4409 
4410 	SEQ_PUT_FIELD(s, newline);
4411 
4412 	return trace_handle_return(s);
4413 }
4414 
4415 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4416 {
4417 	struct trace_array *tr = iter->tr;
4418 	struct trace_seq *s = &iter->seq;
4419 	struct trace_entry *entry;
4420 	struct trace_event *event;
4421 
4422 	entry = iter->ent;
4423 
4424 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4425 		SEQ_PUT_FIELD(s, entry->pid);
4426 		SEQ_PUT_FIELD(s, iter->cpu);
4427 		SEQ_PUT_FIELD(s, iter->ts);
4428 		if (trace_seq_has_overflowed(s))
4429 			return TRACE_TYPE_PARTIAL_LINE;
4430 	}
4431 
4432 	event = ftrace_find_event(entry->type);
4433 	return event ? event->funcs->binary(iter, 0, event) :
4434 		TRACE_TYPE_HANDLED;
4435 }
4436 
4437 int trace_empty(struct trace_iterator *iter)
4438 {
4439 	struct ring_buffer_iter *buf_iter;
4440 	int cpu;
4441 
4442 	/* If we are looking at one CPU buffer, only check that one */
4443 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4444 		cpu = iter->cpu_file;
4445 		buf_iter = trace_buffer_iter(iter, cpu);
4446 		if (buf_iter) {
4447 			if (!ring_buffer_iter_empty(buf_iter))
4448 				return 0;
4449 		} else {
4450 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451 				return 0;
4452 		}
4453 		return 1;
4454 	}
4455 
4456 	for_each_tracing_cpu(cpu) {
4457 		buf_iter = trace_buffer_iter(iter, cpu);
4458 		if (buf_iter) {
4459 			if (!ring_buffer_iter_empty(buf_iter))
4460 				return 0;
4461 		} else {
4462 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463 				return 0;
4464 		}
4465 	}
4466 
4467 	return 1;
4468 }
4469 
4470 /*  Called with trace_event_read_lock() held. */
4471 enum print_line_t print_trace_line(struct trace_iterator *iter)
4472 {
4473 	struct trace_array *tr = iter->tr;
4474 	unsigned long trace_flags = tr->trace_flags;
4475 	enum print_line_t ret;
4476 
4477 	if (iter->lost_events) {
4478 		if (iter->lost_events == (unsigned long)-1)
4479 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4480 					 iter->cpu);
4481 		else
4482 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4483 					 iter->cpu, iter->lost_events);
4484 		if (trace_seq_has_overflowed(&iter->seq))
4485 			return TRACE_TYPE_PARTIAL_LINE;
4486 	}
4487 
4488 	if (iter->trace && iter->trace->print_line) {
4489 		ret = iter->trace->print_line(iter);
4490 		if (ret != TRACE_TYPE_UNHANDLED)
4491 			return ret;
4492 	}
4493 
4494 	if (iter->ent->type == TRACE_BPUTS &&
4495 			trace_flags & TRACE_ITER_PRINTK &&
4496 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4497 		return trace_print_bputs_msg_only(iter);
4498 
4499 	if (iter->ent->type == TRACE_BPRINT &&
4500 			trace_flags & TRACE_ITER_PRINTK &&
4501 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4502 		return trace_print_bprintk_msg_only(iter);
4503 
4504 	if (iter->ent->type == TRACE_PRINT &&
4505 			trace_flags & TRACE_ITER_PRINTK &&
4506 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4507 		return trace_print_printk_msg_only(iter);
4508 
4509 	if (trace_flags & TRACE_ITER_BIN)
4510 		return print_bin_fmt(iter);
4511 
4512 	if (trace_flags & TRACE_ITER_HEX)
4513 		return print_hex_fmt(iter);
4514 
4515 	if (trace_flags & TRACE_ITER_RAW)
4516 		return print_raw_fmt(iter);
4517 
4518 	return print_trace_fmt(iter);
4519 }
4520 
4521 void trace_latency_header(struct seq_file *m)
4522 {
4523 	struct trace_iterator *iter = m->private;
4524 	struct trace_array *tr = iter->tr;
4525 
4526 	/* print nothing if the buffers are empty */
4527 	if (trace_empty(iter))
4528 		return;
4529 
4530 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4531 		print_trace_header(m, iter);
4532 
4533 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4534 		print_lat_help_header(m);
4535 }
4536 
4537 void trace_default_header(struct seq_file *m)
4538 {
4539 	struct trace_iterator *iter = m->private;
4540 	struct trace_array *tr = iter->tr;
4541 	unsigned long trace_flags = tr->trace_flags;
4542 
4543 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4544 		return;
4545 
4546 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4547 		/* print nothing if the buffers are empty */
4548 		if (trace_empty(iter))
4549 			return;
4550 		print_trace_header(m, iter);
4551 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4552 			print_lat_help_header(m);
4553 	} else {
4554 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4555 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4556 				print_func_help_header_irq(iter->array_buffer,
4557 							   m, trace_flags);
4558 			else
4559 				print_func_help_header(iter->array_buffer, m,
4560 						       trace_flags);
4561 		}
4562 	}
4563 }
4564 
4565 static void test_ftrace_alive(struct seq_file *m)
4566 {
4567 	if (!ftrace_is_dead())
4568 		return;
4569 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4570 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4571 }
4572 
4573 #ifdef CONFIG_TRACER_MAX_TRACE
4574 static void show_snapshot_main_help(struct seq_file *m)
4575 {
4576 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4577 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578 		    "#                      Takes a snapshot of the main buffer.\n"
4579 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4580 		    "#                      (Doesn't have to be '2' works with any number that\n"
4581 		    "#                       is not a '0' or '1')\n");
4582 }
4583 
4584 static void show_snapshot_percpu_help(struct seq_file *m)
4585 {
4586 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4587 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4588 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4590 #else
4591 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4592 		    "#                     Must use main snapshot file to allocate.\n");
4593 #endif
4594 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4595 		    "#                      (Doesn't have to be '2' works with any number that\n"
4596 		    "#                       is not a '0' or '1')\n");
4597 }
4598 
4599 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4600 {
4601 	if (iter->tr->allocated_snapshot)
4602 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4603 	else
4604 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4605 
4606 	seq_puts(m, "# Snapshot commands:\n");
4607 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4608 		show_snapshot_main_help(m);
4609 	else
4610 		show_snapshot_percpu_help(m);
4611 }
4612 #else
4613 /* Should never be called */
4614 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4615 #endif
4616 
4617 static int s_show(struct seq_file *m, void *v)
4618 {
4619 	struct trace_iterator *iter = v;
4620 	int ret;
4621 
4622 	if (iter->ent == NULL) {
4623 		if (iter->tr) {
4624 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4625 			seq_puts(m, "#\n");
4626 			test_ftrace_alive(m);
4627 		}
4628 		if (iter->snapshot && trace_empty(iter))
4629 			print_snapshot_help(m, iter);
4630 		else if (iter->trace && iter->trace->print_header)
4631 			iter->trace->print_header(m);
4632 		else
4633 			trace_default_header(m);
4634 
4635 	} else if (iter->leftover) {
4636 		/*
4637 		 * If we filled the seq_file buffer earlier, we
4638 		 * want to just show it now.
4639 		 */
4640 		ret = trace_print_seq(m, &iter->seq);
4641 
4642 		/* ret should this time be zero, but you never know */
4643 		iter->leftover = ret;
4644 
4645 	} else {
4646 		print_trace_line(iter);
4647 		ret = trace_print_seq(m, &iter->seq);
4648 		/*
4649 		 * If we overflow the seq_file buffer, then it will
4650 		 * ask us for this data again at start up.
4651 		 * Use that instead.
4652 		 *  ret is 0 if seq_file write succeeded.
4653 		 *        -1 otherwise.
4654 		 */
4655 		iter->leftover = ret;
4656 	}
4657 
4658 	return 0;
4659 }
4660 
4661 /*
4662  * Should be used after trace_array_get(), trace_types_lock
4663  * ensures that i_cdev was already initialized.
4664  */
4665 static inline int tracing_get_cpu(struct inode *inode)
4666 {
4667 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4668 		return (long)inode->i_cdev - 1;
4669 	return RING_BUFFER_ALL_CPUS;
4670 }
4671 
4672 static const struct seq_operations tracer_seq_ops = {
4673 	.start		= s_start,
4674 	.next		= s_next,
4675 	.stop		= s_stop,
4676 	.show		= s_show,
4677 };
4678 
4679 static struct trace_iterator *
4680 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4681 {
4682 	struct trace_array *tr = inode->i_private;
4683 	struct trace_iterator *iter;
4684 	int cpu;
4685 
4686 	if (tracing_disabled)
4687 		return ERR_PTR(-ENODEV);
4688 
4689 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4690 	if (!iter)
4691 		return ERR_PTR(-ENOMEM);
4692 
4693 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4694 				    GFP_KERNEL);
4695 	if (!iter->buffer_iter)
4696 		goto release;
4697 
4698 	/*
4699 	 * trace_find_next_entry() may need to save off iter->ent.
4700 	 * It will place it into the iter->temp buffer. As most
4701 	 * events are less than 128, allocate a buffer of that size.
4702 	 * If one is greater, then trace_find_next_entry() will
4703 	 * allocate a new buffer to adjust for the bigger iter->ent.
4704 	 * It's not critical if it fails to get allocated here.
4705 	 */
4706 	iter->temp = kmalloc(128, GFP_KERNEL);
4707 	if (iter->temp)
4708 		iter->temp_size = 128;
4709 
4710 	/*
4711 	 * trace_event_printf() may need to modify given format
4712 	 * string to replace %p with %px so that it shows real address
4713 	 * instead of hash value. However, that is only for the event
4714 	 * tracing, other tracer may not need. Defer the allocation
4715 	 * until it is needed.
4716 	 */
4717 	iter->fmt = NULL;
4718 	iter->fmt_size = 0;
4719 
4720 	/*
4721 	 * We make a copy of the current tracer to avoid concurrent
4722 	 * changes on it while we are reading.
4723 	 */
4724 	mutex_lock(&trace_types_lock);
4725 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4726 	if (!iter->trace)
4727 		goto fail;
4728 
4729 	*iter->trace = *tr->current_trace;
4730 
4731 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4732 		goto fail;
4733 
4734 	iter->tr = tr;
4735 
4736 #ifdef CONFIG_TRACER_MAX_TRACE
4737 	/* Currently only the top directory has a snapshot */
4738 	if (tr->current_trace->print_max || snapshot)
4739 		iter->array_buffer = &tr->max_buffer;
4740 	else
4741 #endif
4742 		iter->array_buffer = &tr->array_buffer;
4743 	iter->snapshot = snapshot;
4744 	iter->pos = -1;
4745 	iter->cpu_file = tracing_get_cpu(inode);
4746 	mutex_init(&iter->mutex);
4747 
4748 	/* Notify the tracer early; before we stop tracing. */
4749 	if (iter->trace->open)
4750 		iter->trace->open(iter);
4751 
4752 	/* Annotate start of buffers if we had overruns */
4753 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4754 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4755 
4756 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4757 	if (trace_clocks[tr->clock_id].in_ns)
4758 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4759 
4760 	/*
4761 	 * If pause-on-trace is enabled, then stop the trace while
4762 	 * dumping, unless this is the "snapshot" file
4763 	 */
4764 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4765 		tracing_stop_tr(tr);
4766 
4767 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4768 		for_each_tracing_cpu(cpu) {
4769 			iter->buffer_iter[cpu] =
4770 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4771 							 cpu, GFP_KERNEL);
4772 		}
4773 		ring_buffer_read_prepare_sync();
4774 		for_each_tracing_cpu(cpu) {
4775 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4776 			tracing_iter_reset(iter, cpu);
4777 		}
4778 	} else {
4779 		cpu = iter->cpu_file;
4780 		iter->buffer_iter[cpu] =
4781 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4782 						 cpu, GFP_KERNEL);
4783 		ring_buffer_read_prepare_sync();
4784 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4785 		tracing_iter_reset(iter, cpu);
4786 	}
4787 
4788 	mutex_unlock(&trace_types_lock);
4789 
4790 	return iter;
4791 
4792  fail:
4793 	mutex_unlock(&trace_types_lock);
4794 	kfree(iter->trace);
4795 	kfree(iter->temp);
4796 	kfree(iter->buffer_iter);
4797 release:
4798 	seq_release_private(inode, file);
4799 	return ERR_PTR(-ENOMEM);
4800 }
4801 
4802 int tracing_open_generic(struct inode *inode, struct file *filp)
4803 {
4804 	int ret;
4805 
4806 	ret = tracing_check_open_get_tr(NULL);
4807 	if (ret)
4808 		return ret;
4809 
4810 	filp->private_data = inode->i_private;
4811 	return 0;
4812 }
4813 
4814 bool tracing_is_disabled(void)
4815 {
4816 	return (tracing_disabled) ? true: false;
4817 }
4818 
4819 /*
4820  * Open and update trace_array ref count.
4821  * Must have the current trace_array passed to it.
4822  */
4823 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4824 {
4825 	struct trace_array *tr = inode->i_private;
4826 	int ret;
4827 
4828 	ret = tracing_check_open_get_tr(tr);
4829 	if (ret)
4830 		return ret;
4831 
4832 	filp->private_data = inode->i_private;
4833 
4834 	return 0;
4835 }
4836 
4837 static int tracing_release(struct inode *inode, struct file *file)
4838 {
4839 	struct trace_array *tr = inode->i_private;
4840 	struct seq_file *m = file->private_data;
4841 	struct trace_iterator *iter;
4842 	int cpu;
4843 
4844 	if (!(file->f_mode & FMODE_READ)) {
4845 		trace_array_put(tr);
4846 		return 0;
4847 	}
4848 
4849 	/* Writes do not use seq_file */
4850 	iter = m->private;
4851 	mutex_lock(&trace_types_lock);
4852 
4853 	for_each_tracing_cpu(cpu) {
4854 		if (iter->buffer_iter[cpu])
4855 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4856 	}
4857 
4858 	if (iter->trace && iter->trace->close)
4859 		iter->trace->close(iter);
4860 
4861 	if (!iter->snapshot && tr->stop_count)
4862 		/* reenable tracing if it was previously enabled */
4863 		tracing_start_tr(tr);
4864 
4865 	__trace_array_put(tr);
4866 
4867 	mutex_unlock(&trace_types_lock);
4868 
4869 	mutex_destroy(&iter->mutex);
4870 	free_cpumask_var(iter->started);
4871 	kfree(iter->fmt);
4872 	kfree(iter->temp);
4873 	kfree(iter->trace);
4874 	kfree(iter->buffer_iter);
4875 	seq_release_private(inode, file);
4876 
4877 	return 0;
4878 }
4879 
4880 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4881 {
4882 	struct trace_array *tr = inode->i_private;
4883 
4884 	trace_array_put(tr);
4885 	return 0;
4886 }
4887 
4888 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4889 {
4890 	struct trace_array *tr = inode->i_private;
4891 
4892 	trace_array_put(tr);
4893 
4894 	return single_release(inode, file);
4895 }
4896 
4897 static int tracing_open(struct inode *inode, struct file *file)
4898 {
4899 	struct trace_array *tr = inode->i_private;
4900 	struct trace_iterator *iter;
4901 	int ret;
4902 
4903 	ret = tracing_check_open_get_tr(tr);
4904 	if (ret)
4905 		return ret;
4906 
4907 	/* If this file was open for write, then erase contents */
4908 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4909 		int cpu = tracing_get_cpu(inode);
4910 		struct array_buffer *trace_buf = &tr->array_buffer;
4911 
4912 #ifdef CONFIG_TRACER_MAX_TRACE
4913 		if (tr->current_trace->print_max)
4914 			trace_buf = &tr->max_buffer;
4915 #endif
4916 
4917 		if (cpu == RING_BUFFER_ALL_CPUS)
4918 			tracing_reset_online_cpus(trace_buf);
4919 		else
4920 			tracing_reset_cpu(trace_buf, cpu);
4921 	}
4922 
4923 	if (file->f_mode & FMODE_READ) {
4924 		iter = __tracing_open(inode, file, false);
4925 		if (IS_ERR(iter))
4926 			ret = PTR_ERR(iter);
4927 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4928 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4929 	}
4930 
4931 	if (ret < 0)
4932 		trace_array_put(tr);
4933 
4934 	return ret;
4935 }
4936 
4937 /*
4938  * Some tracers are not suitable for instance buffers.
4939  * A tracer is always available for the global array (toplevel)
4940  * or if it explicitly states that it is.
4941  */
4942 static bool
4943 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4944 {
4945 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4946 }
4947 
4948 /* Find the next tracer that this trace array may use */
4949 static struct tracer *
4950 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4951 {
4952 	while (t && !trace_ok_for_array(t, tr))
4953 		t = t->next;
4954 
4955 	return t;
4956 }
4957 
4958 static void *
4959 t_next(struct seq_file *m, void *v, loff_t *pos)
4960 {
4961 	struct trace_array *tr = m->private;
4962 	struct tracer *t = v;
4963 
4964 	(*pos)++;
4965 
4966 	if (t)
4967 		t = get_tracer_for_array(tr, t->next);
4968 
4969 	return t;
4970 }
4971 
4972 static void *t_start(struct seq_file *m, loff_t *pos)
4973 {
4974 	struct trace_array *tr = m->private;
4975 	struct tracer *t;
4976 	loff_t l = 0;
4977 
4978 	mutex_lock(&trace_types_lock);
4979 
4980 	t = get_tracer_for_array(tr, trace_types);
4981 	for (; t && l < *pos; t = t_next(m, t, &l))
4982 			;
4983 
4984 	return t;
4985 }
4986 
4987 static void t_stop(struct seq_file *m, void *p)
4988 {
4989 	mutex_unlock(&trace_types_lock);
4990 }
4991 
4992 static int t_show(struct seq_file *m, void *v)
4993 {
4994 	struct tracer *t = v;
4995 
4996 	if (!t)
4997 		return 0;
4998 
4999 	seq_puts(m, t->name);
5000 	if (t->next)
5001 		seq_putc(m, ' ');
5002 	else
5003 		seq_putc(m, '\n');
5004 
5005 	return 0;
5006 }
5007 
5008 static const struct seq_operations show_traces_seq_ops = {
5009 	.start		= t_start,
5010 	.next		= t_next,
5011 	.stop		= t_stop,
5012 	.show		= t_show,
5013 };
5014 
5015 static int show_traces_open(struct inode *inode, struct file *file)
5016 {
5017 	struct trace_array *tr = inode->i_private;
5018 	struct seq_file *m;
5019 	int ret;
5020 
5021 	ret = tracing_check_open_get_tr(tr);
5022 	if (ret)
5023 		return ret;
5024 
5025 	ret = seq_open(file, &show_traces_seq_ops);
5026 	if (ret) {
5027 		trace_array_put(tr);
5028 		return ret;
5029 	}
5030 
5031 	m = file->private_data;
5032 	m->private = tr;
5033 
5034 	return 0;
5035 }
5036 
5037 static int show_traces_release(struct inode *inode, struct file *file)
5038 {
5039 	struct trace_array *tr = inode->i_private;
5040 
5041 	trace_array_put(tr);
5042 	return seq_release(inode, file);
5043 }
5044 
5045 static ssize_t
5046 tracing_write_stub(struct file *filp, const char __user *ubuf,
5047 		   size_t count, loff_t *ppos)
5048 {
5049 	return count;
5050 }
5051 
5052 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5053 {
5054 	int ret;
5055 
5056 	if (file->f_mode & FMODE_READ)
5057 		ret = seq_lseek(file, offset, whence);
5058 	else
5059 		file->f_pos = ret = 0;
5060 
5061 	return ret;
5062 }
5063 
5064 static const struct file_operations tracing_fops = {
5065 	.open		= tracing_open,
5066 	.read		= seq_read,
5067 	.write		= tracing_write_stub,
5068 	.llseek		= tracing_lseek,
5069 	.release	= tracing_release,
5070 };
5071 
5072 static const struct file_operations show_traces_fops = {
5073 	.open		= show_traces_open,
5074 	.read		= seq_read,
5075 	.llseek		= seq_lseek,
5076 	.release	= show_traces_release,
5077 };
5078 
5079 static ssize_t
5080 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5081 		     size_t count, loff_t *ppos)
5082 {
5083 	struct trace_array *tr = file_inode(filp)->i_private;
5084 	char *mask_str;
5085 	int len;
5086 
5087 	len = snprintf(NULL, 0, "%*pb\n",
5088 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5089 	mask_str = kmalloc(len, GFP_KERNEL);
5090 	if (!mask_str)
5091 		return -ENOMEM;
5092 
5093 	len = snprintf(mask_str, len, "%*pb\n",
5094 		       cpumask_pr_args(tr->tracing_cpumask));
5095 	if (len >= count) {
5096 		count = -EINVAL;
5097 		goto out_err;
5098 	}
5099 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5100 
5101 out_err:
5102 	kfree(mask_str);
5103 
5104 	return count;
5105 }
5106 
5107 int tracing_set_cpumask(struct trace_array *tr,
5108 			cpumask_var_t tracing_cpumask_new)
5109 {
5110 	int cpu;
5111 
5112 	if (!tr)
5113 		return -EINVAL;
5114 
5115 	local_irq_disable();
5116 	arch_spin_lock(&tr->max_lock);
5117 	for_each_tracing_cpu(cpu) {
5118 		/*
5119 		 * Increase/decrease the disabled counter if we are
5120 		 * about to flip a bit in the cpumask:
5121 		 */
5122 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5123 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5124 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5125 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5126 		}
5127 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5128 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5129 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5130 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5131 		}
5132 	}
5133 	arch_spin_unlock(&tr->max_lock);
5134 	local_irq_enable();
5135 
5136 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5137 
5138 	return 0;
5139 }
5140 
5141 static ssize_t
5142 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5143 		      size_t count, loff_t *ppos)
5144 {
5145 	struct trace_array *tr = file_inode(filp)->i_private;
5146 	cpumask_var_t tracing_cpumask_new;
5147 	int err;
5148 
5149 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5150 		return -ENOMEM;
5151 
5152 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5153 	if (err)
5154 		goto err_free;
5155 
5156 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5157 	if (err)
5158 		goto err_free;
5159 
5160 	free_cpumask_var(tracing_cpumask_new);
5161 
5162 	return count;
5163 
5164 err_free:
5165 	free_cpumask_var(tracing_cpumask_new);
5166 
5167 	return err;
5168 }
5169 
5170 static const struct file_operations tracing_cpumask_fops = {
5171 	.open		= tracing_open_generic_tr,
5172 	.read		= tracing_cpumask_read,
5173 	.write		= tracing_cpumask_write,
5174 	.release	= tracing_release_generic_tr,
5175 	.llseek		= generic_file_llseek,
5176 };
5177 
5178 static int tracing_trace_options_show(struct seq_file *m, void *v)
5179 {
5180 	struct tracer_opt *trace_opts;
5181 	struct trace_array *tr = m->private;
5182 	u32 tracer_flags;
5183 	int i;
5184 
5185 	mutex_lock(&trace_types_lock);
5186 	tracer_flags = tr->current_trace->flags->val;
5187 	trace_opts = tr->current_trace->flags->opts;
5188 
5189 	for (i = 0; trace_options[i]; i++) {
5190 		if (tr->trace_flags & (1 << i))
5191 			seq_printf(m, "%s\n", trace_options[i]);
5192 		else
5193 			seq_printf(m, "no%s\n", trace_options[i]);
5194 	}
5195 
5196 	for (i = 0; trace_opts[i].name; i++) {
5197 		if (tracer_flags & trace_opts[i].bit)
5198 			seq_printf(m, "%s\n", trace_opts[i].name);
5199 		else
5200 			seq_printf(m, "no%s\n", trace_opts[i].name);
5201 	}
5202 	mutex_unlock(&trace_types_lock);
5203 
5204 	return 0;
5205 }
5206 
5207 static int __set_tracer_option(struct trace_array *tr,
5208 			       struct tracer_flags *tracer_flags,
5209 			       struct tracer_opt *opts, int neg)
5210 {
5211 	struct tracer *trace = tracer_flags->trace;
5212 	int ret;
5213 
5214 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5215 	if (ret)
5216 		return ret;
5217 
5218 	if (neg)
5219 		tracer_flags->val &= ~opts->bit;
5220 	else
5221 		tracer_flags->val |= opts->bit;
5222 	return 0;
5223 }
5224 
5225 /* Try to assign a tracer specific option */
5226 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5227 {
5228 	struct tracer *trace = tr->current_trace;
5229 	struct tracer_flags *tracer_flags = trace->flags;
5230 	struct tracer_opt *opts = NULL;
5231 	int i;
5232 
5233 	for (i = 0; tracer_flags->opts[i].name; i++) {
5234 		opts = &tracer_flags->opts[i];
5235 
5236 		if (strcmp(cmp, opts->name) == 0)
5237 			return __set_tracer_option(tr, trace->flags, opts, neg);
5238 	}
5239 
5240 	return -EINVAL;
5241 }
5242 
5243 /* Some tracers require overwrite to stay enabled */
5244 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5245 {
5246 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5247 		return -1;
5248 
5249 	return 0;
5250 }
5251 
5252 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5253 {
5254 	int *map;
5255 
5256 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5257 	    (mask == TRACE_ITER_RECORD_CMD))
5258 		lockdep_assert_held(&event_mutex);
5259 
5260 	/* do nothing if flag is already set */
5261 	if (!!(tr->trace_flags & mask) == !!enabled)
5262 		return 0;
5263 
5264 	/* Give the tracer a chance to approve the change */
5265 	if (tr->current_trace->flag_changed)
5266 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5267 			return -EINVAL;
5268 
5269 	if (enabled)
5270 		tr->trace_flags |= mask;
5271 	else
5272 		tr->trace_flags &= ~mask;
5273 
5274 	if (mask == TRACE_ITER_RECORD_CMD)
5275 		trace_event_enable_cmd_record(enabled);
5276 
5277 	if (mask == TRACE_ITER_RECORD_TGID) {
5278 		if (!tgid_map) {
5279 			tgid_map_max = pid_max;
5280 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5281 				       GFP_KERNEL);
5282 
5283 			/*
5284 			 * Pairs with smp_load_acquire() in
5285 			 * trace_find_tgid_ptr() to ensure that if it observes
5286 			 * the tgid_map we just allocated then it also observes
5287 			 * the corresponding tgid_map_max value.
5288 			 */
5289 			smp_store_release(&tgid_map, map);
5290 		}
5291 		if (!tgid_map) {
5292 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5293 			return -ENOMEM;
5294 		}
5295 
5296 		trace_event_enable_tgid_record(enabled);
5297 	}
5298 
5299 	if (mask == TRACE_ITER_EVENT_FORK)
5300 		trace_event_follow_fork(tr, enabled);
5301 
5302 	if (mask == TRACE_ITER_FUNC_FORK)
5303 		ftrace_pid_follow_fork(tr, enabled);
5304 
5305 	if (mask == TRACE_ITER_OVERWRITE) {
5306 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5307 #ifdef CONFIG_TRACER_MAX_TRACE
5308 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5309 #endif
5310 	}
5311 
5312 	if (mask == TRACE_ITER_PRINTK) {
5313 		trace_printk_start_stop_comm(enabled);
5314 		trace_printk_control(enabled);
5315 	}
5316 
5317 	return 0;
5318 }
5319 
5320 int trace_set_options(struct trace_array *tr, char *option)
5321 {
5322 	char *cmp;
5323 	int neg = 0;
5324 	int ret;
5325 	size_t orig_len = strlen(option);
5326 	int len;
5327 
5328 	cmp = strstrip(option);
5329 
5330 	len = str_has_prefix(cmp, "no");
5331 	if (len)
5332 		neg = 1;
5333 
5334 	cmp += len;
5335 
5336 	mutex_lock(&event_mutex);
5337 	mutex_lock(&trace_types_lock);
5338 
5339 	ret = match_string(trace_options, -1, cmp);
5340 	/* If no option could be set, test the specific tracer options */
5341 	if (ret < 0)
5342 		ret = set_tracer_option(tr, cmp, neg);
5343 	else
5344 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5345 
5346 	mutex_unlock(&trace_types_lock);
5347 	mutex_unlock(&event_mutex);
5348 
5349 	/*
5350 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5351 	 * turn it back into a space.
5352 	 */
5353 	if (orig_len > strlen(option))
5354 		option[strlen(option)] = ' ';
5355 
5356 	return ret;
5357 }
5358 
5359 static void __init apply_trace_boot_options(void)
5360 {
5361 	char *buf = trace_boot_options_buf;
5362 	char *option;
5363 
5364 	while (true) {
5365 		option = strsep(&buf, ",");
5366 
5367 		if (!option)
5368 			break;
5369 
5370 		if (*option)
5371 			trace_set_options(&global_trace, option);
5372 
5373 		/* Put back the comma to allow this to be called again */
5374 		if (buf)
5375 			*(buf - 1) = ',';
5376 	}
5377 }
5378 
5379 static ssize_t
5380 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5381 			size_t cnt, loff_t *ppos)
5382 {
5383 	struct seq_file *m = filp->private_data;
5384 	struct trace_array *tr = m->private;
5385 	char buf[64];
5386 	int ret;
5387 
5388 	if (cnt >= sizeof(buf))
5389 		return -EINVAL;
5390 
5391 	if (copy_from_user(buf, ubuf, cnt))
5392 		return -EFAULT;
5393 
5394 	buf[cnt] = 0;
5395 
5396 	ret = trace_set_options(tr, buf);
5397 	if (ret < 0)
5398 		return ret;
5399 
5400 	*ppos += cnt;
5401 
5402 	return cnt;
5403 }
5404 
5405 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5406 {
5407 	struct trace_array *tr = inode->i_private;
5408 	int ret;
5409 
5410 	ret = tracing_check_open_get_tr(tr);
5411 	if (ret)
5412 		return ret;
5413 
5414 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5415 	if (ret < 0)
5416 		trace_array_put(tr);
5417 
5418 	return ret;
5419 }
5420 
5421 static const struct file_operations tracing_iter_fops = {
5422 	.open		= tracing_trace_options_open,
5423 	.read		= seq_read,
5424 	.llseek		= seq_lseek,
5425 	.release	= tracing_single_release_tr,
5426 	.write		= tracing_trace_options_write,
5427 };
5428 
5429 static const char readme_msg[] =
5430 	"tracing mini-HOWTO:\n\n"
5431 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5432 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5433 	" Important files:\n"
5434 	"  trace\t\t\t- The static contents of the buffer\n"
5435 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5436 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5437 	"  current_tracer\t- function and latency tracers\n"
5438 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5439 	"  error_log\t- error log for failed commands (that support it)\n"
5440 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5441 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5442 	"  trace_clock\t\t-change the clock used to order events\n"
5443 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5444 	"      global:   Synced across CPUs but slows tracing down.\n"
5445 	"     counter:   Not a clock, but just an increment\n"
5446 	"      uptime:   Jiffy counter from time of boot\n"
5447 	"        perf:   Same clock that perf events use\n"
5448 #ifdef CONFIG_X86_64
5449 	"     x86-tsc:   TSC cycle counter\n"
5450 #endif
5451 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
5452 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5453 	"    absolute:   Absolute (standalone) timestamp\n"
5454 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5455 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5456 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5457 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5458 	"\t\t\t  Remove sub-buffer with rmdir\n"
5459 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5460 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5461 	"\t\t\t  option name\n"
5462 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5463 #ifdef CONFIG_DYNAMIC_FTRACE
5464 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5465 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5466 	"\t\t\t  functions\n"
5467 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5468 	"\t     modules: Can select a group via module\n"
5469 	"\t      Format: :mod:<module-name>\n"
5470 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5471 	"\t    triggers: a command to perform when function is hit\n"
5472 	"\t      Format: <function>:<trigger>[:count]\n"
5473 	"\t     trigger: traceon, traceoff\n"
5474 	"\t\t      enable_event:<system>:<event>\n"
5475 	"\t\t      disable_event:<system>:<event>\n"
5476 #ifdef CONFIG_STACKTRACE
5477 	"\t\t      stacktrace\n"
5478 #endif
5479 #ifdef CONFIG_TRACER_SNAPSHOT
5480 	"\t\t      snapshot\n"
5481 #endif
5482 	"\t\t      dump\n"
5483 	"\t\t      cpudump\n"
5484 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5485 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5486 	"\t     The first one will disable tracing every time do_fault is hit\n"
5487 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5488 	"\t       The first time do trap is hit and it disables tracing, the\n"
5489 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5490 	"\t       the counter will not decrement. It only decrements when the\n"
5491 	"\t       trigger did work\n"
5492 	"\t     To remove trigger without count:\n"
5493 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5494 	"\t     To remove trigger with a count:\n"
5495 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5496 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5497 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5498 	"\t    modules: Can select a group via module command :mod:\n"
5499 	"\t    Does not accept triggers\n"
5500 #endif /* CONFIG_DYNAMIC_FTRACE */
5501 #ifdef CONFIG_FUNCTION_TRACER
5502 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5503 	"\t\t    (function)\n"
5504 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5505 	"\t\t    (function)\n"
5506 #endif
5507 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5508 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5509 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5510 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5511 #endif
5512 #ifdef CONFIG_TRACER_SNAPSHOT
5513 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5514 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5515 	"\t\t\t  information\n"
5516 #endif
5517 #ifdef CONFIG_STACK_TRACER
5518 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5519 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5520 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5521 	"\t\t\t  new trace)\n"
5522 #ifdef CONFIG_DYNAMIC_FTRACE
5523 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5524 	"\t\t\t  traces\n"
5525 #endif
5526 #endif /* CONFIG_STACK_TRACER */
5527 #ifdef CONFIG_DYNAMIC_EVENTS
5528 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5529 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5530 #endif
5531 #ifdef CONFIG_KPROBE_EVENTS
5532 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5533 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5534 #endif
5535 #ifdef CONFIG_UPROBE_EVENTS
5536 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5537 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5538 #endif
5539 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5540 	"\t  accepts: event-definitions (one definition per line)\n"
5541 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5542 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5543 #ifdef CONFIG_HIST_TRIGGERS
5544 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5545 #endif
5546 	"\t           -:[<group>/]<event>\n"
5547 #ifdef CONFIG_KPROBE_EVENTS
5548 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5549   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5550 #endif
5551 #ifdef CONFIG_UPROBE_EVENTS
5552   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5553 #endif
5554 	"\t     args: <name>=fetcharg[:type]\n"
5555 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5556 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5557 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5558 #else
5559 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5560 #endif
5561 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5562 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5563 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5564 	"\t           <type>\\[<array-size>\\]\n"
5565 #ifdef CONFIG_HIST_TRIGGERS
5566 	"\t    field: <stype> <name>;\n"
5567 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5568 	"\t           [unsigned] char/int/long\n"
5569 #endif
5570 #endif
5571 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5572 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5573 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5574 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5575 	"\t\t\t  events\n"
5576 	"      filter\t\t- If set, only events passing filter are traced\n"
5577 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5578 	"\t\t\t  <event>:\n"
5579 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5580 	"      filter\t\t- If set, only events passing filter are traced\n"
5581 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5582 	"\t    Format: <trigger>[:count][if <filter>]\n"
5583 	"\t   trigger: traceon, traceoff\n"
5584 	"\t            enable_event:<system>:<event>\n"
5585 	"\t            disable_event:<system>:<event>\n"
5586 #ifdef CONFIG_HIST_TRIGGERS
5587 	"\t            enable_hist:<system>:<event>\n"
5588 	"\t            disable_hist:<system>:<event>\n"
5589 #endif
5590 #ifdef CONFIG_STACKTRACE
5591 	"\t\t    stacktrace\n"
5592 #endif
5593 #ifdef CONFIG_TRACER_SNAPSHOT
5594 	"\t\t    snapshot\n"
5595 #endif
5596 #ifdef CONFIG_HIST_TRIGGERS
5597 	"\t\t    hist (see below)\n"
5598 #endif
5599 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5600 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5601 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5602 	"\t                  events/block/block_unplug/trigger\n"
5603 	"\t   The first disables tracing every time block_unplug is hit.\n"
5604 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5605 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5606 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5607 	"\t   Like function triggers, the counter is only decremented if it\n"
5608 	"\t    enabled or disabled tracing.\n"
5609 	"\t   To remove a trigger without a count:\n"
5610 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5611 	"\t   To remove a trigger with a count:\n"
5612 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5613 	"\t   Filters can be ignored when removing a trigger.\n"
5614 #ifdef CONFIG_HIST_TRIGGERS
5615 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5616 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5617 	"\t            [:values=<field1[,field2,...]>]\n"
5618 	"\t            [:sort=<field1[,field2,...]>]\n"
5619 	"\t            [:size=#entries]\n"
5620 	"\t            [:pause][:continue][:clear]\n"
5621 	"\t            [:name=histname1]\n"
5622 	"\t            [:<handler>.<action>]\n"
5623 	"\t            [if <filter>]\n\n"
5624 	"\t    Note, special fields can be used as well:\n"
5625 	"\t            common_timestamp - to record current timestamp\n"
5626 	"\t            common_cpu - to record the CPU the event happened on\n"
5627 	"\n"
5628 	"\t    When a matching event is hit, an entry is added to a hash\n"
5629 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5630 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5631 	"\t    correspond to fields in the event's format description.  Keys\n"
5632 	"\t    can be any field, or the special string 'stacktrace'.\n"
5633 	"\t    Compound keys consisting of up to two fields can be specified\n"
5634 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5635 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5636 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5637 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5638 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5639 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5640 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5641 	"\t    its histogram data will be shared with other triggers of the\n"
5642 	"\t    same name, and trigger hits will update this common data.\n\n"
5643 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5644 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5645 	"\t    triggers attached to an event, there will be a table for each\n"
5646 	"\t    trigger in the output.  The table displayed for a named\n"
5647 	"\t    trigger will be the same as any other instance having the\n"
5648 	"\t    same name.  The default format used to display a given field\n"
5649 	"\t    can be modified by appending any of the following modifiers\n"
5650 	"\t    to the field name, as applicable:\n\n"
5651 	"\t            .hex        display a number as a hex value\n"
5652 	"\t            .sym        display an address as a symbol\n"
5653 	"\t            .sym-offset display an address as a symbol and offset\n"
5654 	"\t            .execname   display a common_pid as a program name\n"
5655 	"\t            .syscall    display a syscall id as a syscall name\n"
5656 	"\t            .log2       display log2 value rather than raw number\n"
5657 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5658 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5659 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5660 	"\t    trigger or to start a hist trigger but not log any events\n"
5661 	"\t    until told to do so.  'continue' can be used to start or\n"
5662 	"\t    restart a paused hist trigger.\n\n"
5663 	"\t    The 'clear' parameter will clear the contents of a running\n"
5664 	"\t    hist trigger and leave its current paused/active state\n"
5665 	"\t    unchanged.\n\n"
5666 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5667 	"\t    have one event conditionally start and stop another event's\n"
5668 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5669 	"\t    the enable_event and disable_event triggers.\n\n"
5670 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5671 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5672 	"\t        <handler>.<action>\n\n"
5673 	"\t    The available handlers are:\n\n"
5674 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5675 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5676 	"\t        onchange(var)            - invoke action if var changes\n\n"
5677 	"\t    The available actions are:\n\n"
5678 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5679 	"\t        save(field,...)                      - save current event fields\n"
5680 #ifdef CONFIG_TRACER_SNAPSHOT
5681 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5682 #endif
5683 #ifdef CONFIG_SYNTH_EVENTS
5684 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5685 	"\t  Write into this file to define/undefine new synthetic events.\n"
5686 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5687 #endif
5688 #endif
5689 ;
5690 
5691 static ssize_t
5692 tracing_readme_read(struct file *filp, char __user *ubuf,
5693 		       size_t cnt, loff_t *ppos)
5694 {
5695 	return simple_read_from_buffer(ubuf, cnt, ppos,
5696 					readme_msg, strlen(readme_msg));
5697 }
5698 
5699 static const struct file_operations tracing_readme_fops = {
5700 	.open		= tracing_open_generic,
5701 	.read		= tracing_readme_read,
5702 	.llseek		= generic_file_llseek,
5703 };
5704 
5705 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5706 {
5707 	int pid = ++(*pos);
5708 
5709 	return trace_find_tgid_ptr(pid);
5710 }
5711 
5712 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5713 {
5714 	int pid = *pos;
5715 
5716 	return trace_find_tgid_ptr(pid);
5717 }
5718 
5719 static void saved_tgids_stop(struct seq_file *m, void *v)
5720 {
5721 }
5722 
5723 static int saved_tgids_show(struct seq_file *m, void *v)
5724 {
5725 	int *entry = (int *)v;
5726 	int pid = entry - tgid_map;
5727 	int tgid = *entry;
5728 
5729 	if (tgid == 0)
5730 		return SEQ_SKIP;
5731 
5732 	seq_printf(m, "%d %d\n", pid, tgid);
5733 	return 0;
5734 }
5735 
5736 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5737 	.start		= saved_tgids_start,
5738 	.stop		= saved_tgids_stop,
5739 	.next		= saved_tgids_next,
5740 	.show		= saved_tgids_show,
5741 };
5742 
5743 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5744 {
5745 	int ret;
5746 
5747 	ret = tracing_check_open_get_tr(NULL);
5748 	if (ret)
5749 		return ret;
5750 
5751 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5752 }
5753 
5754 
5755 static const struct file_operations tracing_saved_tgids_fops = {
5756 	.open		= tracing_saved_tgids_open,
5757 	.read		= seq_read,
5758 	.llseek		= seq_lseek,
5759 	.release	= seq_release,
5760 };
5761 
5762 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5763 {
5764 	unsigned int *ptr = v;
5765 
5766 	if (*pos || m->count)
5767 		ptr++;
5768 
5769 	(*pos)++;
5770 
5771 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5772 	     ptr++) {
5773 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5774 			continue;
5775 
5776 		return ptr;
5777 	}
5778 
5779 	return NULL;
5780 }
5781 
5782 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5783 {
5784 	void *v;
5785 	loff_t l = 0;
5786 
5787 	preempt_disable();
5788 	arch_spin_lock(&trace_cmdline_lock);
5789 
5790 	v = &savedcmd->map_cmdline_to_pid[0];
5791 	while (l <= *pos) {
5792 		v = saved_cmdlines_next(m, v, &l);
5793 		if (!v)
5794 			return NULL;
5795 	}
5796 
5797 	return v;
5798 }
5799 
5800 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5801 {
5802 	arch_spin_unlock(&trace_cmdline_lock);
5803 	preempt_enable();
5804 }
5805 
5806 static int saved_cmdlines_show(struct seq_file *m, void *v)
5807 {
5808 	char buf[TASK_COMM_LEN];
5809 	unsigned int *pid = v;
5810 
5811 	__trace_find_cmdline(*pid, buf);
5812 	seq_printf(m, "%d %s\n", *pid, buf);
5813 	return 0;
5814 }
5815 
5816 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5817 	.start		= saved_cmdlines_start,
5818 	.next		= saved_cmdlines_next,
5819 	.stop		= saved_cmdlines_stop,
5820 	.show		= saved_cmdlines_show,
5821 };
5822 
5823 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5824 {
5825 	int ret;
5826 
5827 	ret = tracing_check_open_get_tr(NULL);
5828 	if (ret)
5829 		return ret;
5830 
5831 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5832 }
5833 
5834 static const struct file_operations tracing_saved_cmdlines_fops = {
5835 	.open		= tracing_saved_cmdlines_open,
5836 	.read		= seq_read,
5837 	.llseek		= seq_lseek,
5838 	.release	= seq_release,
5839 };
5840 
5841 static ssize_t
5842 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5843 				 size_t cnt, loff_t *ppos)
5844 {
5845 	char buf[64];
5846 	int r;
5847 
5848 	arch_spin_lock(&trace_cmdline_lock);
5849 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5850 	arch_spin_unlock(&trace_cmdline_lock);
5851 
5852 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5853 }
5854 
5855 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5856 {
5857 	kfree(s->saved_cmdlines);
5858 	kfree(s->map_cmdline_to_pid);
5859 	kfree(s);
5860 }
5861 
5862 static int tracing_resize_saved_cmdlines(unsigned int val)
5863 {
5864 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5865 
5866 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5867 	if (!s)
5868 		return -ENOMEM;
5869 
5870 	if (allocate_cmdlines_buffer(val, s) < 0) {
5871 		kfree(s);
5872 		return -ENOMEM;
5873 	}
5874 
5875 	arch_spin_lock(&trace_cmdline_lock);
5876 	savedcmd_temp = savedcmd;
5877 	savedcmd = s;
5878 	arch_spin_unlock(&trace_cmdline_lock);
5879 	free_saved_cmdlines_buffer(savedcmd_temp);
5880 
5881 	return 0;
5882 }
5883 
5884 static ssize_t
5885 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5886 				  size_t cnt, loff_t *ppos)
5887 {
5888 	unsigned long val;
5889 	int ret;
5890 
5891 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5892 	if (ret)
5893 		return ret;
5894 
5895 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5896 	if (!val || val > PID_MAX_DEFAULT)
5897 		return -EINVAL;
5898 
5899 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5900 	if (ret < 0)
5901 		return ret;
5902 
5903 	*ppos += cnt;
5904 
5905 	return cnt;
5906 }
5907 
5908 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5909 	.open		= tracing_open_generic,
5910 	.read		= tracing_saved_cmdlines_size_read,
5911 	.write		= tracing_saved_cmdlines_size_write,
5912 };
5913 
5914 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5915 static union trace_eval_map_item *
5916 update_eval_map(union trace_eval_map_item *ptr)
5917 {
5918 	if (!ptr->map.eval_string) {
5919 		if (ptr->tail.next) {
5920 			ptr = ptr->tail.next;
5921 			/* Set ptr to the next real item (skip head) */
5922 			ptr++;
5923 		} else
5924 			return NULL;
5925 	}
5926 	return ptr;
5927 }
5928 
5929 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5930 {
5931 	union trace_eval_map_item *ptr = v;
5932 
5933 	/*
5934 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5935 	 * This really should never happen.
5936 	 */
5937 	(*pos)++;
5938 	ptr = update_eval_map(ptr);
5939 	if (WARN_ON_ONCE(!ptr))
5940 		return NULL;
5941 
5942 	ptr++;
5943 	ptr = update_eval_map(ptr);
5944 
5945 	return ptr;
5946 }
5947 
5948 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5949 {
5950 	union trace_eval_map_item *v;
5951 	loff_t l = 0;
5952 
5953 	mutex_lock(&trace_eval_mutex);
5954 
5955 	v = trace_eval_maps;
5956 	if (v)
5957 		v++;
5958 
5959 	while (v && l < *pos) {
5960 		v = eval_map_next(m, v, &l);
5961 	}
5962 
5963 	return v;
5964 }
5965 
5966 static void eval_map_stop(struct seq_file *m, void *v)
5967 {
5968 	mutex_unlock(&trace_eval_mutex);
5969 }
5970 
5971 static int eval_map_show(struct seq_file *m, void *v)
5972 {
5973 	union trace_eval_map_item *ptr = v;
5974 
5975 	seq_printf(m, "%s %ld (%s)\n",
5976 		   ptr->map.eval_string, ptr->map.eval_value,
5977 		   ptr->map.system);
5978 
5979 	return 0;
5980 }
5981 
5982 static const struct seq_operations tracing_eval_map_seq_ops = {
5983 	.start		= eval_map_start,
5984 	.next		= eval_map_next,
5985 	.stop		= eval_map_stop,
5986 	.show		= eval_map_show,
5987 };
5988 
5989 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5990 {
5991 	int ret;
5992 
5993 	ret = tracing_check_open_get_tr(NULL);
5994 	if (ret)
5995 		return ret;
5996 
5997 	return seq_open(filp, &tracing_eval_map_seq_ops);
5998 }
5999 
6000 static const struct file_operations tracing_eval_map_fops = {
6001 	.open		= tracing_eval_map_open,
6002 	.read		= seq_read,
6003 	.llseek		= seq_lseek,
6004 	.release	= seq_release,
6005 };
6006 
6007 static inline union trace_eval_map_item *
6008 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6009 {
6010 	/* Return tail of array given the head */
6011 	return ptr + ptr->head.length + 1;
6012 }
6013 
6014 static void
6015 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6016 			   int len)
6017 {
6018 	struct trace_eval_map **stop;
6019 	struct trace_eval_map **map;
6020 	union trace_eval_map_item *map_array;
6021 	union trace_eval_map_item *ptr;
6022 
6023 	stop = start + len;
6024 
6025 	/*
6026 	 * The trace_eval_maps contains the map plus a head and tail item,
6027 	 * where the head holds the module and length of array, and the
6028 	 * tail holds a pointer to the next list.
6029 	 */
6030 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6031 	if (!map_array) {
6032 		pr_warn("Unable to allocate trace eval mapping\n");
6033 		return;
6034 	}
6035 
6036 	mutex_lock(&trace_eval_mutex);
6037 
6038 	if (!trace_eval_maps)
6039 		trace_eval_maps = map_array;
6040 	else {
6041 		ptr = trace_eval_maps;
6042 		for (;;) {
6043 			ptr = trace_eval_jmp_to_tail(ptr);
6044 			if (!ptr->tail.next)
6045 				break;
6046 			ptr = ptr->tail.next;
6047 
6048 		}
6049 		ptr->tail.next = map_array;
6050 	}
6051 	map_array->head.mod = mod;
6052 	map_array->head.length = len;
6053 	map_array++;
6054 
6055 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6056 		map_array->map = **map;
6057 		map_array++;
6058 	}
6059 	memset(map_array, 0, sizeof(*map_array));
6060 
6061 	mutex_unlock(&trace_eval_mutex);
6062 }
6063 
6064 static void trace_create_eval_file(struct dentry *d_tracer)
6065 {
6066 	trace_create_file("eval_map", 0444, d_tracer,
6067 			  NULL, &tracing_eval_map_fops);
6068 }
6069 
6070 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6071 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6072 static inline void trace_insert_eval_map_file(struct module *mod,
6073 			      struct trace_eval_map **start, int len) { }
6074 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6075 
6076 static void trace_insert_eval_map(struct module *mod,
6077 				  struct trace_eval_map **start, int len)
6078 {
6079 	struct trace_eval_map **map;
6080 
6081 	if (len <= 0)
6082 		return;
6083 
6084 	map = start;
6085 
6086 	trace_event_eval_update(map, len);
6087 
6088 	trace_insert_eval_map_file(mod, start, len);
6089 }
6090 
6091 static ssize_t
6092 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6093 		       size_t cnt, loff_t *ppos)
6094 {
6095 	struct trace_array *tr = filp->private_data;
6096 	char buf[MAX_TRACER_SIZE+2];
6097 	int r;
6098 
6099 	mutex_lock(&trace_types_lock);
6100 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6101 	mutex_unlock(&trace_types_lock);
6102 
6103 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6104 }
6105 
6106 int tracer_init(struct tracer *t, struct trace_array *tr)
6107 {
6108 	tracing_reset_online_cpus(&tr->array_buffer);
6109 	return t->init(tr);
6110 }
6111 
6112 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6113 {
6114 	int cpu;
6115 
6116 	for_each_tracing_cpu(cpu)
6117 		per_cpu_ptr(buf->data, cpu)->entries = val;
6118 }
6119 
6120 #ifdef CONFIG_TRACER_MAX_TRACE
6121 /* resize @tr's buffer to the size of @size_tr's entries */
6122 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6123 					struct array_buffer *size_buf, int cpu_id)
6124 {
6125 	int cpu, ret = 0;
6126 
6127 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6128 		for_each_tracing_cpu(cpu) {
6129 			ret = ring_buffer_resize(trace_buf->buffer,
6130 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6131 			if (ret < 0)
6132 				break;
6133 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6134 				per_cpu_ptr(size_buf->data, cpu)->entries;
6135 		}
6136 	} else {
6137 		ret = ring_buffer_resize(trace_buf->buffer,
6138 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6139 		if (ret == 0)
6140 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6141 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6142 	}
6143 
6144 	return ret;
6145 }
6146 #endif /* CONFIG_TRACER_MAX_TRACE */
6147 
6148 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6149 					unsigned long size, int cpu)
6150 {
6151 	int ret;
6152 
6153 	/*
6154 	 * If kernel or user changes the size of the ring buffer
6155 	 * we use the size that was given, and we can forget about
6156 	 * expanding it later.
6157 	 */
6158 	ring_buffer_expanded = true;
6159 
6160 	/* May be called before buffers are initialized */
6161 	if (!tr->array_buffer.buffer)
6162 		return 0;
6163 
6164 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6165 	if (ret < 0)
6166 		return ret;
6167 
6168 #ifdef CONFIG_TRACER_MAX_TRACE
6169 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6170 	    !tr->current_trace->use_max_tr)
6171 		goto out;
6172 
6173 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6174 	if (ret < 0) {
6175 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6176 						     &tr->array_buffer, cpu);
6177 		if (r < 0) {
6178 			/*
6179 			 * AARGH! We are left with different
6180 			 * size max buffer!!!!
6181 			 * The max buffer is our "snapshot" buffer.
6182 			 * When a tracer needs a snapshot (one of the
6183 			 * latency tracers), it swaps the max buffer
6184 			 * with the saved snap shot. We succeeded to
6185 			 * update the size of the main buffer, but failed to
6186 			 * update the size of the max buffer. But when we tried
6187 			 * to reset the main buffer to the original size, we
6188 			 * failed there too. This is very unlikely to
6189 			 * happen, but if it does, warn and kill all
6190 			 * tracing.
6191 			 */
6192 			WARN_ON(1);
6193 			tracing_disabled = 1;
6194 		}
6195 		return ret;
6196 	}
6197 
6198 	if (cpu == RING_BUFFER_ALL_CPUS)
6199 		set_buffer_entries(&tr->max_buffer, size);
6200 	else
6201 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6202 
6203  out:
6204 #endif /* CONFIG_TRACER_MAX_TRACE */
6205 
6206 	if (cpu == RING_BUFFER_ALL_CPUS)
6207 		set_buffer_entries(&tr->array_buffer, size);
6208 	else
6209 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6210 
6211 	return ret;
6212 }
6213 
6214 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6215 				  unsigned long size, int cpu_id)
6216 {
6217 	int ret;
6218 
6219 	mutex_lock(&trace_types_lock);
6220 
6221 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6222 		/* make sure, this cpu is enabled in the mask */
6223 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6224 			ret = -EINVAL;
6225 			goto out;
6226 		}
6227 	}
6228 
6229 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6230 	if (ret < 0)
6231 		ret = -ENOMEM;
6232 
6233 out:
6234 	mutex_unlock(&trace_types_lock);
6235 
6236 	return ret;
6237 }
6238 
6239 
6240 /**
6241  * tracing_update_buffers - used by tracing facility to expand ring buffers
6242  *
6243  * To save on memory when the tracing is never used on a system with it
6244  * configured in. The ring buffers are set to a minimum size. But once
6245  * a user starts to use the tracing facility, then they need to grow
6246  * to their default size.
6247  *
6248  * This function is to be called when a tracer is about to be used.
6249  */
6250 int tracing_update_buffers(void)
6251 {
6252 	int ret = 0;
6253 
6254 	mutex_lock(&trace_types_lock);
6255 	if (!ring_buffer_expanded)
6256 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6257 						RING_BUFFER_ALL_CPUS);
6258 	mutex_unlock(&trace_types_lock);
6259 
6260 	return ret;
6261 }
6262 
6263 struct trace_option_dentry;
6264 
6265 static void
6266 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6267 
6268 /*
6269  * Used to clear out the tracer before deletion of an instance.
6270  * Must have trace_types_lock held.
6271  */
6272 static void tracing_set_nop(struct trace_array *tr)
6273 {
6274 	if (tr->current_trace == &nop_trace)
6275 		return;
6276 
6277 	tr->current_trace->enabled--;
6278 
6279 	if (tr->current_trace->reset)
6280 		tr->current_trace->reset(tr);
6281 
6282 	tr->current_trace = &nop_trace;
6283 }
6284 
6285 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6286 {
6287 	/* Only enable if the directory has been created already. */
6288 	if (!tr->dir)
6289 		return;
6290 
6291 	create_trace_option_files(tr, t);
6292 }
6293 
6294 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6295 {
6296 	struct tracer *t;
6297 #ifdef CONFIG_TRACER_MAX_TRACE
6298 	bool had_max_tr;
6299 #endif
6300 	int ret = 0;
6301 
6302 	mutex_lock(&trace_types_lock);
6303 
6304 	if (!ring_buffer_expanded) {
6305 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6306 						RING_BUFFER_ALL_CPUS);
6307 		if (ret < 0)
6308 			goto out;
6309 		ret = 0;
6310 	}
6311 
6312 	for (t = trace_types; t; t = t->next) {
6313 		if (strcmp(t->name, buf) == 0)
6314 			break;
6315 	}
6316 	if (!t) {
6317 		ret = -EINVAL;
6318 		goto out;
6319 	}
6320 	if (t == tr->current_trace)
6321 		goto out;
6322 
6323 #ifdef CONFIG_TRACER_SNAPSHOT
6324 	if (t->use_max_tr) {
6325 		arch_spin_lock(&tr->max_lock);
6326 		if (tr->cond_snapshot)
6327 			ret = -EBUSY;
6328 		arch_spin_unlock(&tr->max_lock);
6329 		if (ret)
6330 			goto out;
6331 	}
6332 #endif
6333 	/* Some tracers won't work on kernel command line */
6334 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6335 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6336 			t->name);
6337 		goto out;
6338 	}
6339 
6340 	/* Some tracers are only allowed for the top level buffer */
6341 	if (!trace_ok_for_array(t, tr)) {
6342 		ret = -EINVAL;
6343 		goto out;
6344 	}
6345 
6346 	/* If trace pipe files are being read, we can't change the tracer */
6347 	if (tr->trace_ref) {
6348 		ret = -EBUSY;
6349 		goto out;
6350 	}
6351 
6352 	trace_branch_disable();
6353 
6354 	tr->current_trace->enabled--;
6355 
6356 	if (tr->current_trace->reset)
6357 		tr->current_trace->reset(tr);
6358 
6359 	/* Current trace needs to be nop_trace before synchronize_rcu */
6360 	tr->current_trace = &nop_trace;
6361 
6362 #ifdef CONFIG_TRACER_MAX_TRACE
6363 	had_max_tr = tr->allocated_snapshot;
6364 
6365 	if (had_max_tr && !t->use_max_tr) {
6366 		/*
6367 		 * We need to make sure that the update_max_tr sees that
6368 		 * current_trace changed to nop_trace to keep it from
6369 		 * swapping the buffers after we resize it.
6370 		 * The update_max_tr is called from interrupts disabled
6371 		 * so a synchronized_sched() is sufficient.
6372 		 */
6373 		synchronize_rcu();
6374 		free_snapshot(tr);
6375 	}
6376 #endif
6377 
6378 #ifdef CONFIG_TRACER_MAX_TRACE
6379 	if (t->use_max_tr && !had_max_tr) {
6380 		ret = tracing_alloc_snapshot_instance(tr);
6381 		if (ret < 0)
6382 			goto out;
6383 	}
6384 #endif
6385 
6386 	if (t->init) {
6387 		ret = tracer_init(t, tr);
6388 		if (ret)
6389 			goto out;
6390 	}
6391 
6392 	tr->current_trace = t;
6393 	tr->current_trace->enabled++;
6394 	trace_branch_enable(tr);
6395  out:
6396 	mutex_unlock(&trace_types_lock);
6397 
6398 	return ret;
6399 }
6400 
6401 static ssize_t
6402 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6403 			size_t cnt, loff_t *ppos)
6404 {
6405 	struct trace_array *tr = filp->private_data;
6406 	char buf[MAX_TRACER_SIZE+1];
6407 	int i;
6408 	size_t ret;
6409 	int err;
6410 
6411 	ret = cnt;
6412 
6413 	if (cnt > MAX_TRACER_SIZE)
6414 		cnt = MAX_TRACER_SIZE;
6415 
6416 	if (copy_from_user(buf, ubuf, cnt))
6417 		return -EFAULT;
6418 
6419 	buf[cnt] = 0;
6420 
6421 	/* strip ending whitespace. */
6422 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6423 		buf[i] = 0;
6424 
6425 	err = tracing_set_tracer(tr, buf);
6426 	if (err)
6427 		return err;
6428 
6429 	*ppos += ret;
6430 
6431 	return ret;
6432 }
6433 
6434 static ssize_t
6435 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6436 		   size_t cnt, loff_t *ppos)
6437 {
6438 	char buf[64];
6439 	int r;
6440 
6441 	r = snprintf(buf, sizeof(buf), "%ld\n",
6442 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6443 	if (r > sizeof(buf))
6444 		r = sizeof(buf);
6445 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6446 }
6447 
6448 static ssize_t
6449 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6450 		    size_t cnt, loff_t *ppos)
6451 {
6452 	unsigned long val;
6453 	int ret;
6454 
6455 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6456 	if (ret)
6457 		return ret;
6458 
6459 	*ptr = val * 1000;
6460 
6461 	return cnt;
6462 }
6463 
6464 static ssize_t
6465 tracing_thresh_read(struct file *filp, char __user *ubuf,
6466 		    size_t cnt, loff_t *ppos)
6467 {
6468 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6469 }
6470 
6471 static ssize_t
6472 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6473 		     size_t cnt, loff_t *ppos)
6474 {
6475 	struct trace_array *tr = filp->private_data;
6476 	int ret;
6477 
6478 	mutex_lock(&trace_types_lock);
6479 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6480 	if (ret < 0)
6481 		goto out;
6482 
6483 	if (tr->current_trace->update_thresh) {
6484 		ret = tr->current_trace->update_thresh(tr);
6485 		if (ret < 0)
6486 			goto out;
6487 	}
6488 
6489 	ret = cnt;
6490 out:
6491 	mutex_unlock(&trace_types_lock);
6492 
6493 	return ret;
6494 }
6495 
6496 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6497 
6498 static ssize_t
6499 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6500 		     size_t cnt, loff_t *ppos)
6501 {
6502 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6503 }
6504 
6505 static ssize_t
6506 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6507 		      size_t cnt, loff_t *ppos)
6508 {
6509 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6510 }
6511 
6512 #endif
6513 
6514 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6515 {
6516 	struct trace_array *tr = inode->i_private;
6517 	struct trace_iterator *iter;
6518 	int ret;
6519 
6520 	ret = tracing_check_open_get_tr(tr);
6521 	if (ret)
6522 		return ret;
6523 
6524 	mutex_lock(&trace_types_lock);
6525 
6526 	/* create a buffer to store the information to pass to userspace */
6527 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6528 	if (!iter) {
6529 		ret = -ENOMEM;
6530 		__trace_array_put(tr);
6531 		goto out;
6532 	}
6533 
6534 	trace_seq_init(&iter->seq);
6535 	iter->trace = tr->current_trace;
6536 
6537 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6538 		ret = -ENOMEM;
6539 		goto fail;
6540 	}
6541 
6542 	/* trace pipe does not show start of buffer */
6543 	cpumask_setall(iter->started);
6544 
6545 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6546 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6547 
6548 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6549 	if (trace_clocks[tr->clock_id].in_ns)
6550 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6551 
6552 	iter->tr = tr;
6553 	iter->array_buffer = &tr->array_buffer;
6554 	iter->cpu_file = tracing_get_cpu(inode);
6555 	mutex_init(&iter->mutex);
6556 	filp->private_data = iter;
6557 
6558 	if (iter->trace->pipe_open)
6559 		iter->trace->pipe_open(iter);
6560 
6561 	nonseekable_open(inode, filp);
6562 
6563 	tr->trace_ref++;
6564 out:
6565 	mutex_unlock(&trace_types_lock);
6566 	return ret;
6567 
6568 fail:
6569 	kfree(iter);
6570 	__trace_array_put(tr);
6571 	mutex_unlock(&trace_types_lock);
6572 	return ret;
6573 }
6574 
6575 static int tracing_release_pipe(struct inode *inode, struct file *file)
6576 {
6577 	struct trace_iterator *iter = file->private_data;
6578 	struct trace_array *tr = inode->i_private;
6579 
6580 	mutex_lock(&trace_types_lock);
6581 
6582 	tr->trace_ref--;
6583 
6584 	if (iter->trace->pipe_close)
6585 		iter->trace->pipe_close(iter);
6586 
6587 	mutex_unlock(&trace_types_lock);
6588 
6589 	free_cpumask_var(iter->started);
6590 	mutex_destroy(&iter->mutex);
6591 	kfree(iter);
6592 
6593 	trace_array_put(tr);
6594 
6595 	return 0;
6596 }
6597 
6598 static __poll_t
6599 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6600 {
6601 	struct trace_array *tr = iter->tr;
6602 
6603 	/* Iterators are static, they should be filled or empty */
6604 	if (trace_buffer_iter(iter, iter->cpu_file))
6605 		return EPOLLIN | EPOLLRDNORM;
6606 
6607 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6608 		/*
6609 		 * Always select as readable when in blocking mode
6610 		 */
6611 		return EPOLLIN | EPOLLRDNORM;
6612 	else
6613 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6614 					     filp, poll_table);
6615 }
6616 
6617 static __poll_t
6618 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6619 {
6620 	struct trace_iterator *iter = filp->private_data;
6621 
6622 	return trace_poll(iter, filp, poll_table);
6623 }
6624 
6625 /* Must be called with iter->mutex held. */
6626 static int tracing_wait_pipe(struct file *filp)
6627 {
6628 	struct trace_iterator *iter = filp->private_data;
6629 	int ret;
6630 
6631 	while (trace_empty(iter)) {
6632 
6633 		if ((filp->f_flags & O_NONBLOCK)) {
6634 			return -EAGAIN;
6635 		}
6636 
6637 		/*
6638 		 * We block until we read something and tracing is disabled.
6639 		 * We still block if tracing is disabled, but we have never
6640 		 * read anything. This allows a user to cat this file, and
6641 		 * then enable tracing. But after we have read something,
6642 		 * we give an EOF when tracing is again disabled.
6643 		 *
6644 		 * iter->pos will be 0 if we haven't read anything.
6645 		 */
6646 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6647 			break;
6648 
6649 		mutex_unlock(&iter->mutex);
6650 
6651 		ret = wait_on_pipe(iter, 0);
6652 
6653 		mutex_lock(&iter->mutex);
6654 
6655 		if (ret)
6656 			return ret;
6657 	}
6658 
6659 	return 1;
6660 }
6661 
6662 /*
6663  * Consumer reader.
6664  */
6665 static ssize_t
6666 tracing_read_pipe(struct file *filp, char __user *ubuf,
6667 		  size_t cnt, loff_t *ppos)
6668 {
6669 	struct trace_iterator *iter = filp->private_data;
6670 	ssize_t sret;
6671 
6672 	/*
6673 	 * Avoid more than one consumer on a single file descriptor
6674 	 * This is just a matter of traces coherency, the ring buffer itself
6675 	 * is protected.
6676 	 */
6677 	mutex_lock(&iter->mutex);
6678 
6679 	/* return any leftover data */
6680 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6681 	if (sret != -EBUSY)
6682 		goto out;
6683 
6684 	trace_seq_init(&iter->seq);
6685 
6686 	if (iter->trace->read) {
6687 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6688 		if (sret)
6689 			goto out;
6690 	}
6691 
6692 waitagain:
6693 	sret = tracing_wait_pipe(filp);
6694 	if (sret <= 0)
6695 		goto out;
6696 
6697 	/* stop when tracing is finished */
6698 	if (trace_empty(iter)) {
6699 		sret = 0;
6700 		goto out;
6701 	}
6702 
6703 	if (cnt >= PAGE_SIZE)
6704 		cnt = PAGE_SIZE - 1;
6705 
6706 	/* reset all but tr, trace, and overruns */
6707 	memset(&iter->seq, 0,
6708 	       sizeof(struct trace_iterator) -
6709 	       offsetof(struct trace_iterator, seq));
6710 	cpumask_clear(iter->started);
6711 	trace_seq_init(&iter->seq);
6712 	iter->pos = -1;
6713 
6714 	trace_event_read_lock();
6715 	trace_access_lock(iter->cpu_file);
6716 	while (trace_find_next_entry_inc(iter) != NULL) {
6717 		enum print_line_t ret;
6718 		int save_len = iter->seq.seq.len;
6719 
6720 		ret = print_trace_line(iter);
6721 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6722 			/* don't print partial lines */
6723 			iter->seq.seq.len = save_len;
6724 			break;
6725 		}
6726 		if (ret != TRACE_TYPE_NO_CONSUME)
6727 			trace_consume(iter);
6728 
6729 		if (trace_seq_used(&iter->seq) >= cnt)
6730 			break;
6731 
6732 		/*
6733 		 * Setting the full flag means we reached the trace_seq buffer
6734 		 * size and we should leave by partial output condition above.
6735 		 * One of the trace_seq_* functions is not used properly.
6736 		 */
6737 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6738 			  iter->ent->type);
6739 	}
6740 	trace_access_unlock(iter->cpu_file);
6741 	trace_event_read_unlock();
6742 
6743 	/* Now copy what we have to the user */
6744 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6745 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6746 		trace_seq_init(&iter->seq);
6747 
6748 	/*
6749 	 * If there was nothing to send to user, in spite of consuming trace
6750 	 * entries, go back to wait for more entries.
6751 	 */
6752 	if (sret == -EBUSY)
6753 		goto waitagain;
6754 
6755 out:
6756 	mutex_unlock(&iter->mutex);
6757 
6758 	return sret;
6759 }
6760 
6761 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6762 				     unsigned int idx)
6763 {
6764 	__free_page(spd->pages[idx]);
6765 }
6766 
6767 static size_t
6768 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6769 {
6770 	size_t count;
6771 	int save_len;
6772 	int ret;
6773 
6774 	/* Seq buffer is page-sized, exactly what we need. */
6775 	for (;;) {
6776 		save_len = iter->seq.seq.len;
6777 		ret = print_trace_line(iter);
6778 
6779 		if (trace_seq_has_overflowed(&iter->seq)) {
6780 			iter->seq.seq.len = save_len;
6781 			break;
6782 		}
6783 
6784 		/*
6785 		 * This should not be hit, because it should only
6786 		 * be set if the iter->seq overflowed. But check it
6787 		 * anyway to be safe.
6788 		 */
6789 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6790 			iter->seq.seq.len = save_len;
6791 			break;
6792 		}
6793 
6794 		count = trace_seq_used(&iter->seq) - save_len;
6795 		if (rem < count) {
6796 			rem = 0;
6797 			iter->seq.seq.len = save_len;
6798 			break;
6799 		}
6800 
6801 		if (ret != TRACE_TYPE_NO_CONSUME)
6802 			trace_consume(iter);
6803 		rem -= count;
6804 		if (!trace_find_next_entry_inc(iter))	{
6805 			rem = 0;
6806 			iter->ent = NULL;
6807 			break;
6808 		}
6809 	}
6810 
6811 	return rem;
6812 }
6813 
6814 static ssize_t tracing_splice_read_pipe(struct file *filp,
6815 					loff_t *ppos,
6816 					struct pipe_inode_info *pipe,
6817 					size_t len,
6818 					unsigned int flags)
6819 {
6820 	struct page *pages_def[PIPE_DEF_BUFFERS];
6821 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6822 	struct trace_iterator *iter = filp->private_data;
6823 	struct splice_pipe_desc spd = {
6824 		.pages		= pages_def,
6825 		.partial	= partial_def,
6826 		.nr_pages	= 0, /* This gets updated below. */
6827 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6828 		.ops		= &default_pipe_buf_ops,
6829 		.spd_release	= tracing_spd_release_pipe,
6830 	};
6831 	ssize_t ret;
6832 	size_t rem;
6833 	unsigned int i;
6834 
6835 	if (splice_grow_spd(pipe, &spd))
6836 		return -ENOMEM;
6837 
6838 	mutex_lock(&iter->mutex);
6839 
6840 	if (iter->trace->splice_read) {
6841 		ret = iter->trace->splice_read(iter, filp,
6842 					       ppos, pipe, len, flags);
6843 		if (ret)
6844 			goto out_err;
6845 	}
6846 
6847 	ret = tracing_wait_pipe(filp);
6848 	if (ret <= 0)
6849 		goto out_err;
6850 
6851 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6852 		ret = -EFAULT;
6853 		goto out_err;
6854 	}
6855 
6856 	trace_event_read_lock();
6857 	trace_access_lock(iter->cpu_file);
6858 
6859 	/* Fill as many pages as possible. */
6860 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6861 		spd.pages[i] = alloc_page(GFP_KERNEL);
6862 		if (!spd.pages[i])
6863 			break;
6864 
6865 		rem = tracing_fill_pipe_page(rem, iter);
6866 
6867 		/* Copy the data into the page, so we can start over. */
6868 		ret = trace_seq_to_buffer(&iter->seq,
6869 					  page_address(spd.pages[i]),
6870 					  trace_seq_used(&iter->seq));
6871 		if (ret < 0) {
6872 			__free_page(spd.pages[i]);
6873 			break;
6874 		}
6875 		spd.partial[i].offset = 0;
6876 		spd.partial[i].len = trace_seq_used(&iter->seq);
6877 
6878 		trace_seq_init(&iter->seq);
6879 	}
6880 
6881 	trace_access_unlock(iter->cpu_file);
6882 	trace_event_read_unlock();
6883 	mutex_unlock(&iter->mutex);
6884 
6885 	spd.nr_pages = i;
6886 
6887 	if (i)
6888 		ret = splice_to_pipe(pipe, &spd);
6889 	else
6890 		ret = 0;
6891 out:
6892 	splice_shrink_spd(&spd);
6893 	return ret;
6894 
6895 out_err:
6896 	mutex_unlock(&iter->mutex);
6897 	goto out;
6898 }
6899 
6900 static ssize_t
6901 tracing_entries_read(struct file *filp, char __user *ubuf,
6902 		     size_t cnt, loff_t *ppos)
6903 {
6904 	struct inode *inode = file_inode(filp);
6905 	struct trace_array *tr = inode->i_private;
6906 	int cpu = tracing_get_cpu(inode);
6907 	char buf[64];
6908 	int r = 0;
6909 	ssize_t ret;
6910 
6911 	mutex_lock(&trace_types_lock);
6912 
6913 	if (cpu == RING_BUFFER_ALL_CPUS) {
6914 		int cpu, buf_size_same;
6915 		unsigned long size;
6916 
6917 		size = 0;
6918 		buf_size_same = 1;
6919 		/* check if all cpu sizes are same */
6920 		for_each_tracing_cpu(cpu) {
6921 			/* fill in the size from first enabled cpu */
6922 			if (size == 0)
6923 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6924 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6925 				buf_size_same = 0;
6926 				break;
6927 			}
6928 		}
6929 
6930 		if (buf_size_same) {
6931 			if (!ring_buffer_expanded)
6932 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6933 					    size >> 10,
6934 					    trace_buf_size >> 10);
6935 			else
6936 				r = sprintf(buf, "%lu\n", size >> 10);
6937 		} else
6938 			r = sprintf(buf, "X\n");
6939 	} else
6940 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6941 
6942 	mutex_unlock(&trace_types_lock);
6943 
6944 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6945 	return ret;
6946 }
6947 
6948 static ssize_t
6949 tracing_entries_write(struct file *filp, const char __user *ubuf,
6950 		      size_t cnt, loff_t *ppos)
6951 {
6952 	struct inode *inode = file_inode(filp);
6953 	struct trace_array *tr = inode->i_private;
6954 	unsigned long val;
6955 	int ret;
6956 
6957 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6958 	if (ret)
6959 		return ret;
6960 
6961 	/* must have at least 1 entry */
6962 	if (!val)
6963 		return -EINVAL;
6964 
6965 	/* value is in KB */
6966 	val <<= 10;
6967 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6968 	if (ret < 0)
6969 		return ret;
6970 
6971 	*ppos += cnt;
6972 
6973 	return cnt;
6974 }
6975 
6976 static ssize_t
6977 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6978 				size_t cnt, loff_t *ppos)
6979 {
6980 	struct trace_array *tr = filp->private_data;
6981 	char buf[64];
6982 	int r, cpu;
6983 	unsigned long size = 0, expanded_size = 0;
6984 
6985 	mutex_lock(&trace_types_lock);
6986 	for_each_tracing_cpu(cpu) {
6987 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6988 		if (!ring_buffer_expanded)
6989 			expanded_size += trace_buf_size >> 10;
6990 	}
6991 	if (ring_buffer_expanded)
6992 		r = sprintf(buf, "%lu\n", size);
6993 	else
6994 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6995 	mutex_unlock(&trace_types_lock);
6996 
6997 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6998 }
6999 
7000 static ssize_t
7001 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7002 			  size_t cnt, loff_t *ppos)
7003 {
7004 	/*
7005 	 * There is no need to read what the user has written, this function
7006 	 * is just to make sure that there is no error when "echo" is used
7007 	 */
7008 
7009 	*ppos += cnt;
7010 
7011 	return cnt;
7012 }
7013 
7014 static int
7015 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7016 {
7017 	struct trace_array *tr = inode->i_private;
7018 
7019 	/* disable tracing ? */
7020 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7021 		tracer_tracing_off(tr);
7022 	/* resize the ring buffer to 0 */
7023 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7024 
7025 	trace_array_put(tr);
7026 
7027 	return 0;
7028 }
7029 
7030 static ssize_t
7031 tracing_mark_write(struct file *filp, const char __user *ubuf,
7032 					size_t cnt, loff_t *fpos)
7033 {
7034 	struct trace_array *tr = filp->private_data;
7035 	struct ring_buffer_event *event;
7036 	enum event_trigger_type tt = ETT_NONE;
7037 	struct trace_buffer *buffer;
7038 	struct print_entry *entry;
7039 	ssize_t written;
7040 	int size;
7041 	int len;
7042 
7043 /* Used in tracing_mark_raw_write() as well */
7044 #define FAULTED_STR "<faulted>"
7045 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7046 
7047 	if (tracing_disabled)
7048 		return -EINVAL;
7049 
7050 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7051 		return -EINVAL;
7052 
7053 	if (cnt > TRACE_BUF_SIZE)
7054 		cnt = TRACE_BUF_SIZE;
7055 
7056 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7057 
7058 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7059 
7060 	/* If less than "<faulted>", then make sure we can still add that */
7061 	if (cnt < FAULTED_SIZE)
7062 		size += FAULTED_SIZE - cnt;
7063 
7064 	buffer = tr->array_buffer.buffer;
7065 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7066 					    tracing_gen_ctx());
7067 	if (unlikely(!event))
7068 		/* Ring buffer disabled, return as if not open for write */
7069 		return -EBADF;
7070 
7071 	entry = ring_buffer_event_data(event);
7072 	entry->ip = _THIS_IP_;
7073 
7074 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7075 	if (len) {
7076 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7077 		cnt = FAULTED_SIZE;
7078 		written = -EFAULT;
7079 	} else
7080 		written = cnt;
7081 
7082 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7083 		/* do not add \n before testing triggers, but add \0 */
7084 		entry->buf[cnt] = '\0';
7085 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7086 	}
7087 
7088 	if (entry->buf[cnt - 1] != '\n') {
7089 		entry->buf[cnt] = '\n';
7090 		entry->buf[cnt + 1] = '\0';
7091 	} else
7092 		entry->buf[cnt] = '\0';
7093 
7094 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7095 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7096 	__buffer_unlock_commit(buffer, event);
7097 
7098 	if (tt)
7099 		event_triggers_post_call(tr->trace_marker_file, tt);
7100 
7101 	if (written > 0)
7102 		*fpos += written;
7103 
7104 	return written;
7105 }
7106 
7107 /* Limit it for now to 3K (including tag) */
7108 #define RAW_DATA_MAX_SIZE (1024*3)
7109 
7110 static ssize_t
7111 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7112 					size_t cnt, loff_t *fpos)
7113 {
7114 	struct trace_array *tr = filp->private_data;
7115 	struct ring_buffer_event *event;
7116 	struct trace_buffer *buffer;
7117 	struct raw_data_entry *entry;
7118 	ssize_t written;
7119 	int size;
7120 	int len;
7121 
7122 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7123 
7124 	if (tracing_disabled)
7125 		return -EINVAL;
7126 
7127 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7128 		return -EINVAL;
7129 
7130 	/* The marker must at least have a tag id */
7131 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7132 		return -EINVAL;
7133 
7134 	if (cnt > TRACE_BUF_SIZE)
7135 		cnt = TRACE_BUF_SIZE;
7136 
7137 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7138 
7139 	size = sizeof(*entry) + cnt;
7140 	if (cnt < FAULT_SIZE_ID)
7141 		size += FAULT_SIZE_ID - cnt;
7142 
7143 	buffer = tr->array_buffer.buffer;
7144 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7145 					    tracing_gen_ctx());
7146 	if (!event)
7147 		/* Ring buffer disabled, return as if not open for write */
7148 		return -EBADF;
7149 
7150 	entry = ring_buffer_event_data(event);
7151 
7152 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7153 	if (len) {
7154 		entry->id = -1;
7155 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7156 		written = -EFAULT;
7157 	} else
7158 		written = cnt;
7159 
7160 	__buffer_unlock_commit(buffer, event);
7161 
7162 	if (written > 0)
7163 		*fpos += written;
7164 
7165 	return written;
7166 }
7167 
7168 static int tracing_clock_show(struct seq_file *m, void *v)
7169 {
7170 	struct trace_array *tr = m->private;
7171 	int i;
7172 
7173 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7174 		seq_printf(m,
7175 			"%s%s%s%s", i ? " " : "",
7176 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7177 			i == tr->clock_id ? "]" : "");
7178 	seq_putc(m, '\n');
7179 
7180 	return 0;
7181 }
7182 
7183 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7184 {
7185 	int i;
7186 
7187 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7188 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7189 			break;
7190 	}
7191 	if (i == ARRAY_SIZE(trace_clocks))
7192 		return -EINVAL;
7193 
7194 	mutex_lock(&trace_types_lock);
7195 
7196 	tr->clock_id = i;
7197 
7198 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7199 
7200 	/*
7201 	 * New clock may not be consistent with the previous clock.
7202 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7203 	 */
7204 	tracing_reset_online_cpus(&tr->array_buffer);
7205 
7206 #ifdef CONFIG_TRACER_MAX_TRACE
7207 	if (tr->max_buffer.buffer)
7208 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7209 	tracing_reset_online_cpus(&tr->max_buffer);
7210 #endif
7211 
7212 	mutex_unlock(&trace_types_lock);
7213 
7214 	return 0;
7215 }
7216 
7217 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7218 				   size_t cnt, loff_t *fpos)
7219 {
7220 	struct seq_file *m = filp->private_data;
7221 	struct trace_array *tr = m->private;
7222 	char buf[64];
7223 	const char *clockstr;
7224 	int ret;
7225 
7226 	if (cnt >= sizeof(buf))
7227 		return -EINVAL;
7228 
7229 	if (copy_from_user(buf, ubuf, cnt))
7230 		return -EFAULT;
7231 
7232 	buf[cnt] = 0;
7233 
7234 	clockstr = strstrip(buf);
7235 
7236 	ret = tracing_set_clock(tr, clockstr);
7237 	if (ret)
7238 		return ret;
7239 
7240 	*fpos += cnt;
7241 
7242 	return cnt;
7243 }
7244 
7245 static int tracing_clock_open(struct inode *inode, struct file *file)
7246 {
7247 	struct trace_array *tr = inode->i_private;
7248 	int ret;
7249 
7250 	ret = tracing_check_open_get_tr(tr);
7251 	if (ret)
7252 		return ret;
7253 
7254 	ret = single_open(file, tracing_clock_show, inode->i_private);
7255 	if (ret < 0)
7256 		trace_array_put(tr);
7257 
7258 	return ret;
7259 }
7260 
7261 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7262 {
7263 	struct trace_array *tr = m->private;
7264 
7265 	mutex_lock(&trace_types_lock);
7266 
7267 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7268 		seq_puts(m, "delta [absolute]\n");
7269 	else
7270 		seq_puts(m, "[delta] absolute\n");
7271 
7272 	mutex_unlock(&trace_types_lock);
7273 
7274 	return 0;
7275 }
7276 
7277 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7278 {
7279 	struct trace_array *tr = inode->i_private;
7280 	int ret;
7281 
7282 	ret = tracing_check_open_get_tr(tr);
7283 	if (ret)
7284 		return ret;
7285 
7286 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7287 	if (ret < 0)
7288 		trace_array_put(tr);
7289 
7290 	return ret;
7291 }
7292 
7293 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7294 {
7295 	if (rbe == this_cpu_read(trace_buffered_event))
7296 		return ring_buffer_time_stamp(buffer);
7297 
7298 	return ring_buffer_event_time_stamp(buffer, rbe);
7299 }
7300 
7301 /*
7302  * Set or disable using the per CPU trace_buffer_event when possible.
7303  */
7304 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7305 {
7306 	int ret = 0;
7307 
7308 	mutex_lock(&trace_types_lock);
7309 
7310 	if (set && tr->no_filter_buffering_ref++)
7311 		goto out;
7312 
7313 	if (!set) {
7314 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7315 			ret = -EINVAL;
7316 			goto out;
7317 		}
7318 
7319 		--tr->no_filter_buffering_ref;
7320 	}
7321  out:
7322 	mutex_unlock(&trace_types_lock);
7323 
7324 	return ret;
7325 }
7326 
7327 struct ftrace_buffer_info {
7328 	struct trace_iterator	iter;
7329 	void			*spare;
7330 	unsigned int		spare_cpu;
7331 	unsigned int		read;
7332 };
7333 
7334 #ifdef CONFIG_TRACER_SNAPSHOT
7335 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7336 {
7337 	struct trace_array *tr = inode->i_private;
7338 	struct trace_iterator *iter;
7339 	struct seq_file *m;
7340 	int ret;
7341 
7342 	ret = tracing_check_open_get_tr(tr);
7343 	if (ret)
7344 		return ret;
7345 
7346 	if (file->f_mode & FMODE_READ) {
7347 		iter = __tracing_open(inode, file, true);
7348 		if (IS_ERR(iter))
7349 			ret = PTR_ERR(iter);
7350 	} else {
7351 		/* Writes still need the seq_file to hold the private data */
7352 		ret = -ENOMEM;
7353 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7354 		if (!m)
7355 			goto out;
7356 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7357 		if (!iter) {
7358 			kfree(m);
7359 			goto out;
7360 		}
7361 		ret = 0;
7362 
7363 		iter->tr = tr;
7364 		iter->array_buffer = &tr->max_buffer;
7365 		iter->cpu_file = tracing_get_cpu(inode);
7366 		m->private = iter;
7367 		file->private_data = m;
7368 	}
7369 out:
7370 	if (ret < 0)
7371 		trace_array_put(tr);
7372 
7373 	return ret;
7374 }
7375 
7376 static ssize_t
7377 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7378 		       loff_t *ppos)
7379 {
7380 	struct seq_file *m = filp->private_data;
7381 	struct trace_iterator *iter = m->private;
7382 	struct trace_array *tr = iter->tr;
7383 	unsigned long val;
7384 	int ret;
7385 
7386 	ret = tracing_update_buffers();
7387 	if (ret < 0)
7388 		return ret;
7389 
7390 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7391 	if (ret)
7392 		return ret;
7393 
7394 	mutex_lock(&trace_types_lock);
7395 
7396 	if (tr->current_trace->use_max_tr) {
7397 		ret = -EBUSY;
7398 		goto out;
7399 	}
7400 
7401 	arch_spin_lock(&tr->max_lock);
7402 	if (tr->cond_snapshot)
7403 		ret = -EBUSY;
7404 	arch_spin_unlock(&tr->max_lock);
7405 	if (ret)
7406 		goto out;
7407 
7408 	switch (val) {
7409 	case 0:
7410 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7411 			ret = -EINVAL;
7412 			break;
7413 		}
7414 		if (tr->allocated_snapshot)
7415 			free_snapshot(tr);
7416 		break;
7417 	case 1:
7418 /* Only allow per-cpu swap if the ring buffer supports it */
7419 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7420 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7421 			ret = -EINVAL;
7422 			break;
7423 		}
7424 #endif
7425 		if (tr->allocated_snapshot)
7426 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7427 					&tr->array_buffer, iter->cpu_file);
7428 		else
7429 			ret = tracing_alloc_snapshot_instance(tr);
7430 		if (ret < 0)
7431 			break;
7432 		local_irq_disable();
7433 		/* Now, we're going to swap */
7434 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7435 			update_max_tr(tr, current, smp_processor_id(), NULL);
7436 		else
7437 			update_max_tr_single(tr, current, iter->cpu_file);
7438 		local_irq_enable();
7439 		break;
7440 	default:
7441 		if (tr->allocated_snapshot) {
7442 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7443 				tracing_reset_online_cpus(&tr->max_buffer);
7444 			else
7445 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7446 		}
7447 		break;
7448 	}
7449 
7450 	if (ret >= 0) {
7451 		*ppos += cnt;
7452 		ret = cnt;
7453 	}
7454 out:
7455 	mutex_unlock(&trace_types_lock);
7456 	return ret;
7457 }
7458 
7459 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7460 {
7461 	struct seq_file *m = file->private_data;
7462 	int ret;
7463 
7464 	ret = tracing_release(inode, file);
7465 
7466 	if (file->f_mode & FMODE_READ)
7467 		return ret;
7468 
7469 	/* If write only, the seq_file is just a stub */
7470 	if (m)
7471 		kfree(m->private);
7472 	kfree(m);
7473 
7474 	return 0;
7475 }
7476 
7477 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7478 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7479 				    size_t count, loff_t *ppos);
7480 static int tracing_buffers_release(struct inode *inode, struct file *file);
7481 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7482 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7483 
7484 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7485 {
7486 	struct ftrace_buffer_info *info;
7487 	int ret;
7488 
7489 	/* The following checks for tracefs lockdown */
7490 	ret = tracing_buffers_open(inode, filp);
7491 	if (ret < 0)
7492 		return ret;
7493 
7494 	info = filp->private_data;
7495 
7496 	if (info->iter.trace->use_max_tr) {
7497 		tracing_buffers_release(inode, filp);
7498 		return -EBUSY;
7499 	}
7500 
7501 	info->iter.snapshot = true;
7502 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7503 
7504 	return ret;
7505 }
7506 
7507 #endif /* CONFIG_TRACER_SNAPSHOT */
7508 
7509 
7510 static const struct file_operations tracing_thresh_fops = {
7511 	.open		= tracing_open_generic,
7512 	.read		= tracing_thresh_read,
7513 	.write		= tracing_thresh_write,
7514 	.llseek		= generic_file_llseek,
7515 };
7516 
7517 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7518 static const struct file_operations tracing_max_lat_fops = {
7519 	.open		= tracing_open_generic,
7520 	.read		= tracing_max_lat_read,
7521 	.write		= tracing_max_lat_write,
7522 	.llseek		= generic_file_llseek,
7523 };
7524 #endif
7525 
7526 static const struct file_operations set_tracer_fops = {
7527 	.open		= tracing_open_generic,
7528 	.read		= tracing_set_trace_read,
7529 	.write		= tracing_set_trace_write,
7530 	.llseek		= generic_file_llseek,
7531 };
7532 
7533 static const struct file_operations tracing_pipe_fops = {
7534 	.open		= tracing_open_pipe,
7535 	.poll		= tracing_poll_pipe,
7536 	.read		= tracing_read_pipe,
7537 	.splice_read	= tracing_splice_read_pipe,
7538 	.release	= tracing_release_pipe,
7539 	.llseek		= no_llseek,
7540 };
7541 
7542 static const struct file_operations tracing_entries_fops = {
7543 	.open		= tracing_open_generic_tr,
7544 	.read		= tracing_entries_read,
7545 	.write		= tracing_entries_write,
7546 	.llseek		= generic_file_llseek,
7547 	.release	= tracing_release_generic_tr,
7548 };
7549 
7550 static const struct file_operations tracing_total_entries_fops = {
7551 	.open		= tracing_open_generic_tr,
7552 	.read		= tracing_total_entries_read,
7553 	.llseek		= generic_file_llseek,
7554 	.release	= tracing_release_generic_tr,
7555 };
7556 
7557 static const struct file_operations tracing_free_buffer_fops = {
7558 	.open		= tracing_open_generic_tr,
7559 	.write		= tracing_free_buffer_write,
7560 	.release	= tracing_free_buffer_release,
7561 };
7562 
7563 static const struct file_operations tracing_mark_fops = {
7564 	.open		= tracing_open_generic_tr,
7565 	.write		= tracing_mark_write,
7566 	.llseek		= generic_file_llseek,
7567 	.release	= tracing_release_generic_tr,
7568 };
7569 
7570 static const struct file_operations tracing_mark_raw_fops = {
7571 	.open		= tracing_open_generic_tr,
7572 	.write		= tracing_mark_raw_write,
7573 	.llseek		= generic_file_llseek,
7574 	.release	= tracing_release_generic_tr,
7575 };
7576 
7577 static const struct file_operations trace_clock_fops = {
7578 	.open		= tracing_clock_open,
7579 	.read		= seq_read,
7580 	.llseek		= seq_lseek,
7581 	.release	= tracing_single_release_tr,
7582 	.write		= tracing_clock_write,
7583 };
7584 
7585 static const struct file_operations trace_time_stamp_mode_fops = {
7586 	.open		= tracing_time_stamp_mode_open,
7587 	.read		= seq_read,
7588 	.llseek		= seq_lseek,
7589 	.release	= tracing_single_release_tr,
7590 };
7591 
7592 #ifdef CONFIG_TRACER_SNAPSHOT
7593 static const struct file_operations snapshot_fops = {
7594 	.open		= tracing_snapshot_open,
7595 	.read		= seq_read,
7596 	.write		= tracing_snapshot_write,
7597 	.llseek		= tracing_lseek,
7598 	.release	= tracing_snapshot_release,
7599 };
7600 
7601 static const struct file_operations snapshot_raw_fops = {
7602 	.open		= snapshot_raw_open,
7603 	.read		= tracing_buffers_read,
7604 	.release	= tracing_buffers_release,
7605 	.splice_read	= tracing_buffers_splice_read,
7606 	.llseek		= no_llseek,
7607 };
7608 
7609 #endif /* CONFIG_TRACER_SNAPSHOT */
7610 
7611 /*
7612  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7613  * @filp: The active open file structure
7614  * @ubuf: The userspace provided buffer to read value into
7615  * @cnt: The maximum number of bytes to read
7616  * @ppos: The current "file" position
7617  *
7618  * This function implements the write interface for a struct trace_min_max_param.
7619  * The filp->private_data must point to a trace_min_max_param structure that
7620  * defines where to write the value, the min and the max acceptable values,
7621  * and a lock to protect the write.
7622  */
7623 static ssize_t
7624 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7625 {
7626 	struct trace_min_max_param *param = filp->private_data;
7627 	u64 val;
7628 	int err;
7629 
7630 	if (!param)
7631 		return -EFAULT;
7632 
7633 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7634 	if (err)
7635 		return err;
7636 
7637 	if (param->lock)
7638 		mutex_lock(param->lock);
7639 
7640 	if (param->min && val < *param->min)
7641 		err = -EINVAL;
7642 
7643 	if (param->max && val > *param->max)
7644 		err = -EINVAL;
7645 
7646 	if (!err)
7647 		*param->val = val;
7648 
7649 	if (param->lock)
7650 		mutex_unlock(param->lock);
7651 
7652 	if (err)
7653 		return err;
7654 
7655 	return cnt;
7656 }
7657 
7658 /*
7659  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7660  * @filp: The active open file structure
7661  * @ubuf: The userspace provided buffer to read value into
7662  * @cnt: The maximum number of bytes to read
7663  * @ppos: The current "file" position
7664  *
7665  * This function implements the read interface for a struct trace_min_max_param.
7666  * The filp->private_data must point to a trace_min_max_param struct with valid
7667  * data.
7668  */
7669 static ssize_t
7670 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7671 {
7672 	struct trace_min_max_param *param = filp->private_data;
7673 	char buf[U64_STR_SIZE];
7674 	int len;
7675 	u64 val;
7676 
7677 	if (!param)
7678 		return -EFAULT;
7679 
7680 	val = *param->val;
7681 
7682 	if (cnt > sizeof(buf))
7683 		cnt = sizeof(buf);
7684 
7685 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7686 
7687 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7688 }
7689 
7690 const struct file_operations trace_min_max_fops = {
7691 	.open		= tracing_open_generic,
7692 	.read		= trace_min_max_read,
7693 	.write		= trace_min_max_write,
7694 };
7695 
7696 #define TRACING_LOG_ERRS_MAX	8
7697 #define TRACING_LOG_LOC_MAX	128
7698 
7699 #define CMD_PREFIX "  Command: "
7700 
7701 struct err_info {
7702 	const char	**errs;	/* ptr to loc-specific array of err strings */
7703 	u8		type;	/* index into errs -> specific err string */
7704 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7705 	u64		ts;
7706 };
7707 
7708 struct tracing_log_err {
7709 	struct list_head	list;
7710 	struct err_info		info;
7711 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7712 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7713 };
7714 
7715 static DEFINE_MUTEX(tracing_err_log_lock);
7716 
7717 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7718 {
7719 	struct tracing_log_err *err;
7720 
7721 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7722 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7723 		if (!err)
7724 			err = ERR_PTR(-ENOMEM);
7725 		tr->n_err_log_entries++;
7726 
7727 		return err;
7728 	}
7729 
7730 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7731 	list_del(&err->list);
7732 
7733 	return err;
7734 }
7735 
7736 /**
7737  * err_pos - find the position of a string within a command for error careting
7738  * @cmd: The tracing command that caused the error
7739  * @str: The string to position the caret at within @cmd
7740  *
7741  * Finds the position of the first occurrence of @str within @cmd.  The
7742  * return value can be passed to tracing_log_err() for caret placement
7743  * within @cmd.
7744  *
7745  * Returns the index within @cmd of the first occurrence of @str or 0
7746  * if @str was not found.
7747  */
7748 unsigned int err_pos(char *cmd, const char *str)
7749 {
7750 	char *found;
7751 
7752 	if (WARN_ON(!strlen(cmd)))
7753 		return 0;
7754 
7755 	found = strstr(cmd, str);
7756 	if (found)
7757 		return found - cmd;
7758 
7759 	return 0;
7760 }
7761 
7762 /**
7763  * tracing_log_err - write an error to the tracing error log
7764  * @tr: The associated trace array for the error (NULL for top level array)
7765  * @loc: A string describing where the error occurred
7766  * @cmd: The tracing command that caused the error
7767  * @errs: The array of loc-specific static error strings
7768  * @type: The index into errs[], which produces the specific static err string
7769  * @pos: The position the caret should be placed in the cmd
7770  *
7771  * Writes an error into tracing/error_log of the form:
7772  *
7773  * <loc>: error: <text>
7774  *   Command: <cmd>
7775  *              ^
7776  *
7777  * tracing/error_log is a small log file containing the last
7778  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7779  * unless there has been a tracing error, and the error log can be
7780  * cleared and have its memory freed by writing the empty string in
7781  * truncation mode to it i.e. echo > tracing/error_log.
7782  *
7783  * NOTE: the @errs array along with the @type param are used to
7784  * produce a static error string - this string is not copied and saved
7785  * when the error is logged - only a pointer to it is saved.  See
7786  * existing callers for examples of how static strings are typically
7787  * defined for use with tracing_log_err().
7788  */
7789 void tracing_log_err(struct trace_array *tr,
7790 		     const char *loc, const char *cmd,
7791 		     const char **errs, u8 type, u8 pos)
7792 {
7793 	struct tracing_log_err *err;
7794 
7795 	if (!tr)
7796 		tr = &global_trace;
7797 
7798 	mutex_lock(&tracing_err_log_lock);
7799 	err = get_tracing_log_err(tr);
7800 	if (PTR_ERR(err) == -ENOMEM) {
7801 		mutex_unlock(&tracing_err_log_lock);
7802 		return;
7803 	}
7804 
7805 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7806 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7807 
7808 	err->info.errs = errs;
7809 	err->info.type = type;
7810 	err->info.pos = pos;
7811 	err->info.ts = local_clock();
7812 
7813 	list_add_tail(&err->list, &tr->err_log);
7814 	mutex_unlock(&tracing_err_log_lock);
7815 }
7816 
7817 static void clear_tracing_err_log(struct trace_array *tr)
7818 {
7819 	struct tracing_log_err *err, *next;
7820 
7821 	mutex_lock(&tracing_err_log_lock);
7822 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7823 		list_del(&err->list);
7824 		kfree(err);
7825 	}
7826 
7827 	tr->n_err_log_entries = 0;
7828 	mutex_unlock(&tracing_err_log_lock);
7829 }
7830 
7831 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7832 {
7833 	struct trace_array *tr = m->private;
7834 
7835 	mutex_lock(&tracing_err_log_lock);
7836 
7837 	return seq_list_start(&tr->err_log, *pos);
7838 }
7839 
7840 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7841 {
7842 	struct trace_array *tr = m->private;
7843 
7844 	return seq_list_next(v, &tr->err_log, pos);
7845 }
7846 
7847 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7848 {
7849 	mutex_unlock(&tracing_err_log_lock);
7850 }
7851 
7852 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7853 {
7854 	u8 i;
7855 
7856 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7857 		seq_putc(m, ' ');
7858 	for (i = 0; i < pos; i++)
7859 		seq_putc(m, ' ');
7860 	seq_puts(m, "^\n");
7861 }
7862 
7863 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7864 {
7865 	struct tracing_log_err *err = v;
7866 
7867 	if (err) {
7868 		const char *err_text = err->info.errs[err->info.type];
7869 		u64 sec = err->info.ts;
7870 		u32 nsec;
7871 
7872 		nsec = do_div(sec, NSEC_PER_SEC);
7873 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7874 			   err->loc, err_text);
7875 		seq_printf(m, "%s", err->cmd);
7876 		tracing_err_log_show_pos(m, err->info.pos);
7877 	}
7878 
7879 	return 0;
7880 }
7881 
7882 static const struct seq_operations tracing_err_log_seq_ops = {
7883 	.start  = tracing_err_log_seq_start,
7884 	.next   = tracing_err_log_seq_next,
7885 	.stop   = tracing_err_log_seq_stop,
7886 	.show   = tracing_err_log_seq_show
7887 };
7888 
7889 static int tracing_err_log_open(struct inode *inode, struct file *file)
7890 {
7891 	struct trace_array *tr = inode->i_private;
7892 	int ret = 0;
7893 
7894 	ret = tracing_check_open_get_tr(tr);
7895 	if (ret)
7896 		return ret;
7897 
7898 	/* If this file was opened for write, then erase contents */
7899 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7900 		clear_tracing_err_log(tr);
7901 
7902 	if (file->f_mode & FMODE_READ) {
7903 		ret = seq_open(file, &tracing_err_log_seq_ops);
7904 		if (!ret) {
7905 			struct seq_file *m = file->private_data;
7906 			m->private = tr;
7907 		} else {
7908 			trace_array_put(tr);
7909 		}
7910 	}
7911 	return ret;
7912 }
7913 
7914 static ssize_t tracing_err_log_write(struct file *file,
7915 				     const char __user *buffer,
7916 				     size_t count, loff_t *ppos)
7917 {
7918 	return count;
7919 }
7920 
7921 static int tracing_err_log_release(struct inode *inode, struct file *file)
7922 {
7923 	struct trace_array *tr = inode->i_private;
7924 
7925 	trace_array_put(tr);
7926 
7927 	if (file->f_mode & FMODE_READ)
7928 		seq_release(inode, file);
7929 
7930 	return 0;
7931 }
7932 
7933 static const struct file_operations tracing_err_log_fops = {
7934 	.open           = tracing_err_log_open,
7935 	.write		= tracing_err_log_write,
7936 	.read           = seq_read,
7937 	.llseek         = seq_lseek,
7938 	.release        = tracing_err_log_release,
7939 };
7940 
7941 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7942 {
7943 	struct trace_array *tr = inode->i_private;
7944 	struct ftrace_buffer_info *info;
7945 	int ret;
7946 
7947 	ret = tracing_check_open_get_tr(tr);
7948 	if (ret)
7949 		return ret;
7950 
7951 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7952 	if (!info) {
7953 		trace_array_put(tr);
7954 		return -ENOMEM;
7955 	}
7956 
7957 	mutex_lock(&trace_types_lock);
7958 
7959 	info->iter.tr		= tr;
7960 	info->iter.cpu_file	= tracing_get_cpu(inode);
7961 	info->iter.trace	= tr->current_trace;
7962 	info->iter.array_buffer = &tr->array_buffer;
7963 	info->spare		= NULL;
7964 	/* Force reading ring buffer for first read */
7965 	info->read		= (unsigned int)-1;
7966 
7967 	filp->private_data = info;
7968 
7969 	tr->trace_ref++;
7970 
7971 	mutex_unlock(&trace_types_lock);
7972 
7973 	ret = nonseekable_open(inode, filp);
7974 	if (ret < 0)
7975 		trace_array_put(tr);
7976 
7977 	return ret;
7978 }
7979 
7980 static __poll_t
7981 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7982 {
7983 	struct ftrace_buffer_info *info = filp->private_data;
7984 	struct trace_iterator *iter = &info->iter;
7985 
7986 	return trace_poll(iter, filp, poll_table);
7987 }
7988 
7989 static ssize_t
7990 tracing_buffers_read(struct file *filp, char __user *ubuf,
7991 		     size_t count, loff_t *ppos)
7992 {
7993 	struct ftrace_buffer_info *info = filp->private_data;
7994 	struct trace_iterator *iter = &info->iter;
7995 	ssize_t ret = 0;
7996 	ssize_t size;
7997 
7998 	if (!count)
7999 		return 0;
8000 
8001 #ifdef CONFIG_TRACER_MAX_TRACE
8002 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8003 		return -EBUSY;
8004 #endif
8005 
8006 	if (!info->spare) {
8007 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8008 							  iter->cpu_file);
8009 		if (IS_ERR(info->spare)) {
8010 			ret = PTR_ERR(info->spare);
8011 			info->spare = NULL;
8012 		} else {
8013 			info->spare_cpu = iter->cpu_file;
8014 		}
8015 	}
8016 	if (!info->spare)
8017 		return ret;
8018 
8019 	/* Do we have previous read data to read? */
8020 	if (info->read < PAGE_SIZE)
8021 		goto read;
8022 
8023  again:
8024 	trace_access_lock(iter->cpu_file);
8025 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8026 				    &info->spare,
8027 				    count,
8028 				    iter->cpu_file, 0);
8029 	trace_access_unlock(iter->cpu_file);
8030 
8031 	if (ret < 0) {
8032 		if (trace_empty(iter)) {
8033 			if ((filp->f_flags & O_NONBLOCK))
8034 				return -EAGAIN;
8035 
8036 			ret = wait_on_pipe(iter, 0);
8037 			if (ret)
8038 				return ret;
8039 
8040 			goto again;
8041 		}
8042 		return 0;
8043 	}
8044 
8045 	info->read = 0;
8046  read:
8047 	size = PAGE_SIZE - info->read;
8048 	if (size > count)
8049 		size = count;
8050 
8051 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8052 	if (ret == size)
8053 		return -EFAULT;
8054 
8055 	size -= ret;
8056 
8057 	*ppos += size;
8058 	info->read += size;
8059 
8060 	return size;
8061 }
8062 
8063 static int tracing_buffers_release(struct inode *inode, struct file *file)
8064 {
8065 	struct ftrace_buffer_info *info = file->private_data;
8066 	struct trace_iterator *iter = &info->iter;
8067 
8068 	mutex_lock(&trace_types_lock);
8069 
8070 	iter->tr->trace_ref--;
8071 
8072 	__trace_array_put(iter->tr);
8073 
8074 	if (info->spare)
8075 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8076 					   info->spare_cpu, info->spare);
8077 	kvfree(info);
8078 
8079 	mutex_unlock(&trace_types_lock);
8080 
8081 	return 0;
8082 }
8083 
8084 struct buffer_ref {
8085 	struct trace_buffer	*buffer;
8086 	void			*page;
8087 	int			cpu;
8088 	refcount_t		refcount;
8089 };
8090 
8091 static void buffer_ref_release(struct buffer_ref *ref)
8092 {
8093 	if (!refcount_dec_and_test(&ref->refcount))
8094 		return;
8095 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8096 	kfree(ref);
8097 }
8098 
8099 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8100 				    struct pipe_buffer *buf)
8101 {
8102 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8103 
8104 	buffer_ref_release(ref);
8105 	buf->private = 0;
8106 }
8107 
8108 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8109 				struct pipe_buffer *buf)
8110 {
8111 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8112 
8113 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8114 		return false;
8115 
8116 	refcount_inc(&ref->refcount);
8117 	return true;
8118 }
8119 
8120 /* Pipe buffer operations for a buffer. */
8121 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8122 	.release		= buffer_pipe_buf_release,
8123 	.get			= buffer_pipe_buf_get,
8124 };
8125 
8126 /*
8127  * Callback from splice_to_pipe(), if we need to release some pages
8128  * at the end of the spd in case we error'ed out in filling the pipe.
8129  */
8130 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8131 {
8132 	struct buffer_ref *ref =
8133 		(struct buffer_ref *)spd->partial[i].private;
8134 
8135 	buffer_ref_release(ref);
8136 	spd->partial[i].private = 0;
8137 }
8138 
8139 static ssize_t
8140 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8141 			    struct pipe_inode_info *pipe, size_t len,
8142 			    unsigned int flags)
8143 {
8144 	struct ftrace_buffer_info *info = file->private_data;
8145 	struct trace_iterator *iter = &info->iter;
8146 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8147 	struct page *pages_def[PIPE_DEF_BUFFERS];
8148 	struct splice_pipe_desc spd = {
8149 		.pages		= pages_def,
8150 		.partial	= partial_def,
8151 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8152 		.ops		= &buffer_pipe_buf_ops,
8153 		.spd_release	= buffer_spd_release,
8154 	};
8155 	struct buffer_ref *ref;
8156 	int entries, i;
8157 	ssize_t ret = 0;
8158 
8159 #ifdef CONFIG_TRACER_MAX_TRACE
8160 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8161 		return -EBUSY;
8162 #endif
8163 
8164 	if (*ppos & (PAGE_SIZE - 1))
8165 		return -EINVAL;
8166 
8167 	if (len & (PAGE_SIZE - 1)) {
8168 		if (len < PAGE_SIZE)
8169 			return -EINVAL;
8170 		len &= PAGE_MASK;
8171 	}
8172 
8173 	if (splice_grow_spd(pipe, &spd))
8174 		return -ENOMEM;
8175 
8176  again:
8177 	trace_access_lock(iter->cpu_file);
8178 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8179 
8180 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8181 		struct page *page;
8182 		int r;
8183 
8184 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8185 		if (!ref) {
8186 			ret = -ENOMEM;
8187 			break;
8188 		}
8189 
8190 		refcount_set(&ref->refcount, 1);
8191 		ref->buffer = iter->array_buffer->buffer;
8192 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8193 		if (IS_ERR(ref->page)) {
8194 			ret = PTR_ERR(ref->page);
8195 			ref->page = NULL;
8196 			kfree(ref);
8197 			break;
8198 		}
8199 		ref->cpu = iter->cpu_file;
8200 
8201 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8202 					  len, iter->cpu_file, 1);
8203 		if (r < 0) {
8204 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8205 						   ref->page);
8206 			kfree(ref);
8207 			break;
8208 		}
8209 
8210 		page = virt_to_page(ref->page);
8211 
8212 		spd.pages[i] = page;
8213 		spd.partial[i].len = PAGE_SIZE;
8214 		spd.partial[i].offset = 0;
8215 		spd.partial[i].private = (unsigned long)ref;
8216 		spd.nr_pages++;
8217 		*ppos += PAGE_SIZE;
8218 
8219 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8220 	}
8221 
8222 	trace_access_unlock(iter->cpu_file);
8223 	spd.nr_pages = i;
8224 
8225 	/* did we read anything? */
8226 	if (!spd.nr_pages) {
8227 		if (ret)
8228 			goto out;
8229 
8230 		ret = -EAGAIN;
8231 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8232 			goto out;
8233 
8234 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8235 		if (ret)
8236 			goto out;
8237 
8238 		goto again;
8239 	}
8240 
8241 	ret = splice_to_pipe(pipe, &spd);
8242 out:
8243 	splice_shrink_spd(&spd);
8244 
8245 	return ret;
8246 }
8247 
8248 static const struct file_operations tracing_buffers_fops = {
8249 	.open		= tracing_buffers_open,
8250 	.read		= tracing_buffers_read,
8251 	.poll		= tracing_buffers_poll,
8252 	.release	= tracing_buffers_release,
8253 	.splice_read	= tracing_buffers_splice_read,
8254 	.llseek		= no_llseek,
8255 };
8256 
8257 static ssize_t
8258 tracing_stats_read(struct file *filp, char __user *ubuf,
8259 		   size_t count, loff_t *ppos)
8260 {
8261 	struct inode *inode = file_inode(filp);
8262 	struct trace_array *tr = inode->i_private;
8263 	struct array_buffer *trace_buf = &tr->array_buffer;
8264 	int cpu = tracing_get_cpu(inode);
8265 	struct trace_seq *s;
8266 	unsigned long cnt;
8267 	unsigned long long t;
8268 	unsigned long usec_rem;
8269 
8270 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8271 	if (!s)
8272 		return -ENOMEM;
8273 
8274 	trace_seq_init(s);
8275 
8276 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8277 	trace_seq_printf(s, "entries: %ld\n", cnt);
8278 
8279 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8280 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8281 
8282 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8283 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8284 
8285 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8286 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8287 
8288 	if (trace_clocks[tr->clock_id].in_ns) {
8289 		/* local or global for trace_clock */
8290 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8291 		usec_rem = do_div(t, USEC_PER_SEC);
8292 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8293 								t, usec_rem);
8294 
8295 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8296 		usec_rem = do_div(t, USEC_PER_SEC);
8297 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8298 	} else {
8299 		/* counter or tsc mode for trace_clock */
8300 		trace_seq_printf(s, "oldest event ts: %llu\n",
8301 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8302 
8303 		trace_seq_printf(s, "now ts: %llu\n",
8304 				ring_buffer_time_stamp(trace_buf->buffer));
8305 	}
8306 
8307 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8308 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8309 
8310 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8311 	trace_seq_printf(s, "read events: %ld\n", cnt);
8312 
8313 	count = simple_read_from_buffer(ubuf, count, ppos,
8314 					s->buffer, trace_seq_used(s));
8315 
8316 	kfree(s);
8317 
8318 	return count;
8319 }
8320 
8321 static const struct file_operations tracing_stats_fops = {
8322 	.open		= tracing_open_generic_tr,
8323 	.read		= tracing_stats_read,
8324 	.llseek		= generic_file_llseek,
8325 	.release	= tracing_release_generic_tr,
8326 };
8327 
8328 #ifdef CONFIG_DYNAMIC_FTRACE
8329 
8330 static ssize_t
8331 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8332 		  size_t cnt, loff_t *ppos)
8333 {
8334 	ssize_t ret;
8335 	char *buf;
8336 	int r;
8337 
8338 	/* 256 should be plenty to hold the amount needed */
8339 	buf = kmalloc(256, GFP_KERNEL);
8340 	if (!buf)
8341 		return -ENOMEM;
8342 
8343 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8344 		      ftrace_update_tot_cnt,
8345 		      ftrace_number_of_pages,
8346 		      ftrace_number_of_groups);
8347 
8348 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8349 	kfree(buf);
8350 	return ret;
8351 }
8352 
8353 static const struct file_operations tracing_dyn_info_fops = {
8354 	.open		= tracing_open_generic,
8355 	.read		= tracing_read_dyn_info,
8356 	.llseek		= generic_file_llseek,
8357 };
8358 #endif /* CONFIG_DYNAMIC_FTRACE */
8359 
8360 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8361 static void
8362 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8363 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8364 		void *data)
8365 {
8366 	tracing_snapshot_instance(tr);
8367 }
8368 
8369 static void
8370 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8371 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8372 		      void *data)
8373 {
8374 	struct ftrace_func_mapper *mapper = data;
8375 	long *count = NULL;
8376 
8377 	if (mapper)
8378 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8379 
8380 	if (count) {
8381 
8382 		if (*count <= 0)
8383 			return;
8384 
8385 		(*count)--;
8386 	}
8387 
8388 	tracing_snapshot_instance(tr);
8389 }
8390 
8391 static int
8392 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8393 		      struct ftrace_probe_ops *ops, void *data)
8394 {
8395 	struct ftrace_func_mapper *mapper = data;
8396 	long *count = NULL;
8397 
8398 	seq_printf(m, "%ps:", (void *)ip);
8399 
8400 	seq_puts(m, "snapshot");
8401 
8402 	if (mapper)
8403 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8404 
8405 	if (count)
8406 		seq_printf(m, ":count=%ld\n", *count);
8407 	else
8408 		seq_puts(m, ":unlimited\n");
8409 
8410 	return 0;
8411 }
8412 
8413 static int
8414 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8415 		     unsigned long ip, void *init_data, void **data)
8416 {
8417 	struct ftrace_func_mapper *mapper = *data;
8418 
8419 	if (!mapper) {
8420 		mapper = allocate_ftrace_func_mapper();
8421 		if (!mapper)
8422 			return -ENOMEM;
8423 		*data = mapper;
8424 	}
8425 
8426 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8427 }
8428 
8429 static void
8430 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8431 		     unsigned long ip, void *data)
8432 {
8433 	struct ftrace_func_mapper *mapper = data;
8434 
8435 	if (!ip) {
8436 		if (!mapper)
8437 			return;
8438 		free_ftrace_func_mapper(mapper, NULL);
8439 		return;
8440 	}
8441 
8442 	ftrace_func_mapper_remove_ip(mapper, ip);
8443 }
8444 
8445 static struct ftrace_probe_ops snapshot_probe_ops = {
8446 	.func			= ftrace_snapshot,
8447 	.print			= ftrace_snapshot_print,
8448 };
8449 
8450 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8451 	.func			= ftrace_count_snapshot,
8452 	.print			= ftrace_snapshot_print,
8453 	.init			= ftrace_snapshot_init,
8454 	.free			= ftrace_snapshot_free,
8455 };
8456 
8457 static int
8458 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8459 			       char *glob, char *cmd, char *param, int enable)
8460 {
8461 	struct ftrace_probe_ops *ops;
8462 	void *count = (void *)-1;
8463 	char *number;
8464 	int ret;
8465 
8466 	if (!tr)
8467 		return -ENODEV;
8468 
8469 	/* hash funcs only work with set_ftrace_filter */
8470 	if (!enable)
8471 		return -EINVAL;
8472 
8473 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8474 
8475 	if (glob[0] == '!')
8476 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8477 
8478 	if (!param)
8479 		goto out_reg;
8480 
8481 	number = strsep(&param, ":");
8482 
8483 	if (!strlen(number))
8484 		goto out_reg;
8485 
8486 	/*
8487 	 * We use the callback data field (which is a pointer)
8488 	 * as our counter.
8489 	 */
8490 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8491 	if (ret)
8492 		return ret;
8493 
8494  out_reg:
8495 	ret = tracing_alloc_snapshot_instance(tr);
8496 	if (ret < 0)
8497 		goto out;
8498 
8499 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8500 
8501  out:
8502 	return ret < 0 ? ret : 0;
8503 }
8504 
8505 static struct ftrace_func_command ftrace_snapshot_cmd = {
8506 	.name			= "snapshot",
8507 	.func			= ftrace_trace_snapshot_callback,
8508 };
8509 
8510 static __init int register_snapshot_cmd(void)
8511 {
8512 	return register_ftrace_command(&ftrace_snapshot_cmd);
8513 }
8514 #else
8515 static inline __init int register_snapshot_cmd(void) { return 0; }
8516 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8517 
8518 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8519 {
8520 	if (WARN_ON(!tr->dir))
8521 		return ERR_PTR(-ENODEV);
8522 
8523 	/* Top directory uses NULL as the parent */
8524 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8525 		return NULL;
8526 
8527 	/* All sub buffers have a descriptor */
8528 	return tr->dir;
8529 }
8530 
8531 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8532 {
8533 	struct dentry *d_tracer;
8534 
8535 	if (tr->percpu_dir)
8536 		return tr->percpu_dir;
8537 
8538 	d_tracer = tracing_get_dentry(tr);
8539 	if (IS_ERR(d_tracer))
8540 		return NULL;
8541 
8542 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8543 
8544 	MEM_FAIL(!tr->percpu_dir,
8545 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8546 
8547 	return tr->percpu_dir;
8548 }
8549 
8550 static struct dentry *
8551 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8552 		      void *data, long cpu, const struct file_operations *fops)
8553 {
8554 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8555 
8556 	if (ret) /* See tracing_get_cpu() */
8557 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8558 	return ret;
8559 }
8560 
8561 static void
8562 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8563 {
8564 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8565 	struct dentry *d_cpu;
8566 	char cpu_dir[30]; /* 30 characters should be more than enough */
8567 
8568 	if (!d_percpu)
8569 		return;
8570 
8571 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8572 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8573 	if (!d_cpu) {
8574 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8575 		return;
8576 	}
8577 
8578 	/* per cpu trace_pipe */
8579 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8580 				tr, cpu, &tracing_pipe_fops);
8581 
8582 	/* per cpu trace */
8583 	trace_create_cpu_file("trace", 0644, d_cpu,
8584 				tr, cpu, &tracing_fops);
8585 
8586 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8587 				tr, cpu, &tracing_buffers_fops);
8588 
8589 	trace_create_cpu_file("stats", 0444, d_cpu,
8590 				tr, cpu, &tracing_stats_fops);
8591 
8592 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8593 				tr, cpu, &tracing_entries_fops);
8594 
8595 #ifdef CONFIG_TRACER_SNAPSHOT
8596 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8597 				tr, cpu, &snapshot_fops);
8598 
8599 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8600 				tr, cpu, &snapshot_raw_fops);
8601 #endif
8602 }
8603 
8604 #ifdef CONFIG_FTRACE_SELFTEST
8605 /* Let selftest have access to static functions in this file */
8606 #include "trace_selftest.c"
8607 #endif
8608 
8609 static ssize_t
8610 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8611 			loff_t *ppos)
8612 {
8613 	struct trace_option_dentry *topt = filp->private_data;
8614 	char *buf;
8615 
8616 	if (topt->flags->val & topt->opt->bit)
8617 		buf = "1\n";
8618 	else
8619 		buf = "0\n";
8620 
8621 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8622 }
8623 
8624 static ssize_t
8625 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8626 			 loff_t *ppos)
8627 {
8628 	struct trace_option_dentry *topt = filp->private_data;
8629 	unsigned long val;
8630 	int ret;
8631 
8632 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8633 	if (ret)
8634 		return ret;
8635 
8636 	if (val != 0 && val != 1)
8637 		return -EINVAL;
8638 
8639 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8640 		mutex_lock(&trace_types_lock);
8641 		ret = __set_tracer_option(topt->tr, topt->flags,
8642 					  topt->opt, !val);
8643 		mutex_unlock(&trace_types_lock);
8644 		if (ret)
8645 			return ret;
8646 	}
8647 
8648 	*ppos += cnt;
8649 
8650 	return cnt;
8651 }
8652 
8653 
8654 static const struct file_operations trace_options_fops = {
8655 	.open = tracing_open_generic,
8656 	.read = trace_options_read,
8657 	.write = trace_options_write,
8658 	.llseek	= generic_file_llseek,
8659 };
8660 
8661 /*
8662  * In order to pass in both the trace_array descriptor as well as the index
8663  * to the flag that the trace option file represents, the trace_array
8664  * has a character array of trace_flags_index[], which holds the index
8665  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8666  * The address of this character array is passed to the flag option file
8667  * read/write callbacks.
8668  *
8669  * In order to extract both the index and the trace_array descriptor,
8670  * get_tr_index() uses the following algorithm.
8671  *
8672  *   idx = *ptr;
8673  *
8674  * As the pointer itself contains the address of the index (remember
8675  * index[1] == 1).
8676  *
8677  * Then to get the trace_array descriptor, by subtracting that index
8678  * from the ptr, we get to the start of the index itself.
8679  *
8680  *   ptr - idx == &index[0]
8681  *
8682  * Then a simple container_of() from that pointer gets us to the
8683  * trace_array descriptor.
8684  */
8685 static void get_tr_index(void *data, struct trace_array **ptr,
8686 			 unsigned int *pindex)
8687 {
8688 	*pindex = *(unsigned char *)data;
8689 
8690 	*ptr = container_of(data - *pindex, struct trace_array,
8691 			    trace_flags_index);
8692 }
8693 
8694 static ssize_t
8695 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8696 			loff_t *ppos)
8697 {
8698 	void *tr_index = filp->private_data;
8699 	struct trace_array *tr;
8700 	unsigned int index;
8701 	char *buf;
8702 
8703 	get_tr_index(tr_index, &tr, &index);
8704 
8705 	if (tr->trace_flags & (1 << index))
8706 		buf = "1\n";
8707 	else
8708 		buf = "0\n";
8709 
8710 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8711 }
8712 
8713 static ssize_t
8714 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8715 			 loff_t *ppos)
8716 {
8717 	void *tr_index = filp->private_data;
8718 	struct trace_array *tr;
8719 	unsigned int index;
8720 	unsigned long val;
8721 	int ret;
8722 
8723 	get_tr_index(tr_index, &tr, &index);
8724 
8725 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8726 	if (ret)
8727 		return ret;
8728 
8729 	if (val != 0 && val != 1)
8730 		return -EINVAL;
8731 
8732 	mutex_lock(&event_mutex);
8733 	mutex_lock(&trace_types_lock);
8734 	ret = set_tracer_flag(tr, 1 << index, val);
8735 	mutex_unlock(&trace_types_lock);
8736 	mutex_unlock(&event_mutex);
8737 
8738 	if (ret < 0)
8739 		return ret;
8740 
8741 	*ppos += cnt;
8742 
8743 	return cnt;
8744 }
8745 
8746 static const struct file_operations trace_options_core_fops = {
8747 	.open = tracing_open_generic,
8748 	.read = trace_options_core_read,
8749 	.write = trace_options_core_write,
8750 	.llseek = generic_file_llseek,
8751 };
8752 
8753 struct dentry *trace_create_file(const char *name,
8754 				 umode_t mode,
8755 				 struct dentry *parent,
8756 				 void *data,
8757 				 const struct file_operations *fops)
8758 {
8759 	struct dentry *ret;
8760 
8761 	ret = tracefs_create_file(name, mode, parent, data, fops);
8762 	if (!ret)
8763 		pr_warn("Could not create tracefs '%s' entry\n", name);
8764 
8765 	return ret;
8766 }
8767 
8768 
8769 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8770 {
8771 	struct dentry *d_tracer;
8772 
8773 	if (tr->options)
8774 		return tr->options;
8775 
8776 	d_tracer = tracing_get_dentry(tr);
8777 	if (IS_ERR(d_tracer))
8778 		return NULL;
8779 
8780 	tr->options = tracefs_create_dir("options", d_tracer);
8781 	if (!tr->options) {
8782 		pr_warn("Could not create tracefs directory 'options'\n");
8783 		return NULL;
8784 	}
8785 
8786 	return tr->options;
8787 }
8788 
8789 static void
8790 create_trace_option_file(struct trace_array *tr,
8791 			 struct trace_option_dentry *topt,
8792 			 struct tracer_flags *flags,
8793 			 struct tracer_opt *opt)
8794 {
8795 	struct dentry *t_options;
8796 
8797 	t_options = trace_options_init_dentry(tr);
8798 	if (!t_options)
8799 		return;
8800 
8801 	topt->flags = flags;
8802 	topt->opt = opt;
8803 	topt->tr = tr;
8804 
8805 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8806 				    &trace_options_fops);
8807 
8808 }
8809 
8810 static void
8811 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8812 {
8813 	struct trace_option_dentry *topts;
8814 	struct trace_options *tr_topts;
8815 	struct tracer_flags *flags;
8816 	struct tracer_opt *opts;
8817 	int cnt;
8818 	int i;
8819 
8820 	if (!tracer)
8821 		return;
8822 
8823 	flags = tracer->flags;
8824 
8825 	if (!flags || !flags->opts)
8826 		return;
8827 
8828 	/*
8829 	 * If this is an instance, only create flags for tracers
8830 	 * the instance may have.
8831 	 */
8832 	if (!trace_ok_for_array(tracer, tr))
8833 		return;
8834 
8835 	for (i = 0; i < tr->nr_topts; i++) {
8836 		/* Make sure there's no duplicate flags. */
8837 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8838 			return;
8839 	}
8840 
8841 	opts = flags->opts;
8842 
8843 	for (cnt = 0; opts[cnt].name; cnt++)
8844 		;
8845 
8846 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8847 	if (!topts)
8848 		return;
8849 
8850 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8851 			    GFP_KERNEL);
8852 	if (!tr_topts) {
8853 		kfree(topts);
8854 		return;
8855 	}
8856 
8857 	tr->topts = tr_topts;
8858 	tr->topts[tr->nr_topts].tracer = tracer;
8859 	tr->topts[tr->nr_topts].topts = topts;
8860 	tr->nr_topts++;
8861 
8862 	for (cnt = 0; opts[cnt].name; cnt++) {
8863 		create_trace_option_file(tr, &topts[cnt], flags,
8864 					 &opts[cnt]);
8865 		MEM_FAIL(topts[cnt].entry == NULL,
8866 			  "Failed to create trace option: %s",
8867 			  opts[cnt].name);
8868 	}
8869 }
8870 
8871 static struct dentry *
8872 create_trace_option_core_file(struct trace_array *tr,
8873 			      const char *option, long index)
8874 {
8875 	struct dentry *t_options;
8876 
8877 	t_options = trace_options_init_dentry(tr);
8878 	if (!t_options)
8879 		return NULL;
8880 
8881 	return trace_create_file(option, 0644, t_options,
8882 				 (void *)&tr->trace_flags_index[index],
8883 				 &trace_options_core_fops);
8884 }
8885 
8886 static void create_trace_options_dir(struct trace_array *tr)
8887 {
8888 	struct dentry *t_options;
8889 	bool top_level = tr == &global_trace;
8890 	int i;
8891 
8892 	t_options = trace_options_init_dentry(tr);
8893 	if (!t_options)
8894 		return;
8895 
8896 	for (i = 0; trace_options[i]; i++) {
8897 		if (top_level ||
8898 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8899 			create_trace_option_core_file(tr, trace_options[i], i);
8900 	}
8901 }
8902 
8903 static ssize_t
8904 rb_simple_read(struct file *filp, char __user *ubuf,
8905 	       size_t cnt, loff_t *ppos)
8906 {
8907 	struct trace_array *tr = filp->private_data;
8908 	char buf[64];
8909 	int r;
8910 
8911 	r = tracer_tracing_is_on(tr);
8912 	r = sprintf(buf, "%d\n", r);
8913 
8914 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8915 }
8916 
8917 static ssize_t
8918 rb_simple_write(struct file *filp, const char __user *ubuf,
8919 		size_t cnt, loff_t *ppos)
8920 {
8921 	struct trace_array *tr = filp->private_data;
8922 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8923 	unsigned long val;
8924 	int ret;
8925 
8926 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8927 	if (ret)
8928 		return ret;
8929 
8930 	if (buffer) {
8931 		mutex_lock(&trace_types_lock);
8932 		if (!!val == tracer_tracing_is_on(tr)) {
8933 			val = 0; /* do nothing */
8934 		} else if (val) {
8935 			tracer_tracing_on(tr);
8936 			if (tr->current_trace->start)
8937 				tr->current_trace->start(tr);
8938 		} else {
8939 			tracer_tracing_off(tr);
8940 			if (tr->current_trace->stop)
8941 				tr->current_trace->stop(tr);
8942 		}
8943 		mutex_unlock(&trace_types_lock);
8944 	}
8945 
8946 	(*ppos)++;
8947 
8948 	return cnt;
8949 }
8950 
8951 static const struct file_operations rb_simple_fops = {
8952 	.open		= tracing_open_generic_tr,
8953 	.read		= rb_simple_read,
8954 	.write		= rb_simple_write,
8955 	.release	= tracing_release_generic_tr,
8956 	.llseek		= default_llseek,
8957 };
8958 
8959 static ssize_t
8960 buffer_percent_read(struct file *filp, char __user *ubuf,
8961 		    size_t cnt, loff_t *ppos)
8962 {
8963 	struct trace_array *tr = filp->private_data;
8964 	char buf[64];
8965 	int r;
8966 
8967 	r = tr->buffer_percent;
8968 	r = sprintf(buf, "%d\n", r);
8969 
8970 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8971 }
8972 
8973 static ssize_t
8974 buffer_percent_write(struct file *filp, const char __user *ubuf,
8975 		     size_t cnt, loff_t *ppos)
8976 {
8977 	struct trace_array *tr = filp->private_data;
8978 	unsigned long val;
8979 	int ret;
8980 
8981 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8982 	if (ret)
8983 		return ret;
8984 
8985 	if (val > 100)
8986 		return -EINVAL;
8987 
8988 	if (!val)
8989 		val = 1;
8990 
8991 	tr->buffer_percent = val;
8992 
8993 	(*ppos)++;
8994 
8995 	return cnt;
8996 }
8997 
8998 static const struct file_operations buffer_percent_fops = {
8999 	.open		= tracing_open_generic_tr,
9000 	.read		= buffer_percent_read,
9001 	.write		= buffer_percent_write,
9002 	.release	= tracing_release_generic_tr,
9003 	.llseek		= default_llseek,
9004 };
9005 
9006 static struct dentry *trace_instance_dir;
9007 
9008 static void
9009 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9010 
9011 static int
9012 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9013 {
9014 	enum ring_buffer_flags rb_flags;
9015 
9016 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9017 
9018 	buf->tr = tr;
9019 
9020 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9021 	if (!buf->buffer)
9022 		return -ENOMEM;
9023 
9024 	buf->data = alloc_percpu(struct trace_array_cpu);
9025 	if (!buf->data) {
9026 		ring_buffer_free(buf->buffer);
9027 		buf->buffer = NULL;
9028 		return -ENOMEM;
9029 	}
9030 
9031 	/* Allocate the first page for all buffers */
9032 	set_buffer_entries(&tr->array_buffer,
9033 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9034 
9035 	return 0;
9036 }
9037 
9038 static int allocate_trace_buffers(struct trace_array *tr, int size)
9039 {
9040 	int ret;
9041 
9042 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9043 	if (ret)
9044 		return ret;
9045 
9046 #ifdef CONFIG_TRACER_MAX_TRACE
9047 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9048 				    allocate_snapshot ? size : 1);
9049 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9050 		ring_buffer_free(tr->array_buffer.buffer);
9051 		tr->array_buffer.buffer = NULL;
9052 		free_percpu(tr->array_buffer.data);
9053 		tr->array_buffer.data = NULL;
9054 		return -ENOMEM;
9055 	}
9056 	tr->allocated_snapshot = allocate_snapshot;
9057 
9058 	/*
9059 	 * Only the top level trace array gets its snapshot allocated
9060 	 * from the kernel command line.
9061 	 */
9062 	allocate_snapshot = false;
9063 #endif
9064 
9065 	return 0;
9066 }
9067 
9068 static void free_trace_buffer(struct array_buffer *buf)
9069 {
9070 	if (buf->buffer) {
9071 		ring_buffer_free(buf->buffer);
9072 		buf->buffer = NULL;
9073 		free_percpu(buf->data);
9074 		buf->data = NULL;
9075 	}
9076 }
9077 
9078 static void free_trace_buffers(struct trace_array *tr)
9079 {
9080 	if (!tr)
9081 		return;
9082 
9083 	free_trace_buffer(&tr->array_buffer);
9084 
9085 #ifdef CONFIG_TRACER_MAX_TRACE
9086 	free_trace_buffer(&tr->max_buffer);
9087 #endif
9088 }
9089 
9090 static void init_trace_flags_index(struct trace_array *tr)
9091 {
9092 	int i;
9093 
9094 	/* Used by the trace options files */
9095 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9096 		tr->trace_flags_index[i] = i;
9097 }
9098 
9099 static void __update_tracer_options(struct trace_array *tr)
9100 {
9101 	struct tracer *t;
9102 
9103 	for (t = trace_types; t; t = t->next)
9104 		add_tracer_options(tr, t);
9105 }
9106 
9107 static void update_tracer_options(struct trace_array *tr)
9108 {
9109 	mutex_lock(&trace_types_lock);
9110 	__update_tracer_options(tr);
9111 	mutex_unlock(&trace_types_lock);
9112 }
9113 
9114 /* Must have trace_types_lock held */
9115 struct trace_array *trace_array_find(const char *instance)
9116 {
9117 	struct trace_array *tr, *found = NULL;
9118 
9119 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9120 		if (tr->name && strcmp(tr->name, instance) == 0) {
9121 			found = tr;
9122 			break;
9123 		}
9124 	}
9125 
9126 	return found;
9127 }
9128 
9129 struct trace_array *trace_array_find_get(const char *instance)
9130 {
9131 	struct trace_array *tr;
9132 
9133 	mutex_lock(&trace_types_lock);
9134 	tr = trace_array_find(instance);
9135 	if (tr)
9136 		tr->ref++;
9137 	mutex_unlock(&trace_types_lock);
9138 
9139 	return tr;
9140 }
9141 
9142 static int trace_array_create_dir(struct trace_array *tr)
9143 {
9144 	int ret;
9145 
9146 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9147 	if (!tr->dir)
9148 		return -EINVAL;
9149 
9150 	ret = event_trace_add_tracer(tr->dir, tr);
9151 	if (ret) {
9152 		tracefs_remove(tr->dir);
9153 		return ret;
9154 	}
9155 
9156 	init_tracer_tracefs(tr, tr->dir);
9157 	__update_tracer_options(tr);
9158 
9159 	return ret;
9160 }
9161 
9162 static struct trace_array *trace_array_create(const char *name)
9163 {
9164 	struct trace_array *tr;
9165 	int ret;
9166 
9167 	ret = -ENOMEM;
9168 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9169 	if (!tr)
9170 		return ERR_PTR(ret);
9171 
9172 	tr->name = kstrdup(name, GFP_KERNEL);
9173 	if (!tr->name)
9174 		goto out_free_tr;
9175 
9176 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9177 		goto out_free_tr;
9178 
9179 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9180 
9181 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9182 
9183 	raw_spin_lock_init(&tr->start_lock);
9184 
9185 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9186 
9187 	tr->current_trace = &nop_trace;
9188 
9189 	INIT_LIST_HEAD(&tr->systems);
9190 	INIT_LIST_HEAD(&tr->events);
9191 	INIT_LIST_HEAD(&tr->hist_vars);
9192 	INIT_LIST_HEAD(&tr->err_log);
9193 
9194 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9195 		goto out_free_tr;
9196 
9197 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9198 		goto out_free_tr;
9199 
9200 	ftrace_init_trace_array(tr);
9201 
9202 	init_trace_flags_index(tr);
9203 
9204 	if (trace_instance_dir) {
9205 		ret = trace_array_create_dir(tr);
9206 		if (ret)
9207 			goto out_free_tr;
9208 	} else
9209 		__trace_early_add_events(tr);
9210 
9211 	list_add(&tr->list, &ftrace_trace_arrays);
9212 
9213 	tr->ref++;
9214 
9215 	return tr;
9216 
9217  out_free_tr:
9218 	ftrace_free_ftrace_ops(tr);
9219 	free_trace_buffers(tr);
9220 	free_cpumask_var(tr->tracing_cpumask);
9221 	kfree(tr->name);
9222 	kfree(tr);
9223 
9224 	return ERR_PTR(ret);
9225 }
9226 
9227 static int instance_mkdir(const char *name)
9228 {
9229 	struct trace_array *tr;
9230 	int ret;
9231 
9232 	mutex_lock(&event_mutex);
9233 	mutex_lock(&trace_types_lock);
9234 
9235 	ret = -EEXIST;
9236 	if (trace_array_find(name))
9237 		goto out_unlock;
9238 
9239 	tr = trace_array_create(name);
9240 
9241 	ret = PTR_ERR_OR_ZERO(tr);
9242 
9243 out_unlock:
9244 	mutex_unlock(&trace_types_lock);
9245 	mutex_unlock(&event_mutex);
9246 	return ret;
9247 }
9248 
9249 /**
9250  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9251  * @name: The name of the trace array to be looked up/created.
9252  *
9253  * Returns pointer to trace array with given name.
9254  * NULL, if it cannot be created.
9255  *
9256  * NOTE: This function increments the reference counter associated with the
9257  * trace array returned. This makes sure it cannot be freed while in use.
9258  * Use trace_array_put() once the trace array is no longer needed.
9259  * If the trace_array is to be freed, trace_array_destroy() needs to
9260  * be called after the trace_array_put(), or simply let user space delete
9261  * it from the tracefs instances directory. But until the
9262  * trace_array_put() is called, user space can not delete it.
9263  *
9264  */
9265 struct trace_array *trace_array_get_by_name(const char *name)
9266 {
9267 	struct trace_array *tr;
9268 
9269 	mutex_lock(&event_mutex);
9270 	mutex_lock(&trace_types_lock);
9271 
9272 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9273 		if (tr->name && strcmp(tr->name, name) == 0)
9274 			goto out_unlock;
9275 	}
9276 
9277 	tr = trace_array_create(name);
9278 
9279 	if (IS_ERR(tr))
9280 		tr = NULL;
9281 out_unlock:
9282 	if (tr)
9283 		tr->ref++;
9284 
9285 	mutex_unlock(&trace_types_lock);
9286 	mutex_unlock(&event_mutex);
9287 	return tr;
9288 }
9289 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9290 
9291 static int __remove_instance(struct trace_array *tr)
9292 {
9293 	int i;
9294 
9295 	/* Reference counter for a newly created trace array = 1. */
9296 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9297 		return -EBUSY;
9298 
9299 	list_del(&tr->list);
9300 
9301 	/* Disable all the flags that were enabled coming in */
9302 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9303 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9304 			set_tracer_flag(tr, 1 << i, 0);
9305 	}
9306 
9307 	tracing_set_nop(tr);
9308 	clear_ftrace_function_probes(tr);
9309 	event_trace_del_tracer(tr);
9310 	ftrace_clear_pids(tr);
9311 	ftrace_destroy_function_files(tr);
9312 	tracefs_remove(tr->dir);
9313 	free_percpu(tr->last_func_repeats);
9314 	free_trace_buffers(tr);
9315 
9316 	for (i = 0; i < tr->nr_topts; i++) {
9317 		kfree(tr->topts[i].topts);
9318 	}
9319 	kfree(tr->topts);
9320 
9321 	free_cpumask_var(tr->tracing_cpumask);
9322 	kfree(tr->name);
9323 	kfree(tr);
9324 
9325 	return 0;
9326 }
9327 
9328 int trace_array_destroy(struct trace_array *this_tr)
9329 {
9330 	struct trace_array *tr;
9331 	int ret;
9332 
9333 	if (!this_tr)
9334 		return -EINVAL;
9335 
9336 	mutex_lock(&event_mutex);
9337 	mutex_lock(&trace_types_lock);
9338 
9339 	ret = -ENODEV;
9340 
9341 	/* Making sure trace array exists before destroying it. */
9342 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9343 		if (tr == this_tr) {
9344 			ret = __remove_instance(tr);
9345 			break;
9346 		}
9347 	}
9348 
9349 	mutex_unlock(&trace_types_lock);
9350 	mutex_unlock(&event_mutex);
9351 
9352 	return ret;
9353 }
9354 EXPORT_SYMBOL_GPL(trace_array_destroy);
9355 
9356 static int instance_rmdir(const char *name)
9357 {
9358 	struct trace_array *tr;
9359 	int ret;
9360 
9361 	mutex_lock(&event_mutex);
9362 	mutex_lock(&trace_types_lock);
9363 
9364 	ret = -ENODEV;
9365 	tr = trace_array_find(name);
9366 	if (tr)
9367 		ret = __remove_instance(tr);
9368 
9369 	mutex_unlock(&trace_types_lock);
9370 	mutex_unlock(&event_mutex);
9371 
9372 	return ret;
9373 }
9374 
9375 static __init void create_trace_instances(struct dentry *d_tracer)
9376 {
9377 	struct trace_array *tr;
9378 
9379 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9380 							 instance_mkdir,
9381 							 instance_rmdir);
9382 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9383 		return;
9384 
9385 	mutex_lock(&event_mutex);
9386 	mutex_lock(&trace_types_lock);
9387 
9388 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9389 		if (!tr->name)
9390 			continue;
9391 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9392 			     "Failed to create instance directory\n"))
9393 			break;
9394 	}
9395 
9396 	mutex_unlock(&trace_types_lock);
9397 	mutex_unlock(&event_mutex);
9398 }
9399 
9400 static void
9401 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9402 {
9403 	struct trace_event_file *file;
9404 	int cpu;
9405 
9406 	trace_create_file("available_tracers", 0444, d_tracer,
9407 			tr, &show_traces_fops);
9408 
9409 	trace_create_file("current_tracer", 0644, d_tracer,
9410 			tr, &set_tracer_fops);
9411 
9412 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9413 			  tr, &tracing_cpumask_fops);
9414 
9415 	trace_create_file("trace_options", 0644, d_tracer,
9416 			  tr, &tracing_iter_fops);
9417 
9418 	trace_create_file("trace", 0644, d_tracer,
9419 			  tr, &tracing_fops);
9420 
9421 	trace_create_file("trace_pipe", 0444, d_tracer,
9422 			  tr, &tracing_pipe_fops);
9423 
9424 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9425 			  tr, &tracing_entries_fops);
9426 
9427 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9428 			  tr, &tracing_total_entries_fops);
9429 
9430 	trace_create_file("free_buffer", 0200, d_tracer,
9431 			  tr, &tracing_free_buffer_fops);
9432 
9433 	trace_create_file("trace_marker", 0220, d_tracer,
9434 			  tr, &tracing_mark_fops);
9435 
9436 	file = __find_event_file(tr, "ftrace", "print");
9437 	if (file && file->dir)
9438 		trace_create_file("trigger", 0644, file->dir, file,
9439 				  &event_trigger_fops);
9440 	tr->trace_marker_file = file;
9441 
9442 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9443 			  tr, &tracing_mark_raw_fops);
9444 
9445 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9446 			  &trace_clock_fops);
9447 
9448 	trace_create_file("tracing_on", 0644, d_tracer,
9449 			  tr, &rb_simple_fops);
9450 
9451 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9452 			  &trace_time_stamp_mode_fops);
9453 
9454 	tr->buffer_percent = 50;
9455 
9456 	trace_create_file("buffer_percent", 0444, d_tracer,
9457 			tr, &buffer_percent_fops);
9458 
9459 	create_trace_options_dir(tr);
9460 
9461 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9462 	trace_create_maxlat_file(tr, d_tracer);
9463 #endif
9464 
9465 	if (ftrace_create_function_files(tr, d_tracer))
9466 		MEM_FAIL(1, "Could not allocate function filter files");
9467 
9468 #ifdef CONFIG_TRACER_SNAPSHOT
9469 	trace_create_file("snapshot", 0644, d_tracer,
9470 			  tr, &snapshot_fops);
9471 #endif
9472 
9473 	trace_create_file("error_log", 0644, d_tracer,
9474 			  tr, &tracing_err_log_fops);
9475 
9476 	for_each_tracing_cpu(cpu)
9477 		tracing_init_tracefs_percpu(tr, cpu);
9478 
9479 	ftrace_init_tracefs(tr, d_tracer);
9480 }
9481 
9482 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9483 {
9484 	struct vfsmount *mnt;
9485 	struct file_system_type *type;
9486 
9487 	/*
9488 	 * To maintain backward compatibility for tools that mount
9489 	 * debugfs to get to the tracing facility, tracefs is automatically
9490 	 * mounted to the debugfs/tracing directory.
9491 	 */
9492 	type = get_fs_type("tracefs");
9493 	if (!type)
9494 		return NULL;
9495 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9496 	put_filesystem(type);
9497 	if (IS_ERR(mnt))
9498 		return NULL;
9499 	mntget(mnt);
9500 
9501 	return mnt;
9502 }
9503 
9504 /**
9505  * tracing_init_dentry - initialize top level trace array
9506  *
9507  * This is called when creating files or directories in the tracing
9508  * directory. It is called via fs_initcall() by any of the boot up code
9509  * and expects to return the dentry of the top level tracing directory.
9510  */
9511 int tracing_init_dentry(void)
9512 {
9513 	struct trace_array *tr = &global_trace;
9514 
9515 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9516 		pr_warn("Tracing disabled due to lockdown\n");
9517 		return -EPERM;
9518 	}
9519 
9520 	/* The top level trace array uses  NULL as parent */
9521 	if (tr->dir)
9522 		return 0;
9523 
9524 	if (WARN_ON(!tracefs_initialized()))
9525 		return -ENODEV;
9526 
9527 	/*
9528 	 * As there may still be users that expect the tracing
9529 	 * files to exist in debugfs/tracing, we must automount
9530 	 * the tracefs file system there, so older tools still
9531 	 * work with the newer kernel.
9532 	 */
9533 	tr->dir = debugfs_create_automount("tracing", NULL,
9534 					   trace_automount, NULL);
9535 
9536 	return 0;
9537 }
9538 
9539 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9540 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9541 
9542 static struct workqueue_struct *eval_map_wq __initdata;
9543 static struct work_struct eval_map_work __initdata;
9544 
9545 static void __init eval_map_work_func(struct work_struct *work)
9546 {
9547 	int len;
9548 
9549 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9550 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9551 }
9552 
9553 static int __init trace_eval_init(void)
9554 {
9555 	INIT_WORK(&eval_map_work, eval_map_work_func);
9556 
9557 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9558 	if (!eval_map_wq) {
9559 		pr_err("Unable to allocate eval_map_wq\n");
9560 		/* Do work here */
9561 		eval_map_work_func(&eval_map_work);
9562 		return -ENOMEM;
9563 	}
9564 
9565 	queue_work(eval_map_wq, &eval_map_work);
9566 	return 0;
9567 }
9568 
9569 static int __init trace_eval_sync(void)
9570 {
9571 	/* Make sure the eval map updates are finished */
9572 	if (eval_map_wq)
9573 		destroy_workqueue(eval_map_wq);
9574 	return 0;
9575 }
9576 
9577 late_initcall_sync(trace_eval_sync);
9578 
9579 
9580 #ifdef CONFIG_MODULES
9581 static void trace_module_add_evals(struct module *mod)
9582 {
9583 	if (!mod->num_trace_evals)
9584 		return;
9585 
9586 	/*
9587 	 * Modules with bad taint do not have events created, do
9588 	 * not bother with enums either.
9589 	 */
9590 	if (trace_module_has_bad_taint(mod))
9591 		return;
9592 
9593 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9594 }
9595 
9596 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9597 static void trace_module_remove_evals(struct module *mod)
9598 {
9599 	union trace_eval_map_item *map;
9600 	union trace_eval_map_item **last = &trace_eval_maps;
9601 
9602 	if (!mod->num_trace_evals)
9603 		return;
9604 
9605 	mutex_lock(&trace_eval_mutex);
9606 
9607 	map = trace_eval_maps;
9608 
9609 	while (map) {
9610 		if (map->head.mod == mod)
9611 			break;
9612 		map = trace_eval_jmp_to_tail(map);
9613 		last = &map->tail.next;
9614 		map = map->tail.next;
9615 	}
9616 	if (!map)
9617 		goto out;
9618 
9619 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9620 	kfree(map);
9621  out:
9622 	mutex_unlock(&trace_eval_mutex);
9623 }
9624 #else
9625 static inline void trace_module_remove_evals(struct module *mod) { }
9626 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9627 
9628 static int trace_module_notify(struct notifier_block *self,
9629 			       unsigned long val, void *data)
9630 {
9631 	struct module *mod = data;
9632 
9633 	switch (val) {
9634 	case MODULE_STATE_COMING:
9635 		trace_module_add_evals(mod);
9636 		break;
9637 	case MODULE_STATE_GOING:
9638 		trace_module_remove_evals(mod);
9639 		break;
9640 	}
9641 
9642 	return NOTIFY_OK;
9643 }
9644 
9645 static struct notifier_block trace_module_nb = {
9646 	.notifier_call = trace_module_notify,
9647 	.priority = 0,
9648 };
9649 #endif /* CONFIG_MODULES */
9650 
9651 static __init int tracer_init_tracefs(void)
9652 {
9653 	int ret;
9654 
9655 	trace_access_lock_init();
9656 
9657 	ret = tracing_init_dentry();
9658 	if (ret)
9659 		return 0;
9660 
9661 	event_trace_init();
9662 
9663 	init_tracer_tracefs(&global_trace, NULL);
9664 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9665 
9666 	trace_create_file("tracing_thresh", 0644, NULL,
9667 			&global_trace, &tracing_thresh_fops);
9668 
9669 	trace_create_file("README", 0444, NULL,
9670 			NULL, &tracing_readme_fops);
9671 
9672 	trace_create_file("saved_cmdlines", 0444, NULL,
9673 			NULL, &tracing_saved_cmdlines_fops);
9674 
9675 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9676 			  NULL, &tracing_saved_cmdlines_size_fops);
9677 
9678 	trace_create_file("saved_tgids", 0444, NULL,
9679 			NULL, &tracing_saved_tgids_fops);
9680 
9681 	trace_eval_init();
9682 
9683 	trace_create_eval_file(NULL);
9684 
9685 #ifdef CONFIG_MODULES
9686 	register_module_notifier(&trace_module_nb);
9687 #endif
9688 
9689 #ifdef CONFIG_DYNAMIC_FTRACE
9690 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9691 			NULL, &tracing_dyn_info_fops);
9692 #endif
9693 
9694 	create_trace_instances(NULL);
9695 
9696 	update_tracer_options(&global_trace);
9697 
9698 	return 0;
9699 }
9700 
9701 fs_initcall(tracer_init_tracefs);
9702 
9703 static int trace_panic_handler(struct notifier_block *this,
9704 			       unsigned long event, void *unused)
9705 {
9706 	if (ftrace_dump_on_oops)
9707 		ftrace_dump(ftrace_dump_on_oops);
9708 	return NOTIFY_OK;
9709 }
9710 
9711 static struct notifier_block trace_panic_notifier = {
9712 	.notifier_call  = trace_panic_handler,
9713 	.next           = NULL,
9714 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9715 };
9716 
9717 static int trace_die_handler(struct notifier_block *self,
9718 			     unsigned long val,
9719 			     void *data)
9720 {
9721 	switch (val) {
9722 	case DIE_OOPS:
9723 		if (ftrace_dump_on_oops)
9724 			ftrace_dump(ftrace_dump_on_oops);
9725 		break;
9726 	default:
9727 		break;
9728 	}
9729 	return NOTIFY_OK;
9730 }
9731 
9732 static struct notifier_block trace_die_notifier = {
9733 	.notifier_call = trace_die_handler,
9734 	.priority = 200
9735 };
9736 
9737 /*
9738  * printk is set to max of 1024, we really don't need it that big.
9739  * Nothing should be printing 1000 characters anyway.
9740  */
9741 #define TRACE_MAX_PRINT		1000
9742 
9743 /*
9744  * Define here KERN_TRACE so that we have one place to modify
9745  * it if we decide to change what log level the ftrace dump
9746  * should be at.
9747  */
9748 #define KERN_TRACE		KERN_EMERG
9749 
9750 void
9751 trace_printk_seq(struct trace_seq *s)
9752 {
9753 	/* Probably should print a warning here. */
9754 	if (s->seq.len >= TRACE_MAX_PRINT)
9755 		s->seq.len = TRACE_MAX_PRINT;
9756 
9757 	/*
9758 	 * More paranoid code. Although the buffer size is set to
9759 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9760 	 * an extra layer of protection.
9761 	 */
9762 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9763 		s->seq.len = s->seq.size - 1;
9764 
9765 	/* should be zero ended, but we are paranoid. */
9766 	s->buffer[s->seq.len] = 0;
9767 
9768 	printk(KERN_TRACE "%s", s->buffer);
9769 
9770 	trace_seq_init(s);
9771 }
9772 
9773 void trace_init_global_iter(struct trace_iterator *iter)
9774 {
9775 	iter->tr = &global_trace;
9776 	iter->trace = iter->tr->current_trace;
9777 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9778 	iter->array_buffer = &global_trace.array_buffer;
9779 
9780 	if (iter->trace && iter->trace->open)
9781 		iter->trace->open(iter);
9782 
9783 	/* Annotate start of buffers if we had overruns */
9784 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9785 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9786 
9787 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9788 	if (trace_clocks[iter->tr->clock_id].in_ns)
9789 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9790 }
9791 
9792 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9793 {
9794 	/* use static because iter can be a bit big for the stack */
9795 	static struct trace_iterator iter;
9796 	static atomic_t dump_running;
9797 	struct trace_array *tr = &global_trace;
9798 	unsigned int old_userobj;
9799 	unsigned long flags;
9800 	int cnt = 0, cpu;
9801 
9802 	/* Only allow one dump user at a time. */
9803 	if (atomic_inc_return(&dump_running) != 1) {
9804 		atomic_dec(&dump_running);
9805 		return;
9806 	}
9807 
9808 	/*
9809 	 * Always turn off tracing when we dump.
9810 	 * We don't need to show trace output of what happens
9811 	 * between multiple crashes.
9812 	 *
9813 	 * If the user does a sysrq-z, then they can re-enable
9814 	 * tracing with echo 1 > tracing_on.
9815 	 */
9816 	tracing_off();
9817 
9818 	local_irq_save(flags);
9819 	printk_nmi_direct_enter();
9820 
9821 	/* Simulate the iterator */
9822 	trace_init_global_iter(&iter);
9823 	/* Can not use kmalloc for iter.temp and iter.fmt */
9824 	iter.temp = static_temp_buf;
9825 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9826 	iter.fmt = static_fmt_buf;
9827 	iter.fmt_size = STATIC_FMT_BUF_SIZE;
9828 
9829 	for_each_tracing_cpu(cpu) {
9830 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9831 	}
9832 
9833 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9834 
9835 	/* don't look at user memory in panic mode */
9836 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9837 
9838 	switch (oops_dump_mode) {
9839 	case DUMP_ALL:
9840 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9841 		break;
9842 	case DUMP_ORIG:
9843 		iter.cpu_file = raw_smp_processor_id();
9844 		break;
9845 	case DUMP_NONE:
9846 		goto out_enable;
9847 	default:
9848 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9849 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9850 	}
9851 
9852 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9853 
9854 	/* Did function tracer already get disabled? */
9855 	if (ftrace_is_dead()) {
9856 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9857 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9858 	}
9859 
9860 	/*
9861 	 * We need to stop all tracing on all CPUS to read
9862 	 * the next buffer. This is a bit expensive, but is
9863 	 * not done often. We fill all what we can read,
9864 	 * and then release the locks again.
9865 	 */
9866 
9867 	while (!trace_empty(&iter)) {
9868 
9869 		if (!cnt)
9870 			printk(KERN_TRACE "---------------------------------\n");
9871 
9872 		cnt++;
9873 
9874 		trace_iterator_reset(&iter);
9875 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9876 
9877 		if (trace_find_next_entry_inc(&iter) != NULL) {
9878 			int ret;
9879 
9880 			ret = print_trace_line(&iter);
9881 			if (ret != TRACE_TYPE_NO_CONSUME)
9882 				trace_consume(&iter);
9883 		}
9884 		touch_nmi_watchdog();
9885 
9886 		trace_printk_seq(&iter.seq);
9887 	}
9888 
9889 	if (!cnt)
9890 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9891 	else
9892 		printk(KERN_TRACE "---------------------------------\n");
9893 
9894  out_enable:
9895 	tr->trace_flags |= old_userobj;
9896 
9897 	for_each_tracing_cpu(cpu) {
9898 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9899 	}
9900 	atomic_dec(&dump_running);
9901 	printk_nmi_direct_exit();
9902 	local_irq_restore(flags);
9903 }
9904 EXPORT_SYMBOL_GPL(ftrace_dump);
9905 
9906 #define WRITE_BUFSIZE  4096
9907 
9908 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9909 				size_t count, loff_t *ppos,
9910 				int (*createfn)(const char *))
9911 {
9912 	char *kbuf, *buf, *tmp;
9913 	int ret = 0;
9914 	size_t done = 0;
9915 	size_t size;
9916 
9917 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9918 	if (!kbuf)
9919 		return -ENOMEM;
9920 
9921 	while (done < count) {
9922 		size = count - done;
9923 
9924 		if (size >= WRITE_BUFSIZE)
9925 			size = WRITE_BUFSIZE - 1;
9926 
9927 		if (copy_from_user(kbuf, buffer + done, size)) {
9928 			ret = -EFAULT;
9929 			goto out;
9930 		}
9931 		kbuf[size] = '\0';
9932 		buf = kbuf;
9933 		do {
9934 			tmp = strchr(buf, '\n');
9935 			if (tmp) {
9936 				*tmp = '\0';
9937 				size = tmp - buf + 1;
9938 			} else {
9939 				size = strlen(buf);
9940 				if (done + size < count) {
9941 					if (buf != kbuf)
9942 						break;
9943 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9944 					pr_warn("Line length is too long: Should be less than %d\n",
9945 						WRITE_BUFSIZE - 2);
9946 					ret = -EINVAL;
9947 					goto out;
9948 				}
9949 			}
9950 			done += size;
9951 
9952 			/* Remove comments */
9953 			tmp = strchr(buf, '#');
9954 
9955 			if (tmp)
9956 				*tmp = '\0';
9957 
9958 			ret = createfn(buf);
9959 			if (ret)
9960 				goto out;
9961 			buf += size;
9962 
9963 		} while (done < count);
9964 	}
9965 	ret = done;
9966 
9967 out:
9968 	kfree(kbuf);
9969 
9970 	return ret;
9971 }
9972 
9973 __init static int tracer_alloc_buffers(void)
9974 {
9975 	int ring_buf_size;
9976 	int ret = -ENOMEM;
9977 
9978 
9979 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9980 		pr_warn("Tracing disabled due to lockdown\n");
9981 		return -EPERM;
9982 	}
9983 
9984 	/*
9985 	 * Make sure we don't accidentally add more trace options
9986 	 * than we have bits for.
9987 	 */
9988 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9989 
9990 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9991 		goto out;
9992 
9993 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9994 		goto out_free_buffer_mask;
9995 
9996 	/* Only allocate trace_printk buffers if a trace_printk exists */
9997 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9998 		/* Must be called before global_trace.buffer is allocated */
9999 		trace_printk_init_buffers();
10000 
10001 	/* To save memory, keep the ring buffer size to its minimum */
10002 	if (ring_buffer_expanded)
10003 		ring_buf_size = trace_buf_size;
10004 	else
10005 		ring_buf_size = 1;
10006 
10007 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10008 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10009 
10010 	raw_spin_lock_init(&global_trace.start_lock);
10011 
10012 	/*
10013 	 * The prepare callbacks allocates some memory for the ring buffer. We
10014 	 * don't free the buffer if the CPU goes down. If we were to free
10015 	 * the buffer, then the user would lose any trace that was in the
10016 	 * buffer. The memory will be removed once the "instance" is removed.
10017 	 */
10018 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10019 				      "trace/RB:preapre", trace_rb_cpu_prepare,
10020 				      NULL);
10021 	if (ret < 0)
10022 		goto out_free_cpumask;
10023 	/* Used for event triggers */
10024 	ret = -ENOMEM;
10025 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10026 	if (!temp_buffer)
10027 		goto out_rm_hp_state;
10028 
10029 	if (trace_create_savedcmd() < 0)
10030 		goto out_free_temp_buffer;
10031 
10032 	/* TODO: make the number of buffers hot pluggable with CPUS */
10033 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10034 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10035 		goto out_free_savedcmd;
10036 	}
10037 
10038 	if (global_trace.buffer_disabled)
10039 		tracing_off();
10040 
10041 	if (trace_boot_clock) {
10042 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10043 		if (ret < 0)
10044 			pr_warn("Trace clock %s not defined, going back to default\n",
10045 				trace_boot_clock);
10046 	}
10047 
10048 	/*
10049 	 * register_tracer() might reference current_trace, so it
10050 	 * needs to be set before we register anything. This is
10051 	 * just a bootstrap of current_trace anyway.
10052 	 */
10053 	global_trace.current_trace = &nop_trace;
10054 
10055 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10056 
10057 	ftrace_init_global_array_ops(&global_trace);
10058 
10059 	init_trace_flags_index(&global_trace);
10060 
10061 	register_tracer(&nop_trace);
10062 
10063 	/* Function tracing may start here (via kernel command line) */
10064 	init_function_trace();
10065 
10066 	/* All seems OK, enable tracing */
10067 	tracing_disabled = 0;
10068 
10069 	atomic_notifier_chain_register(&panic_notifier_list,
10070 				       &trace_panic_notifier);
10071 
10072 	register_die_notifier(&trace_die_notifier);
10073 
10074 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10075 
10076 	INIT_LIST_HEAD(&global_trace.systems);
10077 	INIT_LIST_HEAD(&global_trace.events);
10078 	INIT_LIST_HEAD(&global_trace.hist_vars);
10079 	INIT_LIST_HEAD(&global_trace.err_log);
10080 	list_add(&global_trace.list, &ftrace_trace_arrays);
10081 
10082 	apply_trace_boot_options();
10083 
10084 	register_snapshot_cmd();
10085 
10086 	test_can_verify();
10087 
10088 	return 0;
10089 
10090 out_free_savedcmd:
10091 	free_saved_cmdlines_buffer(savedcmd);
10092 out_free_temp_buffer:
10093 	ring_buffer_free(temp_buffer);
10094 out_rm_hp_state:
10095 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10096 out_free_cpumask:
10097 	free_cpumask_var(global_trace.tracing_cpumask);
10098 out_free_buffer_mask:
10099 	free_cpumask_var(tracing_buffer_mask);
10100 out:
10101 	return ret;
10102 }
10103 
10104 void __init early_trace_init(void)
10105 {
10106 	if (tracepoint_printk) {
10107 		tracepoint_print_iter =
10108 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10109 		if (MEM_FAIL(!tracepoint_print_iter,
10110 			     "Failed to allocate trace iterator\n"))
10111 			tracepoint_printk = 0;
10112 		else
10113 			static_key_enable(&tracepoint_printk_key.key);
10114 	}
10115 	tracer_alloc_buffers();
10116 }
10117 
10118 void __init trace_init(void)
10119 {
10120 	trace_event_init();
10121 }
10122 
10123 __init static void clear_boot_tracer(void)
10124 {
10125 	/*
10126 	 * The default tracer at boot buffer is an init section.
10127 	 * This function is called in lateinit. If we did not
10128 	 * find the boot tracer, then clear it out, to prevent
10129 	 * later registration from accessing the buffer that is
10130 	 * about to be freed.
10131 	 */
10132 	if (!default_bootup_tracer)
10133 		return;
10134 
10135 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10136 	       default_bootup_tracer);
10137 	default_bootup_tracer = NULL;
10138 }
10139 
10140 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10141 __init static void tracing_set_default_clock(void)
10142 {
10143 	/* sched_clock_stable() is determined in late_initcall */
10144 	if (!trace_boot_clock && !sched_clock_stable()) {
10145 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10146 			pr_warn("Can not set tracing clock due to lockdown\n");
10147 			return;
10148 		}
10149 
10150 		printk(KERN_WARNING
10151 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10152 		       "If you want to keep using the local clock, then add:\n"
10153 		       "  \"trace_clock=local\"\n"
10154 		       "on the kernel command line\n");
10155 		tracing_set_clock(&global_trace, "global");
10156 	}
10157 }
10158 #else
10159 static inline void tracing_set_default_clock(void) { }
10160 #endif
10161 
10162 __init static int late_trace_init(void)
10163 {
10164 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10165 		static_key_disable(&tracepoint_printk_key.key);
10166 		tracepoint_printk = 0;
10167 	}
10168 
10169 	tracing_set_default_clock();
10170 	clear_boot_tracer();
10171 	return 0;
10172 }
10173 
10174 late_initcall_sync(late_trace_init);
10175