
#ifndef _LINUX_TRACE_EVENT_H
#define _LINUX_TRACE_EVENT_H

#include <linux/ring_buffer.h>
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
#include <linux/tracepoint.h>

struct trace_array;
struct trace_buffer;
struct tracer;
struct dentry;
struct bpf_prog;

const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
				  unsigned long flags,
				  const struct trace_print_flags *flag_array);

const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
				    const struct trace_print_flags *symbol_array);

#if BITS_PER_LONG == 32
const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
		      unsigned long long flags,
		      const struct trace_print_flags_u64 *flag_array);

const char *trace_print_symbols_seq_u64(struct trace_seq *p,
					unsigned long long val,
					const struct trace_print_flags_u64
								 *symbol_array);
#endif

const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
				    unsigned int bitmask_size);

const char *trace_print_hex_seq(struct trace_seq *p,
				const unsigned char *buf, int len,
				bool concatenate);

const char *trace_print_array_seq(struct trace_seq *p,
				   const void *buf, int count,
				   size_t el_size);

struct trace_iterator;
struct trace_event;

int trace_raw_output_prep(struct trace_iterator *iter,
			  struct trace_event *event);

/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
 *
 *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
 */
struct trace_entry {
	unsigned short		type;
	unsigned char		flags;
	unsigned char		preempt_count;
	int			pid;
};

#define TRACE_EVENT_TYPE_MAX						\
	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
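
/*
 * Worked example: trace_entry.type is an unsigned short, so with
 * 2-byte shorts this evaluates to (1 << 16) - 1 = 65535, the largest
 * event type id that fits in the type field.
 */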

/*
 * Trace iterator - used by the printout routines that present trace
 * results to users; such routines might sleep, etc:
 */
struct trace_iterator {
	struct trace_array	*tr;
	struct tracer		*trace;
	struct trace_buffer	*trace_buffer;
	void			*private;
	int			cpu_file;
	struct mutex		mutex;
	struct ring_buffer_iter	**buffer_iter;
	unsigned long		iter_flags;

	/* trace_seq for __print_flags() and __print_symbolic() etc. */
	struct trace_seq	tmp_seq;

	cpumask_var_t		started;

	/* true when the currently open file is a snapshot */
	bool			snapshot;

	/* Everything below is zeroed out in pipe_read */
	struct trace_seq	seq;
	struct trace_entry	*ent;
	unsigned long		lost_events;
	int			leftover;
	int			ent_size;
	int			cpu;
	u64			ts;

	loff_t			pos;
	long			idx;

	/* All new fields here will be zeroed out in pipe_read */
};

enum trace_iter_flags {
	TRACE_FILE_LAT_FMT	= 1,
	TRACE_FILE_ANNOTATE	= 2,
	TRACE_FILE_TIME_IN_NS	= 4,
};

typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
				      int flags, struct trace_event *event);

struct trace_event_functions {
	trace_print_func	trace;
	trace_print_func	raw;
	trace_print_func	hex;
	trace_print_func	binary;
};

struct trace_event {
	struct hlist_node		node;
	struct list_head		list;
	int				type;
	struct trace_event_functions	*funcs;
};

extern int register_trace_event(struct trace_event *event);
extern int unregister_trace_event(struct trace_event *event);

/* Return values for print_line callback */
enum print_line_t {
	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
	TRACE_TYPE_HANDLED	= 1,
	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
};

/*
 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
 * simplifies those functions and keeps them in sync.
 */
static inline enum print_line_t trace_handle_return(struct trace_seq *s)
{
	return trace_seq_has_overflowed(s) ?
		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
}
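
/*
 * Illustrative sketch (not part of this header): a trace_event output
 * callback typically ends with trace_handle_return(), so a trace_seq
 * overflow is reported as TRACE_TYPE_PARTIAL_LINE. The event name and
 * output below are hypothetical.
 *
 *	static enum print_line_t
 *	trace_foo_output(struct trace_iterator *iter, int flags,
 *			 struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "foo fired on cpu %d\n", iter->cpu);
 *		return trace_handle_return(s);
 *	}
 */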

void tracing_generic_entry_update(struct trace_entry *entry,
				  unsigned long flags,
				  int pc);
struct trace_event_file;

struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
				struct trace_event_file *trace_file,
				int type, unsigned long len,
				unsigned long flags, int pc);

void tracing_record_cmdline(struct task_struct *tsk);

int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);

struct event_filter;

enum trace_reg {
	TRACE_REG_REGISTER,
	TRACE_REG_UNREGISTER,
#ifdef CONFIG_PERF_EVENTS
	TRACE_REG_PERF_REGISTER,
	TRACE_REG_PERF_UNREGISTER,
	TRACE_REG_PERF_OPEN,
	TRACE_REG_PERF_CLOSE,
	TRACE_REG_PERF_ADD,
	TRACE_REG_PERF_DEL,
#endif
};

struct trace_event_call;

struct trace_event_class {
	const char		*system;
	void			*probe;
#ifdef CONFIG_PERF_EVENTS
	void			*perf_probe;
#endif
	int			(*reg)(struct trace_event_call *event,
				       enum trace_reg type, void *data);
	int			(*define_fields)(struct trace_event_call *);
	struct list_head	*(*get_fields)(struct trace_event_call *);
	struct list_head	fields;
	int			(*raw_init)(struct trace_event_call *);
};

extern int trace_event_reg(struct trace_event_call *event,
			    enum trace_reg type, void *data);

struct trace_event_buffer {
	struct ring_buffer		*buffer;
	struct ring_buffer_event	*event;
	struct trace_event_file		*trace_file;
	void				*entry;
	unsigned long			flags;
	int				pc;
};

void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
				  struct trace_event_file *trace_file,
				  unsigned long len);

void trace_event_buffer_commit(struct trace_event_buffer *fbuffer);
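
/*
 * Illustrative sketch (not part of this header): event probes follow a
 * reserve/fill/commit pattern with the two helpers above. The "foo"
 * names are hypothetical stand-ins for generated TRACE_EVENT code.
 *
 *	struct trace_event_raw_foo { struct trace_entry ent; int val; };
 *
 *	static void trace_event_raw_event_foo(struct trace_event_file *file,
 *					      int val)
 *	{
 *		struct trace_event_buffer fbuffer;
 *		struct trace_event_raw_foo *entry;
 *
 *		entry = trace_event_buffer_reserve(&fbuffer, file,
 *						   sizeof(*entry));
 *		if (!entry)
 *			return;
 *		entry->val = val;
 *		trace_event_buffer_commit(&fbuffer);
 *	}
 */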

enum {
	TRACE_EVENT_FL_FILTERED_BIT,
	TRACE_EVENT_FL_CAP_ANY_BIT,
	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
	TRACE_EVENT_FL_WAS_ENABLED_BIT,
	TRACE_EVENT_FL_TRACEPOINT_BIT,
	TRACE_EVENT_FL_KPROBE_BIT,
	TRACE_EVENT_FL_UPROBE_BIT,
};

/*
 * Event flags:
 *  FILTERED	  - The event has a filter attached
 *  CAP_ANY	  - Any user can enable for perf
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
 *  WAS_ENABLED   - Set (and stays set) when an event was ever enabled
 *                    (used for module unloading; if a module event was
 *                     enabled, it is best to clear the buffers that used it)
 *  TRACEPOINT    - Event is a tracepoint
 *  KPROBE        - Event is a kprobe
 *  UPROBE        - Event is a uprobe
 */
enum {
	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
	TRACE_EVENT_FL_UPROBE		= (1 << TRACE_EVENT_FL_UPROBE_BIT),
};

#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)

struct trace_event_call {
	struct list_head	list;
	struct trace_event_class *class;
	union {
		char			*name;
		/* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
		struct tracepoint	*tp;
	};
	struct trace_event	event;
	char			*print_fmt;
	struct event_filter	*filter;
	void			*mod;
	void			*data;
	/*
	 *   bit 0:		filter_active
	 *   bit 1:		allow trace by non root (cap any)
	 *   bit 2:		failed to apply filter
	 *   bit 3:		trace internal event (do not enable)
	 *   bit 4:		event was ever enabled (used for module unload)
	 *   bit 5:		event is a tracepoint
	 *   bit 6:		event is a kprobe
	 *   bit 7:		event is a uprobe
	 */
	int			flags; /* static flags of different events */

#ifdef CONFIG_PERF_EVENTS
	int				perf_refcount;
	struct hlist_head __percpu	*perf_events;
	struct bpf_prog			*prog;

	int	(*perf_perm)(struct trace_event_call *,
			     struct perf_event *);
#endif
};

static inline const char *
trace_event_name(struct trace_event_call *call)
{
	if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
		return call->tp ? call->tp->name : NULL;
	else
		return call->name;
}

struct trace_array;
struct trace_subsystem_dir;

enum {
	EVENT_FILE_FL_ENABLED_BIT,
	EVENT_FILE_FL_RECORDED_CMD_BIT,
	EVENT_FILE_FL_FILTERED_BIT,
	EVENT_FILE_FL_NO_SET_FILTER_BIT,
	EVENT_FILE_FL_SOFT_MODE_BIT,
	EVENT_FILE_FL_SOFT_DISABLED_BIT,
	EVENT_FILE_FL_TRIGGER_MODE_BIT,
	EVENT_FILE_FL_TRIGGER_COND_BIT,
	EVENT_FILE_FL_PID_FILTER_BIT,
};

/*
 * Event file flags:
 *  ENABLED	  - The event is enabled
 *  RECORDED_CMD  - The comms should be recorded at sched_switch
 *  FILTERED	  - The event has a filter attached
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
 *  SOFT_DISABLED - When set, do not trace the event (even though its
 *                   tracepoint may be enabled)
 *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
 *  TRIGGER_COND  - When set, one or more triggers have an associated filter
 *  PID_FILTER    - When set, the event is filtered based on pid
 */
enum {
	EVENT_FILE_FL_ENABLED		= (1 << EVENT_FILE_FL_ENABLED_BIT),
	EVENT_FILE_FL_RECORDED_CMD	= (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
	EVENT_FILE_FL_FILTERED		= (1 << EVENT_FILE_FL_FILTERED_BIT),
	EVENT_FILE_FL_NO_SET_FILTER	= (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
	EVENT_FILE_FL_SOFT_MODE		= (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
	EVENT_FILE_FL_SOFT_DISABLED	= (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
	EVENT_FILE_FL_TRIGGER_MODE	= (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
	EVENT_FILE_FL_TRIGGER_COND	= (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
	EVENT_FILE_FL_PID_FILTER	= (1 << EVENT_FILE_FL_PID_FILTER_BIT),
};

struct trace_event_file {
	struct list_head		list;
	struct trace_event_call		*event_call;
	struct event_filter		*filter;
	struct dentry			*dir;
	struct trace_array		*tr;
	struct trace_subsystem_dir	*system;
	struct list_head		triggers;

	/*
	 * 32 bit flags:
	 *   bit 0:		enabled
	 *   bit 1:		enabled cmd record
	 *   bit 2:		filter attached
	 *   bit 3:		filter failed to apply
	 *   bit 4:		soft mode (enable/disable with the soft disable bit)
	 *   bit 5:		soft disabled
	 *   bit 6:		trigger enabled
	 *   bit 7:		trigger has a condition/filter
	 *   bit 8:		pid filter attached
	 *
	 * Note: The bits must be set atomically to prevent races
	 * with other writers. Reads of the flags do not need to be
	 * synchronized, as they occur in critical sections. The way
	 * the flags are currently used, a change may simply have a
	 * slight delay in propagating to other CPUs due to caching
	 * and such. Which is mostly OK ;-)
	 */
	unsigned long		flags;
	atomic_t		sm_ref;	/* soft-mode reference counter */
	atomic_t		tm_ref;	/* trigger-mode reference counter */
};
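
/*
 * Illustrative sketch (not part of this header): per the note above,
 * writers update flags with atomic bitops on the *_BIT values, e.g.:
 *
 *	set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
 *	clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 *
 * while readers may simply test file->flags & EVENT_FILE_FL_SOFT_DISABLED.
 */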

#define __TRACE_EVENT_FLAGS(name, value)				\
	static int __init trace_init_flags_##name(void)		\
	{								\
		event_##name.flags |= value;				\
		return 0;						\
	}								\
	early_initcall(trace_init_flags_##name);
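
/*
 * Example: the syscall tracepoints use this (through the
 * TRACE_EVENT_FLAGS() wrapper) to let any user enable them for perf:
 *
 *	TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY)
 *
 * which expands to an early_initcall that ORs the flag into
 * event_sys_enter.flags.
 */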

#define __TRACE_EVENT_PERF_PERM(name, expr...)				\
	static int perf_perm_##name(struct trace_event_call *tp_event, \
				    struct perf_event *p_event)		\
	{								\
		return ({ expr; });					\
	}								\
	static int __init trace_init_perf_perm_##name(void)		\
	{								\
		event_##name.perf_perm = &perf_perm_##name;		\
		return 0;						\
	}								\
	early_initcall(trace_init_perf_perm_##name);
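
/*
 * Illustrative sketch (hypothetical event name and policy): the expr is
 * evaluated with tp_event and p_event in scope, returning 0 to allow or
 * a -errno to refuse the perf_event_open():
 *
 *	TRACE_EVENT_PERF_PERM(foo,
 *		is_sampling_event(p_event) ? -EPERM : 0);
 */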

#define PERF_MAX_TRACE_SIZE	2048

#define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */

enum event_trigger_type {
	ETT_NONE		= (0),
	ETT_TRACE_ONOFF		= (1 << 0),
	ETT_SNAPSHOT		= (1 << 1),
	ETT_STACKTRACE		= (1 << 2),
	ETT_EVENT_ENABLE	= (1 << 3),
	ETT_EVENT_HIST		= (1 << 4),
	ETT_HIST_ENABLE		= (1 << 5),
};

extern int filter_match_preds(struct event_filter *filter, void *rec);

extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
						   void *rec);
extern void event_triggers_post_call(struct trace_event_file *file,
				     enum event_trigger_type tt,
				     void *rec);

bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);

/**
 * trace_trigger_soft_disabled - do triggers and test if soft disabled
 * @file: The file pointer of the event to test
 *
 * If any triggers without filters are attached to this event, they
 * will be called here. If the event is soft disabled and has no
 * triggers that require testing the fields, it will return true.
 * If the event is pid filtered, it returns true when the current
 * task's pid is to be ignored. Otherwise it returns false.
 */
static inline bool
trace_trigger_soft_disabled(struct trace_event_file *file)
{
	unsigned long eflags = file->flags;

	if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
		if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
			event_triggers_call(file, NULL);
		if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
			return true;
		if (eflags & EVENT_FILE_FL_PID_FILTER)
			return trace_event_ignore_this_pid(file);
	}
	return false;
}
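
/*
 * Illustrative sketch (not part of this header): event probes call this
 * before reserving buffer space, so a soft-disabled event still fires
 * its unconditional triggers but records nothing:
 *
 *	if (trace_trigger_soft_disabled(trace_file))
 *		return;
 */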

/*
 * trace_call_bpf() invokes @prog with @ctx and returns the program's
 * verdict: 1 means store the event into the ring buffer, 0 means
 * filter it out. Without CONFIG_BPF_EVENTS the stub returns 1, so
 * events are always recorded.
 */
#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
	return 1;
}
#endif

enum {
	FILTER_OTHER = 0,
	FILTER_STATIC_STRING,
	FILTER_DYN_STRING,
	FILTER_PTR_STRING,
	FILTER_TRACE_FN,
	FILTER_COMM,
	FILTER_CPU,
};

extern int trace_event_raw_init(struct trace_event_call *call);
extern int trace_define_field(struct trace_event_call *call, const char *type,
			      const char *name, int offset, int size,
			      int is_signed, int filter_type);
extern int trace_add_event_call(struct trace_event_call *call);
extern int trace_remove_event_call(struct trace_event_call *call);
extern int trace_event_get_offsets(struct trace_event_call *call);

#define is_signed_type(type)	(((type)(-1)) < (type)1)
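
/*
 * Worked example: is_signed_type(int) compares (int)-1 < (int)1, which
 * is true; is_signed_type(unsigned int) compares UINT_MAX < 1, which
 * is false.
 */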

int trace_set_clr_event(const char *system, const char *event, int set);

/*
 * The double __builtin_constant_p is because gcc will give us an error
 * if we try to assign fmt to the static variable when fmt is not a
 * constant, even though the outer if statement would optimize the
 * assignment out.
 */
#define event_trace_printk(ip, fmt, args...)				\
do {									\
	__trace_printk_check_format(fmt, ##args);			\
	tracing_record_cmdline(current);				\
	if (__builtin_constant_p(fmt)) {				\
		static const char *trace_printk_fmt			\
		  __attribute__((section("__trace_printk_fmt"))) =	\
			__builtin_constant_p(fmt) ? fmt : NULL;		\
									\
		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
	} else								\
		__trace_printk(ip, fmt, ##args);			\
} while (0)
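
/*
 * Illustrative sketch (not part of this header): with a constant format
 * only a pointer to the string is recorded, via __trace_bprintk(); a
 * non-constant format falls back to the slower __trace_printk():
 *
 *	event_trace_printk(_THIS_IP_, "irq %d handled\n", irq);
 */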

#ifdef CONFIG_PERF_EVENTS
struct perf_event;

DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);

extern int  perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int  perf_trace_add(struct perf_event *event, int flags);
extern void perf_trace_del(struct perf_event *event, int flags);
extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
				     char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);

void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
			       struct trace_event_call *call, u64 count,
			       struct pt_regs *regs, struct hlist_head *head,
			       struct task_struct *task);

static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
		       u64 count, struct pt_regs *regs, void *head,
		       struct task_struct *task)
{
	perf_tp_event(type, count, raw_data, size, regs, head, rctx, task);
}
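
/*
 * Illustrative sketch (not part of this header): a perf probe allocates
 * a buffer for the current recursion context, fills it, and submits it;
 * size must stay below PERF_MAX_TRACE_SIZE. "head" would come from the
 * event's per-cpu perf_events hlist; the record layout is hypothetical.
 *
 *	struct pt_regs *regs;
 *	int rctx;
 *	u64 *entry;
 *
 *	entry = perf_trace_buf_alloc(sizeof(*entry), &regs, &rctx);
 *	if (!entry)
 *		return;
 *	perf_fetch_caller_regs(regs);
 *	*entry = 42;
 *	perf_trace_buf_submit(entry, sizeof(*entry), rctx, type, 1, regs,
 *			      head, NULL);
 */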
#endif

#endif /* _LINUX_TRACE_EVENT_H */