/* include/linux/trace_events.h */

#ifndef _LINUX_TRACE_EVENT_H
#define _LINUX_TRACE_EVENT_H

#include <linux/ring_buffer.h>
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
#include <linux/tracepoint.h>

struct trace_array;
struct trace_buffer;
struct tracer;
struct dentry;
struct bpf_prog;

const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
				  unsigned long flags,
				  const struct trace_print_flags *flag_array);

const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
				    const struct trace_print_flags *symbol_array);

#if BITS_PER_LONG == 32
const char *trace_print_symbols_seq_u64(struct trace_seq *p,
					unsigned long long val,
					const struct trace_print_flags_u64
								 *symbol_array);
#endif

const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
				    unsigned int bitmask_size);

const char *trace_print_hex_seq(struct trace_seq *p,
				const unsigned char *buf, int len,
				bool spacing);

const char *trace_print_array_seq(struct trace_seq *p,
				   const void *buf, int count,
				   size_t el_size);

struct trace_iterator;
struct trace_event;

int trace_raw_output_prep(struct trace_iterator *iter,
			  struct trace_event *event);

/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
 *
 *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
 */
struct trace_entry {
	unsigned short		type;
	unsigned char		flags;
	unsigned char		preempt_count;
	int			pid;
};

#define TRACE_EVENT_TYPE_MAX						\
	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
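
/*
 * Worked example: type above is an unsigned short (2 bytes), so this
 * evaluates to (1 << 16) - 1 = 65535, the largest event type ID that
 * fits in the trace_entry::type field.
 */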

/*
 * Trace iterator - used by printout routines that present trace
 * results to users; these routines may sleep, etc.:
 */
struct trace_iterator {
	struct trace_array	*tr;
	struct tracer		*trace;
	struct trace_buffer	*trace_buffer;
	void			*private;
	int			cpu_file;
	struct mutex		mutex;
	struct ring_buffer_iter	**buffer_iter;
	unsigned long		iter_flags;

	/* trace_seq for __print_flags() and __print_symbolic() etc. */
	struct trace_seq	tmp_seq;

	cpumask_var_t		started;

	/* true when the currently open file is a snapshot */
	bool			snapshot;

	/* The below is zeroed out in pipe_read */
	struct trace_seq	seq;
	struct trace_entry	*ent;
	unsigned long		lost_events;
	int			leftover;
	int			ent_size;
	int			cpu;
	u64			ts;

	loff_t			pos;
	long			idx;

	/* All new fields here will be zeroed out in pipe_read */
};

enum trace_iter_flags {
	TRACE_FILE_LAT_FMT	= 1,
	TRACE_FILE_ANNOTATE	= 2,
	TRACE_FILE_TIME_IN_NS	= 4,
};

typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
				      int flags, struct trace_event *event);

struct trace_event_functions {
	trace_print_func	trace;
	trace_print_func	raw;
	trace_print_func	hex;
	trace_print_func	binary;
};

struct trace_event {
	struct hlist_node		node;
	struct list_head		list;
	int				type;
	struct trace_event_functions	*funcs;
};

extern int register_trace_event(struct trace_event *event);
extern int unregister_trace_event(struct trace_event *event);

/* Return values for print_line callback */
enum print_line_t {
	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
	TRACE_TYPE_HANDLED	= 1,
	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
};

/*
 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
 * simplifies those functions and keeps them in sync.
 */
static inline enum print_line_t trace_handle_return(struct trace_seq *s)
{
	return trace_seq_has_overflowed(s) ?
		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
}
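
/*
 * Sketch of a typical ->trace output callback using the helper above
 * (the callback name and the printed fields are hypothetical):
 *
 *	static enum print_line_t
 *	trace_example_output(struct trace_iterator *iter, int flags,
 *			     struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example: pid=%d\n",
 *				 iter->ent->pid);
 *		return trace_handle_return(&iter->seq);
 *	}
 */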

void tracing_generic_entry_update(struct trace_entry *entry,
				  unsigned long flags,
				  int pc);
struct trace_event_file;

struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
				struct trace_event_file *trace_file,
				int type, unsigned long len,
				unsigned long flags, int pc);

void tracing_record_cmdline(struct task_struct *tsk);

int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);

struct event_filter;

enum trace_reg {
	TRACE_REG_REGISTER,
	TRACE_REG_UNREGISTER,
#ifdef CONFIG_PERF_EVENTS
	TRACE_REG_PERF_REGISTER,
	TRACE_REG_PERF_UNREGISTER,
	TRACE_REG_PERF_OPEN,
	TRACE_REG_PERF_CLOSE,
	TRACE_REG_PERF_ADD,
	TRACE_REG_PERF_DEL,
#endif
};

struct trace_event_call;

struct trace_event_class {
	const char		*system;
	void			*probe;
#ifdef CONFIG_PERF_EVENTS
	void			*perf_probe;
#endif
	int			(*reg)(struct trace_event_call *event,
				       enum trace_reg type, void *data);
	int			(*define_fields)(struct trace_event_call *);
	struct list_head	*(*get_fields)(struct trace_event_call *);
	struct list_head	fields;
	int			(*raw_init)(struct trace_event_call *);
};

extern int trace_event_reg(struct trace_event_call *event,
			    enum trace_reg type, void *data);
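
/*
 * Sketch of a statically defined event class (names prefixed with "my_"
 * are hypothetical; real instances are normally generated by the
 * TRACE_EVENT() machinery rather than written by hand):
 *
 *	static struct trace_event_class my_event_class = {
 *		.system		= "my_subsystem",
 *		.reg		= trace_event_reg,
 *		.define_fields	= my_define_fields,
 *		.raw_init	= trace_event_raw_init,
 *	};
 */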

struct trace_event_buffer {
	struct ring_buffer		*buffer;
	struct ring_buffer_event	*event;
	struct trace_event_file		*trace_file;
	void				*entry;
	unsigned long			flags;
	int				pc;
};

void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
				  struct trace_event_file *trace_file,
				  unsigned long len);

void trace_event_buffer_commit(struct trace_event_buffer *fbuffer);
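
/*
 * Sketch of the reserve/commit protocol in a probe function (the entry
 * layout is event specific; "my_entry" and its field are hypothetical):
 *
 *	struct trace_event_buffer fbuffer;
 *	struct my_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->my_field = my_value;
 *	trace_event_buffer_commit(&fbuffer);
 */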

enum {
	TRACE_EVENT_FL_FILTERED_BIT,
	TRACE_EVENT_FL_CAP_ANY_BIT,
	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
	TRACE_EVENT_FL_WAS_ENABLED_BIT,
	TRACE_EVENT_FL_TRACEPOINT_BIT,
	TRACE_EVENT_FL_KPROBE_BIT,
	TRACE_EVENT_FL_UPROBE_BIT,
};

/*
 * Event flags:
 *  FILTERED	  - The event has a filter attached
 *  CAP_ANY	  - Any user can enable for perf
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
 *  WAS_ENABLED   - Set and stays set when an event was ever enabled
 *                    (used for module unloading: if a module's event was
 *                     ever enabled, it is best to clear the buffers that
 *                     used it)
 *  TRACEPOINT    - Event is a tracepoint
 *  KPROBE        - Event is a kprobe
 *  UPROBE        - Event is a uprobe
 */
enum {
	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
	TRACE_EVENT_FL_UPROBE		= (1 << TRACE_EVENT_FL_UPROBE_BIT),
};

#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
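
/*
 * Sketch: these masks are tested against trace_event_call::flags, e.g.
 * to check whether an event was created by a kprobe or a uprobe:
 *
 *	if (call->flags & TRACE_EVENT_FL_UKPROBE)
 *		...
 */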

struct trace_event_call {
	struct list_head	list;
	struct trace_event_class *class;
	union {
		char			*name;
		/* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
		struct tracepoint	*tp;
	};
	struct trace_event	event;
	char			*print_fmt;
	struct event_filter	*filter;
	void			*mod;
	void			*data;
	/*
	 *   bit 0:		filter_active
	 *   bit 1:		allow trace by non root (cap any)
	 *   bit 2:		failed to apply filter
	 *   bit 3:		trace internal event (do not enable)
	 *   bit 4:		Event was ever enabled (used for module unloading)
	 *   bit 5:		Event is a tracepoint
	 *   bit 6:		Event is a kprobe
	 *   bit 7:		Event is a uprobe
	 */
	int			flags; /* static flags of different events */

#ifdef CONFIG_PERF_EVENTS
	int				perf_refcount;
	struct hlist_head __percpu	*perf_events;
	struct bpf_prog			*prog;

	int	(*perf_perm)(struct trace_event_call *,
			     struct perf_event *);
#endif
};

static inline const char *
trace_event_name(struct trace_event_call *call)
{
	if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
		return call->tp ? call->tp->name : NULL;
	else
		return call->name;
}

struct trace_array;
struct trace_subsystem_dir;

enum {
	EVENT_FILE_FL_ENABLED_BIT,
	EVENT_FILE_FL_RECORDED_CMD_BIT,
	EVENT_FILE_FL_FILTERED_BIT,
	EVENT_FILE_FL_NO_SET_FILTER_BIT,
	EVENT_FILE_FL_SOFT_MODE_BIT,
	EVENT_FILE_FL_SOFT_DISABLED_BIT,
	EVENT_FILE_FL_TRIGGER_MODE_BIT,
	EVENT_FILE_FL_TRIGGER_COND_BIT,
	EVENT_FILE_FL_PID_FILTER_BIT,
};

/*
 * Event file flags:
 *  ENABLED	  - The event is enabled
 *  RECORDED_CMD  - The comms should be recorded at sched_switch
 *  FILTERED	  - The event has a filter attached
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
 *  SOFT_DISABLED - When set, do not trace the event (even though its
 *                   tracepoint may be enabled)
 *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
 *  TRIGGER_COND  - When set, one or more triggers have an associated filter
 *  PID_FILTER    - When set, the event is filtered based on pid
 */
enum {
	EVENT_FILE_FL_ENABLED		= (1 << EVENT_FILE_FL_ENABLED_BIT),
	EVENT_FILE_FL_RECORDED_CMD	= (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
	EVENT_FILE_FL_FILTERED		= (1 << EVENT_FILE_FL_FILTERED_BIT),
	EVENT_FILE_FL_NO_SET_FILTER	= (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
	EVENT_FILE_FL_SOFT_MODE		= (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
	EVENT_FILE_FL_SOFT_DISABLED	= (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
	EVENT_FILE_FL_TRIGGER_MODE	= (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
	EVENT_FILE_FL_TRIGGER_COND	= (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
	EVENT_FILE_FL_PID_FILTER	= (1 << EVENT_FILE_FL_PID_FILTER_BIT),
};

struct trace_event_file {
	struct list_head		list;
	struct trace_event_call		*event_call;
	struct event_filter		*filter;
	struct dentry			*dir;
	struct trace_array		*tr;
	struct trace_subsystem_dir	*system;
	struct list_head		triggers;

	/*
	 * 32 bit flags:
	 *   bit 0:		enabled
	 *   bit 1:		enabled cmd record
	 *   bit 2:		event has a filter attached
	 *   bit 3:		filter had an error, ignore it
	 *   bit 4:		soft mode (enabled/disabled via SOFT_DISABLED)
	 *   bit 5:		soft disabled
	 *   bit 6:		trigger enabled
	 *   bit 7:		one or more triggers have a filter
	 *   bit 8:		pid filter attached
	 *
	 * Note: The bits must be set atomically to prevent races
	 * from other writers (see the sketch after this struct). Reads
	 * of flags do not need to be in sync as they occur in critical
	 * sections. But the way flags is currently used, these changes
	 * do not affect the code except that when a change is made, it
	 * may have a slight delay in propagating the changes to other
	 * CPUs due to caching and such. Which is mostly OK ;-)
	 */
	unsigned long		flags;
	atomic_t		sm_ref;	/* soft-mode reference counter */
	atomic_t		tm_ref;	/* trigger-mode reference counter */
};
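
/*
 * Sketch of an atomic flag update, per the note in the struct above
 * (set_bit()/clear_bit() from <linux/bitops.h> operate atomically on
 * the unsigned long flags word; "file" is a trace_event_file pointer):
 *
 *	set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 *	...
 *	clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 */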

#define __TRACE_EVENT_FLAGS(name, value)				\
	static int __init trace_init_flags_##name(void)		\
	{								\
		event_##name.flags |= value;				\
		return 0;						\
	}								\
	early_initcall(trace_init_flags_##name);

#define __TRACE_EVENT_PERF_PERM(name, expr...)				\
	static int perf_perm_##name(struct trace_event_call *tp_event, \
				    struct perf_event *p_event)		\
	{								\
		return ({ expr; });					\
	}								\
	static int __init trace_init_perf_perm_##name(void)		\
	{								\
		event_##name.perf_perm = &perf_perm_##name;		\
		return 0;						\
	}								\
	early_initcall(trace_init_perf_perm_##name);

#define PERF_MAX_TRACE_SIZE	2048

#define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */

enum event_trigger_type {
	ETT_NONE		= (0),
	ETT_TRACE_ONOFF		= (1 << 0),
	ETT_SNAPSHOT		= (1 << 1),
	ETT_STACKTRACE		= (1 << 2),
	ETT_EVENT_ENABLE	= (1 << 3),
	ETT_EVENT_HIST		= (1 << 4),
	ETT_HIST_ENABLE		= (1 << 5),
};

extern int filter_match_preds(struct event_filter *filter, void *rec);

extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
						   void *rec);
extern void event_triggers_post_call(struct trace_event_file *file,
				     enum event_trigger_type tt,
				     void *rec);
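
/*
 * Sketch of how the trigger calls bracket an event commit: conditional
 * triggers run before the record is written and return a bitmask of
 * trigger types to invoke afterwards ("entry" is the event record):
 *
 *	enum event_trigger_type tt = ETT_NONE;
 *
 *	if (file->flags & EVENT_FILE_FL_TRIGGER_COND)
 *		tt = event_triggers_call(file, entry);
 *	... write the event record ...
 *	if (tt)
 *		event_triggers_post_call(file, tt, entry);
 */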

bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);

/**
 * trace_trigger_soft_disabled - do triggers and test if soft disabled
 * @file: The file pointer of the event to test
 *
 * If any triggers without filters are attached to this event, they
 * will be called here. If the event is soft disabled and has no
 * triggers that require testing the fields, or if the event is
 * filtered out by its pid filter, it returns true, otherwise false.
 */
static inline bool
trace_trigger_soft_disabled(struct trace_event_file *file)
{
	unsigned long eflags = file->flags;

	if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
		if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
			event_triggers_call(file, NULL);
		if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
			return true;
		if (eflags & EVENT_FILE_FL_PID_FILTER)
			return trace_event_ignore_this_pid(file);
	}
	return false;
}
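
/*
 * Sketch of typical use in a probe: bail out before reserving ring
 * buffer space when the event is soft disabled or pid filtered:
 *
 *	if (trace_trigger_soft_disabled(trace_file))
 *		return;
 */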

#ifdef CONFIG_BPF_EVENTS
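/*
 * Run the BPF program attached to a perf event against @ctx. A non-zero
 * return means the event should be recorded; zero means it is filtered
 * out. The !CONFIG_BPF_EVENTS stub below always returns 1 (never
 * filter).
 */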
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
	return 1;
}
#endif

enum {
	FILTER_OTHER = 0,
	FILTER_STATIC_STRING,
	FILTER_DYN_STRING,
	FILTER_PTR_STRING,
	FILTER_TRACE_FN,
	FILTER_COMM,
	FILTER_CPU,
};

extern int trace_event_raw_init(struct trace_event_call *call);
extern int trace_define_field(struct trace_event_call *call, const char *type,
			      const char *name, int offset, int size,
			      int is_signed, int filter_type);
extern int trace_add_event_call(struct trace_event_call *call);
extern int trace_remove_event_call(struct trace_event_call *call);
extern int trace_event_get_offsets(struct trace_event_call *call);

#define is_signed_type(type)	(((type)(-1)) < (type)1)
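
/*
 * Worked example: is_signed_type(int) compares (int)-1 < (int)1, which
 * is true, while is_signed_type(unsigned int) compares UINT_MAX < 1,
 * which is false.
 */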

int trace_set_clr_event(const char *system, const char *event, int set);

/*
 * The double __builtin_constant_p is because gcc will give us an error
 * if we try to initialize the static variable with fmt when fmt is not
 * a constant, even though the outer if statement would be optimized out.
 */
#define event_trace_printk(ip, fmt, args...)				\
do {									\
	__trace_printk_check_format(fmt, ##args);			\
	tracing_record_cmdline(current);				\
	if (__builtin_constant_p(fmt)) {				\
		static const char *trace_printk_fmt			\
		  __attribute__((section("__trace_printk_fmt"))) =	\
			__builtin_constant_p(fmt) ? fmt : NULL;		\
									\
		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
	} else								\
		__trace_printk(ip, fmt, ##args);			\
} while (0)
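
/*
 * Sketch of a call site (the message and its argument are illustrative;
 * _THIS_IP_ expands to the current instruction pointer):
 *
 *	event_trace_printk(_THIS_IP_, "count=%d\n", count);
 */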

#ifdef CONFIG_PERF_EVENTS
struct perf_event;

DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);

extern int  perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int  perf_trace_add(struct perf_event *event, int flags);
extern void perf_trace_del(struct perf_event *event, int flags);
extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
				     char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);

void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
			       struct trace_event_call *call, u64 count,
			       struct pt_regs *regs, struct hlist_head *head,
			       struct task_struct *task);

static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
		       u64 count, struct pt_regs *regs, void *head,
		       struct task_struct *task)
{
	perf_tp_event(type, count, raw_data, size, regs, head, rctx, task);
}
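
/*
 * Sketch of the perf buffer alloc/submit sequence in a probe (the entry
 * layout and size are event specific):
 *
 *	entry = perf_trace_buf_alloc(size, &regs, &rctx);
 *	if (!entry)
 *		return;
 *	... fill in entry ...
 *	perf_trace_buf_submit(entry, size, rctx, type, 1, regs, head, task);
 */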
#endif

#endif /* _LINUX_TRACE_EVENT_H */