xref: /linux-6.15/include/linux/tracepoint.h (revision c8177aba)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 #ifndef _LINUX_TRACEPOINT_H
3 #define _LINUX_TRACEPOINT_H
4 
5 /*
6  * Kernel Tracepoint API.
7  *
8  * See Documentation/trace/tracepoints.rst.
9  *
10  * Copyright (C) 2008-2014 Mathieu Desnoyers <[email protected]>
11  *
12  * Heavily inspired from the Linux Kernel Markers.
13  */
14 
15 #include <linux/smp.h>
16 #include <linux/srcu.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/cpumask.h>
20 #include <linux/rcupdate.h>
21 #include <linux/tracepoint-defs.h>
22 #include <linux/static_call.h>
23 
24 struct module;
25 struct tracepoint;
26 struct notifier_block;
27 
/**
 * struct trace_eval_map - one symbolic name/value pair of a trace system
 * @system:      name of the trace system the entry belongs to
 * @eval_string: textual form of the value
 * @eval_value:  numeric value that @eval_string stands for
 *
 * NOTE(review): the field descriptions are read off the member names; this
 * header only fixes the layout - confirm against the users of the struct.
 */
struct trace_eval_map {
	const char *system;
	const char *eval_string;
	unsigned long eval_value;
};
33 
34 #define TRACEPOINT_DEFAULT_PRIO	10
35 
36 extern struct srcu_struct tracepoint_srcu;
37 
/*
 * Probe management entry points, defined outside this header.
 *
 * Each takes the tracepoint, the probe callback (type-erased to void *) and
 * an opaque data pointer; the _prio variant additionally takes a priority.
 * The typed register_trace_<name>() style wrappers generated by
 * __DECLARE_TRACE() are thin calls into these.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
				   void *data, int prio);
int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);

/*
 * NOTE(review): by its name this calls @fct for each kernel tracepoint,
 * handing @priv through unchanged - confirm against the definition.
 */
void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
				void *priv);
48 
49 #ifdef CONFIG_MODULES
50 struct tp_module {
51 	struct list_head list;
52 	struct module *mod;
53 };
54 
55 bool trace_module_has_bad_taint(struct module *mod);
56 extern int register_tracepoint_module_notifier(struct notifier_block *nb);
57 extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
58 #else
/*
 * Stubs for kernels built without module support: no module is ever
 * reported as badly tainted, and notifier (un)registration reports success
 * without doing anything.
 */
static inline bool trace_module_has_bad_taint(struct module *mod)
{
	return false;
}

static inline int
register_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}

static inline int
unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
73 #endif /* CONFIG_MODULES */
74 
/*
 * tracepoint_synchronize_unregister() - wait for in-flight probe callers.
 *
 * Must be called between the last tracepoint probe unregistration and the
 * end of module exit, so that no caller is still executing a probe when the
 * probe is freed.
 *
 * Tracepoint call sites generated by this header run with preemption
 * disabled and, for the _rcuidle variants, additionally under the
 * tracepoint_srcu read lock; hence both an SRCU and an RCU grace period are
 * waited for, in that order.
 *
 * Without CONFIG_TRACEPOINTS this is an empty function.
 */
#ifndef CONFIG_TRACEPOINTS
static inline void tracepoint_synchronize_unregister(void)
{
}
#else
static inline void tracepoint_synchronize_unregister(void)
{
	synchronize_srcu(&tracepoint_srcu);
	synchronize_rcu();
}
#endif
90 
#if defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS)
/* Registration hooks for the syscall tracepoints; defined elsewhere. */
int syscall_regfunc(void);
void syscall_unregfunc(void);
#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
95 
/*
 * PARAMS() - forward a comma-separated list as a single macro argument.
 * Expands to its arguments unchanged.  An already existing definition of
 * PARAMS is left in place.
 */
#if !defined(PARAMS)
#define PARAMS(args...) args
#endif

/* Both expand to nothing in this header. */
#define TRACE_DEFINE_ENUM(x)
#define TRACE_DEFINE_SIZEOF(x)
102 
103 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
104 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
105 {
106 	return offset_to_ptr(p);
107 }
108 
109 #define __TRACEPOINT_ENTRY(name)					\
110 	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
111 	    "	.balign 4					\n"	\
112 	    "	.long 	__tracepoint_" #name " - .		\n"	\
113 	    "	.previous					\n")
114 #else
115 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
116 {
117 	return *p;
118 }
119 
120 #define __TRACEPOINT_ENTRY(name)					 \
121 	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
122 	__section("__tracepoints_ptrs") = &__tracepoint_##name
123 #endif
124 
125 #endif /* _LINUX_TRACEPOINT_H */
126 
127 /*
128  * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
129  *  file ifdef protection.
130  *  This is due to the way trace events work. If a file includes two
131  *  trace event headers under one "CREATE_TRACE_POINTS" the first include
132  *  will override the TRACE_EVENT and break the second include.
133  */
134 
135 #ifndef DECLARE_TRACE
136 
/*
 * Grouping helpers for the tracepoint declaration macros: each expands to
 * its arguments unchanged and only serves to keep a comma-separated
 * prototype, argument list or condition together as one macro argument.
 */
#define TP_PROTO(args...)		args
#define TP_ARGS(args...)		args
#define TP_CONDITION(args...)		args
140 
/*
 * Individual subsystems may have a separate configuration to enable their
 * tracepoints. By default, this file creates the tracepoints when
 * CONFIG_TRACEPOINTS is defined. A subsystem that wants to be able to keep
 * its tracepoints from being created can define NOTRACE before including
 * the tracepoint headers.
 */
#if !defined(NOTRACE) && defined(CONFIG_TRACEPOINTS)
#define TRACEPOINTS_ENABLED
#endif
151 
152 #ifdef TRACEPOINTS_ENABLED
153 
/*
 * __DO_TRACE_CALL(name) - the callee used to fire tracepoint @name.
 * With CONFIG_HAVE_STATIC_CALL it goes through the tp_func_<name> static
 * call; otherwise it names the __traceiter_<name> function directly.
 */
#ifndef CONFIG_HAVE_STATIC_CALL
#define __DO_TRACE_CALL(name)	__traceiter_##name
#else
#define __DO_TRACE_CALL(name)	static_call(tp_func_##name)
#endif /* CONFIG_HAVE_STATIC_CALL */
159 
/*
 * __DO_TRACE() - fire tracepoint @name from inside a trace_<name>() wrapper.
 *
 * @proto is not referenced by the expansion.  @args is the argument list
 * handed to the callee; it includes "__data" as its first element, which
 * names the local variable declared below.  The leading data argument is
 * there to handle the "void" prototype: for a tracepoint without
 * parameters it would be invalid to declare a function as "(void *, void)".
 *
 * When @cond evaluates false the macro executes "return", i.e. it leaves
 * the *enclosing function*.  Otherwise the callee runs with preemption
 * disabled; if @rcuidle is set, the tracepoint_srcu read lock is held as
 * well and rcu_irq_enter_irqson()/rcu_irq_exit_irqson() bracket the call.
 *
 * The first element of the funcs array is never NULL when the array itself
 * is non-NULL, because the array then holds at least one element.
 */
#define __DO_TRACE(name, proto, args, cond, rcuidle)			\
	do {								\
		struct tracepoint_func *it_func_ptr;			\
		void *__data;						\
		int __maybe_unused __idx = 0;				\
									\
		if (!(cond))						\
			return;						\
									\
		/* srcu cannot be used from NMI context */		\
		WARN_ON_ONCE(rcuidle && in_nmi());			\
									\
		/* same preemption state for srcu and sched-rcu use */	\
		preempt_disable_notrace();				\
									\
		/* sched-rcu does not work from idle: use srcu there */	\
		if (rcuidle) {						\
			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
			rcu_irq_enter_irqson();				\
		}							\
									\
		it_func_ptr =						\
			rcu_dereference_raw(__tracepoint_##name.funcs);	\
		if (it_func_ptr) {					\
			__data = it_func_ptr->data;			\
			__DO_TRACE_CALL(name)(args);			\
		}							\
									\
		/* undo the rcuidle setup in reverse order */		\
		if (rcuidle) {						\
			rcu_irq_exit_irqson();				\
			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
		}							\
									\
		preempt_enable_notrace();				\
	} while (0)
207 
/*
 * __DECLARE_TRACE_RCU() - emit trace_<name>_rcuidle().
 *
 * The generated function fires the tracepoint through __DO_TRACE() with
 * rcuidle set, guarded by the tracepoint's static key.  When MODULE is
 * defined the macro expands to nothing, so no _rcuidle variant is emitted.
 */
#ifdef MODULE
#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args)
#else
#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \
	static inline void trace_##name##_rcuidle(proto)		\
	{								\
		if (static_key_false(&__tracepoint_##name.key)) {	\
			__DO_TRACE(name,				\
				   TP_PROTO(data_proto),		\
				   TP_ARGS(data_args),			\
				   TP_CONDITION(cond), 1);		\
		}							\
	}
#endif
221 
/*
 * __DECLARE_TRACE() - declare tracepoint @name and its inline helpers.
 *
 * @proto/@args are the tracepoint prototype and argument names, @cond the
 * condition under which the tracepoint may fire, @data_proto/@data_args the
 * same prototype and arguments with the leading "void *__data" added.
 *
 * Emits for tracepoint @name:
 *   - extern declarations of __traceiter_<name>(), of the tp_func_<name>
 *     static call and of struct tracepoint __tracepoint_<name>;
 *   - trace_<name>(): fires the tracepoint when its static key is enabled;
 *   - trace_<name>_rcuidle(), through __DECLARE_TRACE_RCU();
 *   - register_trace_<name>(), register_trace_prio_<name>() and
 *     unregister_trace_<name>(): typed wrappers that forward to the
 *     tracepoint_probe_*() functions;
 *   - check_trace_callback_type_<name>(): empty function taking a callback
 *     of the probe type;
 *   - trace_<name>_enabled(): tests the tracepoint's static key.
 *
 * Make sure the alignment of the structure in the __tracepoints section will
 * not add unwanted padding between the beginning of the section and the
 * structure. Force alignment to the same alignment as the section start.
 * NOTE(review): nothing in this macro sets an alignment; the paragraph above
 * is carried over from the original comment - confirm it is still relevant.
 *
 * When lockdep is enabled, trace_<name>() always does the RCU portions of
 * the tracepoint code, regardless of whether tracing is on.  They are
 * skipped when @cond is false, due to interaction with idle
 * instrumentation.  This lets us find RCU issues triggered with tracepoints
 * even when this tracepoint is off; the code has no purpose other than
 * poking RCU a bit.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	extern int __traceiter_##name(data_proto);			\
	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
	extern struct tracepoint __tracepoint_##name;			\
									\
	static inline void trace_##name(proto)				\
	{								\
		if (static_key_false(&__tracepoint_##name.key)) {	\
			__DO_TRACE(name,				\
				   TP_PROTO(data_proto),		\
				   TP_ARGS(data_args),			\
				   TP_CONDITION(cond), 0);		\
		}							\
									\
		/* lockdep only: exercise RCU, result is discarded */	\
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
			rcu_read_lock_sched_notrace();			\
			rcu_dereference_sched(__tracepoint_##name.funcs);\
			rcu_read_unlock_sched_notrace();		\
		}							\
	}								\
									\
	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
			    PARAMS(cond), PARAMS(data_proto),		\
			    PARAMS(data_args))				\
									\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_register(&__tracepoint_##name,	\
						 (void *)probe, data);	\
	}								\
									\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto), void *data,\
				   int prio)				\
	{								\
		return tracepoint_probe_register_prio(&__tracepoint_##name, \
						      (void *)probe,	\
						      data, prio);	\
	}								\
									\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_unregister(&__tracepoint_##name,\
						   (void *)probe, data);\
	}								\
									\
	static inline void						\
	check_trace_callback_type_##name(void (*cb)(data_proto))	\
	{								\
	}								\
									\
	static inline bool trace_##name##_enabled(void)			\
	{								\
		return static_key_false(&__tracepoint_##name.key);	\
	}
281 
/*
 * DEFINE_TRACE_FN() - define the storage and the iterator of tracepoint
 * @_name.
 *
 * @_reg and @_unreg are stored in the tracepoint's regfunc/unregfunc
 * members.  @proto/@args are the probe prototype and argument names without
 * the leading data pointer.
 *
 * Emits:
 *   - the tracepoint's name string, placed in "__tracepoints_strings";
 *   - struct tracepoint __tracepoint_<_name>, placed in "__tracepoints",
 *     initialised with a false static key and no probes (funcs == NULL);
 *   - a "__tracepoints_ptrs" entry through __TRACEPOINT_ENTRY().  We have no
 *     guarantee that gcc and the linker won't up-align the tracepoint
 *     structures, so this array of pointers is what gets used for iteration
 *     on the tracepoints;
 *   - __traceiter_<_name>(): if the funcs array is non-NULL, calls each
 *     probe in array order with that probe's own data pointer, stopping at
 *     the first element whose func is NULL; always returns 0;
 *   - the tp_func_<_name> static call, via
 *     DEFINE_STATIC_CALL(tp_func_<_name>, __traceiter_<_name>).
 */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)		\
	static const char __tpstrtab_##_name[]				\
	__section("__tracepoints_strings") = #_name;			\
	extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name);	\
	int __traceiter_##_name(void *__data, proto);			\
	struct tracepoint __tracepoint_##_name	__used			\
	__section("__tracepoints") = {					\
		.name = __tpstrtab_##_name,				\
		.key = STATIC_KEY_INIT_FALSE,				\
		.static_call_key = &STATIC_CALL_KEY(tp_func_##_name),	\
		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
		.iterator = &__traceiter_##_name,			\
		.regfunc = _reg,					\
		.unregfunc = _unreg,					\
		.funcs = NULL };					\
	__TRACEPOINT_ENTRY(_name);					\
	int __traceiter_##_name(void *__data, proto)			\
	{								\
		struct tracepoint_func *it_func_ptr =			\
			rcu_dereference_raw(__tracepoint_##_name.funcs); \
		void *it_func;						\
									\
		if (!it_func_ptr)					\
			return 0;					\
									\
		/* a non-NULL array has at least one valid element */	\
		do {							\
			it_func = it_func_ptr->func;			\
			__data = it_func_ptr->data;			\
			((void (*)(void *, proto))(it_func))(__data, args); \
		} while ((++it_func_ptr)->func);			\
									\
		return 0;						\
	}								\
	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
320 
/* DEFINE_TRACE() - DEFINE_TRACE_FN() with NULL (un)registration callbacks. */
#define DEFINE_TRACE(name, proto, args)					\
	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

/*
 * Export the three symbols generated for tracepoint @name: the struct
 * tracepoint, the iterator function and the static call.  The _GPL variant
 * uses the GPL-only export macros for all three.
 */
#define EXPORT_TRACEPOINT_SYMBOL(name)					\
	EXPORT_SYMBOL(__tracepoint_##name);				\
	EXPORT_SYMBOL(__traceiter_##name);				\
	EXPORT_STATIC_CALL(tp_func_##name)
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
	EXPORT_SYMBOL_GPL(__traceiter_##name);				\
	EXPORT_STATIC_CALL_GPL(tp_func_##name)
332 
333 
334 #else /* !TRACEPOINTS_ENABLED */
/*
 * __DECLARE_TRACE() for the case where tracepoints are compiled out.
 *
 * Provides the same set of helpers as the enabled variant so that callers
 * build unchanged:
 *   - trace_<name>() and trace_<name>_rcuidle() are empty;
 *   - register_trace_<name>(), register_trace_prio_<name>() and
 *     unregister_trace_<name>() fail with -ENOSYS;
 *   - check_trace_callback_type_<name>() is empty;
 *   - trace_<name>_enabled() is always false.
 *
 * @args, @cond and @data_args are accepted for signature compatibility and
 * are not used.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	static inline void trace_##name(proto)				\
	{ }								\
	static inline void trace_##name##_rcuidle(proto)		\
	{ }								\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto),		\
			      void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	/* stub counterpart of the enabled variant's _prio helper */	\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto),		\
				   void *data, int prio)		\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto),		\
				void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return false;						\
	}
360 
/* With tracepoints compiled out, defining or exporting one emits nothing. */
#define DEFINE_TRACE(name, proto, args)
#define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
#define EXPORT_TRACEPOINT_SYMBOL(name)
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
365 
366 #endif /* TRACEPOINTS_ENABLED */
367 
#ifdef CONFIG_TRACING
/* Attribute that keeps a string pointer and places it in "__tracepoint_str". */
#define __tracepoint_string	__used __section("__tracepoint_str")

/**
 * tracepoint_string - register constant persistent string to trace system
 * @str: a constant persistent string that will be referenced in tracepoints
 *
 * If constant strings are being used in tracepoints, it is faster and
 * more efficient to just save the pointer to the string and reference
 * that with a printf "%s" instead of saving the string in the ring buffer
 * and wasting space and time.
 *
 * The problem with that approach is that userspace tools that read the
 * binary output of the trace buffers do not have access to the string.
 * Instead they just show the address of the string, which is not very
 * useful to users.
 *
 * With tracepoint_string(), the string will be registered to the tracing
 * system and exported to userspace via the debugfs/tracing/printk_formats
 * file that maps the string address to the string text. This way userspace
 * tools that read the binary buffers have a way to map the pointers to
 * the ASCII strings they represent.
 *
 * The @str used must be a constant string and persistent, as it would not
 * make sense to show a string that no longer exists. It is still fine to
 * use with modules, because when modules are unloaded, if they had
 * tracepoints, the ring buffers are cleared too. As long as the string
 * does not change during the life of the module, it is fine to use
 * tracepoint_string() within a module.
 *
 * Evaluates to the pointer stored in a static variable carrying the
 * __tracepoint_string attribute.
 */
#define tracepoint_string(str)						\
	({								\
		static const char *___tp_str __tracepoint_string = str; \
		___tp_str;						\
	})
#else
/*
 * tracepoint_string() is used to save the string address for userspace
 * tracing tools. When tracing isn't configured, there's no need to save
 * anything: the macro evaluates to @str itself and the attribute is empty.
 */
# define __tracepoint_string
# define tracepoint_string(str) str
#endif
411 
/*
 * DECLARE_TRACE() - declare tracepoint @name with prototype @proto and
 * argument names @args.
 *
 * Forwards to __DECLARE_TRACE() with "void *__data" / "__data" put in front
 * of the prototype and argument list, and with the condition that the
 * current CPU (raw_smp_processor_id()) is online.
 */
#define DECLARE_TRACE(name, proto, args)				\
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
			cpu_online(raw_smp_processor_id()),		\
			PARAMS(void *__data, proto),			\
			PARAMS(__data, args))

/*
 * DECLARE_TRACE_CONDITION() - like DECLARE_TRACE(), but the tracepoint
 * additionally requires @cond to be true.  The CPU-online test comes first,
 * so @cond is not evaluated on an offline CPU.
 */
#define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
			PARAMS(void *__data, proto),			\
			PARAMS(__data, args))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
427 
428 #endif /* DECLARE_TRACE */
429 
430 #ifndef TRACE_EVENT
431 /*
432  * For use with the TRACE_EVENT macro:
433  *
434  * We define a tracepoint, its arguments, its printk format
435  * and its 'fast binary record' layout.
436  *
437  * Firstly, name your tracepoint via TRACE_EVENT(name : the
438  * 'subsystem_event' notation is fine.
439  *
440  * Think about this whole construct as the
441  * 'trace_sched_switch() function' from now on.
442  *
443  *
444  *  TRACE_EVENT(sched_switch,
445  *
446  *	*
447  *	* A function has a regular function arguments
448  *	* prototype, declare it via TP_PROTO():
449  *	*
450  *
451  *	TP_PROTO(struct rq *rq, struct task_struct *prev,
452  *		 struct task_struct *next),
453  *
454  *	*
455  *	* Define the call signature of the 'function'.
456  *	* (Design sidenote: we use this instead of a
457  *	*  TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
458  *	*
459  *
460  *	TP_ARGS(rq, prev, next),
461  *
462  *	*
463  *	* Fast binary tracing: define the trace record via
464  *	* TP_STRUCT__entry(). You can think about it like a
465  *	* regular C structure local variable definition.
466  *	*
467  *	* This is how the trace record is structured and will
468  *	* be saved into the ring buffer. These are the fields
469  *	* that will be exposed to user-space in
470  *	* /sys/kernel/debug/tracing/events/<*>/format.
471  *	*
472  *	* The declared 'local variable' is called '__entry'
473  *	*
474  *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
475  *	*
476  *	*	pid_t	prev_pid;
477  *	*
478  *	* __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
479  *	*
480  *	*	char	prev_comm[TASK_COMM_LEN];
481  *	*
482  *
483  *	TP_STRUCT__entry(
484  *		__array(	char,	prev_comm,	TASK_COMM_LEN	)
485  *		__field(	pid_t,	prev_pid			)
486  *		__field(	int,	prev_prio			)
487  *		__array(	char,	next_comm,	TASK_COMM_LEN	)
488  *		__field(	pid_t,	next_pid			)
489  *		__field(	int,	next_prio			)
490  *	),
491  *
492  *	*
493  *	* Assign the entry into the trace record, by embedding
494  *	* a full C statement block into TP_fast_assign(). You
495  *	* can refer to the trace record as '__entry' -
496  *	* otherwise you can put arbitrary C code in here.
497  *	*
498  *	* Note: this C code will execute every time a trace event
499  *	* happens, on an active tracepoint.
500  *	*
501  *
502  *	TP_fast_assign(
503  *		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
504  *		__entry->prev_pid	= prev->pid;
505  *		__entry->prev_prio	= prev->prio;
506  *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
507  *		__entry->next_pid	= next->pid;
508  *		__entry->next_prio	= next->prio;
509  *	),
510  *
511  *	*
512  *	* Formatted output of a trace record via TP_printk().
513  *	* This is how the tracepoint will appear under ftrace
514  *	* plugins that make use of this tracepoint.
515  *	*
516  *	* (raw-binary tracing won't actually perform this step.)
517  *	*
518  *
519  *	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
520  *		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
521  *		__entry->next_comm, __entry->next_pid, __entry->next_prio),
522  *
523  * );
524  *
525  * This macro construct is thus used for the regular printk format
526  * tracing setup, it is used to construct a function pointer based
527  * tracepoint callback (this is used by programmatic plugins and
528  * can also be used by generic instrumentation like SystemTap), and
529  * it is also used to expose a structured trace record in
530  * /sys/kernel/debug/tracing/events/.
531  *
532  * A set of (un)registration functions can be passed to the variant
533  * TRACE_EVENT_FN to perform any (un)registration work.
534  */
535 
/*
 * Default expansions of the event-class macros: only the tracepoint itself
 * is declared.  The template, the record layout, the assignment code, the
 * print format and the reg/unreg callbacks are not used here.
 */

/* An event class on its own declares nothing. */
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)

/* Unconditional events: plain DECLARE_TRACE(). */
#define DEFINE_EVENT(template, name, proto, args)			\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)	\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_PRINT(template, name, proto, args, print)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

/* Conditional event: @cond is forwarded to DECLARE_TRACE_CONDITION(). */
#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond)	\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))
547 
/*
 * Default expansions of the TRACE_EVENT() family: only the tracepoint is
 * declared; the struct, assign, print, reg and unreg arguments are not used
 * here.  The _CONDITION/_COND variants forward @cond to
 * DECLARE_TRACE_CONDITION().
 */
#define TRACE_EVENT(name, proto, args, struct, assign, print)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define TRACE_EVENT_FN(name, proto, args, struct,			\
		       assign, print, reg, unreg)			\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct,		\
			    assign, print, reg, unreg)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))

#define TRACE_EVENT_CONDITION(name, proto, args, cond,			\
			      struct, assign, print)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
565 
/*
 * DECLARE_EVENT_NOP() - declare a do-nothing stand-in for tracepoint @name:
 * an empty trace_<name>() and a trace_<name>_enabled() that is always
 * false.  @args is not used.
 */
#define DECLARE_EVENT_NOP(name, proto, args)				\
	static inline void trace_##name(proto)				\
	{								\
	}								\
	static inline bool trace_##name##_enabled(void)			\
	{								\
		return false;						\
	}

/* No-op counterparts of TRACE_EVENT(), DECLARE_EVENT_CLASS(), DEFINE_EVENT(). */
#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)	\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))

#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)

#define DEFINE_EVENT_NOP(template, name, proto, args)			\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
580 
581 #endif /* ifndef TRACE_EVENT (see note above) */
582