xref: /linux-6.15/include/linux/tracepoint.h (revision c098564d)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 #ifndef _LINUX_TRACEPOINT_H
3 #define _LINUX_TRACEPOINT_H
4 
5 /*
6  * Kernel Tracepoint API.
7  *
8  * See Documentation/trace/tracepoints.rst.
9  *
10  * Copyright (C) 2008-2014 Mathieu Desnoyers <[email protected]>
11  *
12  * Heavily inspired from the Linux Kernel Markers.
13  */
14 
15 #include <linux/smp.h>
16 #include <linux/srcu.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/cpumask.h>
20 #include <linux/rcupdate.h>
21 #include <linux/tracepoint-defs.h>
22 #include <linux/static_call.h>
23 
24 struct module;
25 struct tracepoint;
26 struct notifier_block;
27 
/*
 * One symbolic-name -> numeric-value mapping.
 * NOTE(review): the field meanings below are inferred from the names only;
 * confirm against the code that fills in and consumes these entries.
 */
28 struct trace_eval_map {
29 	const char		*system;	/* presumably the owning trace system's name */
30 	const char		*eval_string;	/* presumably the symbol's textual name */
31 	unsigned long		eval_value;	/* presumably the value eval_string stands for */
32 };
33 
/*
 * A probe priority value.
 * NOTE(review): presumably the priority tracepoint_probe_register() applies
 * when the caller does not pass one -- confirm in the implementation.
 */
34 #define TRACEPOINT_DEFAULT_PRIO	10
35 
/* SRCU domain entered by __DO_TRACE() for rcuidle callers; defined elsewhere. */
36 extern struct srcu_struct tracepoint_srcu;
37 
/*
 * Untyped probe (un)registration entry points, implemented outside this
 * header. The typed register_trace_<name>(), register_trace_prio_<name>()
 * and unregister_trace_<name>() wrappers generated by __DECLARE_TRACE()
 * forward to them.
 *
 * @tp:    tracepoint to attach the probe to / detach it from
 * @probe: probe function, passed as an untyped pointer
 * @data:  opaque pointer handed over together with @probe
 * @prio:  probe priority (tracepoint_probe_register_prio() only)
 *
 * NOTE(review): return values are presumably 0 on success and a negative
 * errno on failure -- confirm against the implementation.
 */
38 extern int
39 tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
40 extern int
41 tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data,
42 			       int prio);
43 extern int
44 tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);
/*
 * NOTE(review): judging by the name and signature, calls @fct(tp, @priv) for
 * each tracepoint built into the kernel -- confirm against the implementation.
 */
45 extern void
46 for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
47 		void *priv);
48 
/*
 * Module support.
 *
 * With CONFIG_MODULES the functions below are only declared here and are
 * implemented elsewhere. Without it they collapse to inline stubs:
 * trace_module_has_bad_taint() always returns false and the notifier
 * (un)registration helpers do nothing and return 0.
 */
49 #ifdef CONFIG_MODULES
/* Pairs a struct module pointer with a list node (users are not visible here). */
50 struct tp_module {
51 	struct list_head list;
52 	struct module *mod;
53 };
54 
55 bool trace_module_has_bad_taint(struct module *mod);
56 extern int register_tracepoint_module_notifier(struct notifier_block *nb);
57 extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
58 #else
/* No module support: no module can carry a bad taint. */
59 static inline bool trace_module_has_bad_taint(struct module *mod)
60 {
61 	return false;
62 }
/* No module support: nothing to register, report success. */
63 static inline
64 int register_tracepoint_module_notifier(struct notifier_block *nb)
65 {
66 	return 0;
67 }
/* No module support: nothing to unregister, report success. */
68 static inline
69 int unregister_tracepoint_module_notifier(struct notifier_block *nb)
70 {
71 	return 0;
72 }
73 #endif /* CONFIG_MODULES */
74 
75 /*
76  * tracepoint_synchronize_unregister must be called between the last tracepoint
77  * probe unregistration and the end of module exit to make sure there is no
78  * caller executing a probe when it is freed.
79  */
/*
 * With CONFIG_TRACEPOINTS this first waits for an SRCU grace period on
 * tracepoint_srcu and then for an RCU grace period, matching the two
 * read-side protections __DO_TRACE() places around probe calls
 * (tracepoint_srcu for rcuidle callers, a preemption-disabled section for
 * every caller). Without CONFIG_TRACEPOINTS it is an empty function.
 */
80 #ifdef CONFIG_TRACEPOINTS
81 static inline void tracepoint_synchronize_unregister(void)
82 {
83 	synchronize_srcu(&tracepoint_srcu);
84 	synchronize_rcu();
85 }
86 #else
87 static inline void tracepoint_synchronize_unregister(void)
88 { }
89 #endif
90 
/*
 * Registration hooks for syscall tracepoints, declared only when the
 * architecture provides them; implemented elsewhere.
 * NOTE(review): these match the _reg/_unreg callback slots of
 * DEFINE_TRACE_FN() (.regfunc/.unregfunc) -- confirm that is how they are used.
 */
91 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
92 extern int syscall_regfunc(void);
93 extern void syscall_unregfunc(void);
94 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
95 
/*
 * PARAMS() expands to its arguments unchanged. Wrapping a comma-separated
 * list in it lets the list travel through further macro layers as a single
 * macro argument (see its use in DECLARE_TRACE() and friends).
 */
96 #ifndef PARAMS
97 #define PARAMS(args...) args
98 #endif
99 
/* In this header both of these expand to nothing. */
100 #define TRACE_DEFINE_ENUM(x)
101 #define TRACE_DEFINE_SIZEOF(x)
102 
/*
 * Entries of the "__tracepoints_ptrs" section.
 *
 * tracepoint_ptr_deref() turns one section entry back into a
 * struct tracepoint pointer; __TRACEPOINT_ENTRY(name) emits the entry that
 * refers to __tracepoint_<name>.
 *
 * With CONFIG_HAVE_ARCH_PREL32_RELOCATIONS an entry is a 4-byte aligned
 * 32-bit value holding the distance from the entry itself to the tracepoint
 * ("__tracepoint_<name> - ."), decoded with offset_to_ptr(). Otherwise an
 * entry is a plain pointer that is simply dereferenced.
 */
103 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
104 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
105 {
106 	return offset_to_ptr(p);
107 }
108 
109 #define __TRACEPOINT_ENTRY(name)					\
110 	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
111 	    "	.balign 4					\n"	\
112 	    "	.long 	__tracepoint_" #name " - .		\n"	\
113 	    "	.previous					\n")
114 #else
115 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
116 {
117 	return *p;
118 }
119 
120 #define __TRACEPOINT_ENTRY(name)					 \
121 	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
122 	__section("__tracepoints_ptrs") = &__tracepoint_##name
123 #endif
124 
125 #endif /* _LINUX_TRACEPOINT_H */
126 
127 /*
128  * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
129  *  file ifdef protection.
130  *  This is due to the way trace events work. If a file includes two
131  *  trace event headers under one "CREATE_TRACE_POINTS" the first include
132  *  will override the TRACE_EVENT and break the second include.
133  */
134 
135 #ifndef DECLARE_TRACE
136 
/*
 * Pass-through wrappers: each expands to exactly its arguments. They label
 * the prototype, argument list and condition in tracepoint declarations
 * and keep each comma-separated list together as one macro argument.
 */
137 #define TP_PROTO(args...)	args
138 #define TP_ARGS(args...)	args
139 #define TP_CONDITION(args...)	args
140 
141 /*
142  * Individual subsystems may have a separate configuration to
143  * enable their tracepoints. By default, this file will create
144  * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem
145  * wants to be able to disable its tracepoints from being created
146  * it can define NOTRACE before including the tracepoint headers.
147  */
148 #if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE)
149 #define TRACEPOINTS_ENABLED
150 #endif
151 
152 #ifdef TRACEPOINTS_ENABLED
153 
/*
 * __DO_TRACE_CALL(name, args) - run the probes of tracepoint @name.
 *
 * With static calls: load the tracepoint's funcs array pointer and, only if
 * it is non-NULL, invoke the static call tp_func_<name> with the first
 * array element's data pointer followed by @args.
 *
 * Without static calls: call the iterator __traceiter_<name>() directly,
 * passing NULL as data; the iterator performs its own NULL check on the
 * funcs array and loads each probe's data itself (see DEFINE_TRACE_FN()).
 */
154 #ifdef CONFIG_HAVE_STATIC_CALL
155 #define __DO_TRACE_CALL(name, args)					\
156 	do {								\
157 		struct tracepoint_func *it_func_ptr;			\
158 		void *__data;						\
159 		it_func_ptr =						\
160 			rcu_dereference_raw((&__tracepoint_##name)->funcs); \
161 		if (it_func_ptr) {					\
162 			__data = (it_func_ptr)->data;			\
163 			static_call(tp_func_##name)(__data, args);	\
164 		}							\
165 	} while (0)
166 #else
167 #define __DO_TRACE_CALL(name, args)	__traceiter_##name(NULL, args)
168 #endif /* CONFIG_HAVE_STATIC_CALL */
169 
170 /*
171  * it_func[0] is never NULL because there is at least one element in the array
172  * when the array itself is non NULL.
173  */
/*
 * __DO_TRACE(name, args, cond, rcuidle) - call the probes of tracepoint @name.
 *
 * Must be expanded inside a function returning void: when @cond evaluates
 * to false it executes a bare "return", leaving the *enclosing* function.
 *
 * Otherwise it disables preemption and, for rcuidle callers only, also takes
 * tracepoint_srcu and calls rcu_irq_enter_irqson(); it then runs the probes
 * through __DO_TRACE_CALL() and undoes those steps in reverse order.
 * An rcuidle caller running in NMI context triggers a one-time warning.
 */
174 #define __DO_TRACE(name, args, cond, rcuidle)				\
175 	do {								\
176 		int __maybe_unused __idx = 0;				\
177 									\
178 		if (!(cond))						\
179 			return;						\
180 									\
181 		/* srcu can't be used from NMI */			\
182 		WARN_ON_ONCE(rcuidle && in_nmi());			\
183 									\
184 		/* keep srcu and sched-rcu usage consistent */		\
185 		preempt_disable_notrace();				\
186 									\
187 		/*							\
188 		 * For rcuidle callers, use srcu since sched-rcu	\
189 		 * doesn't work from the idle path.			\
190 		 */							\
191 		if (rcuidle) {						\
192 			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
193 			rcu_irq_enter_irqson();				\
194 		}							\
195 									\
196 		__DO_TRACE_CALL(name, TP_ARGS(args));			\
197 									\
198 		if (rcuidle) {						\
199 			rcu_irq_exit_irqson();				\
200 			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
201 		}							\
202 									\
203 		preempt_enable_notrace();				\
204 	} while (0)
205 
/*
 * __DECLARE_TRACE_RCU(name, proto, args, cond) - define
 * trace_<name>_rcuidle(), which behaves like trace_<name>() except that it
 * passes rcuidle=1 to __DO_TRACE() (and has no lockdep-only RCU check).
 *
 * When MODULE is defined the macro expands to nothing, so code built as a
 * module gets no _rcuidle variant.
 */
206 #ifndef MODULE
207 #define __DECLARE_TRACE_RCU(name, proto, args, cond)			\
208 	static inline void trace_##name##_rcuidle(proto)		\
209 	{								\
210 		if (static_key_false(&__tracepoint_##name.key))		\
211 			__DO_TRACE(name,				\
212 				TP_ARGS(args),				\
213 				TP_CONDITION(cond), 1);			\
214 	}
215 #else
216 #define __DECLARE_TRACE_RCU(name, proto, args, cond)
217 #endif
218 
219 /*
220  * Make sure the alignment of the structure in the __tracepoints section will
221  * not add unwanted padding between the beginning of the section and the
222  * structure. Force alignment to the same alignment as the section start.
223  *
224  * When lockdep is enabled, we make sure to always do the RCU portions of
225  * the tracepoint code, regardless of whether tracing is on. However,
226  * don't check if the condition is false, due to interaction with idle
227  * instrumentation. This lets us find RCU issues triggered with tracepoints
228  * even when this tracepoint is off. This code has no purpose other than
229  * poking RCU a bit.
230  */
/*
 * __DECLARE_TRACE(name, proto, args, cond, data_proto) emits, for one
 * tracepoint:
 *
 *  - extern declarations of the iterator __traceiter_<name>(), the static
 *    call tp_func_<name> and the struct tracepoint __tracepoint_<name>;
 *  - trace_<name>(proto): when the tracepoint's static key is on, runs
 *    __DO_TRACE() with rcuidle=0. Because __DO_TRACE() returns from the
 *    enclosing function when @cond is false, the lockdep-only block that
 *    follows is skipped in that case;
 *  - trace_<name>_rcuidle(proto), via __DECLARE_TRACE_RCU();
 *  - register_trace_<name>(), register_trace_prio_<name>() and
 *    unregister_trace_<name>(): typed wrappers that cast @probe to void *
 *    and forward to the tracepoint_probe_*() functions;
 *  - check_trace_callback_type_<name>(): empty body; it exists so that a
 *    callback can be type-checked against @data_proto at compile time;
 *  - trace_<name>_enabled(): returns the state of the static key.
 */
231 #define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
232 	extern int __traceiter_##name(data_proto);			\
233 	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
234 	extern struct tracepoint __tracepoint_##name;			\
235 	static inline void trace_##name(proto)				\
236 	{								\
237 		if (static_key_false(&__tracepoint_##name.key))		\
238 			__DO_TRACE(name,				\
239 				TP_ARGS(args),				\
240 				TP_CONDITION(cond), 0);			\
241 		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
242 			rcu_read_lock_sched_notrace();			\
243 			rcu_dereference_sched(__tracepoint_##name.funcs);\
244 			rcu_read_unlock_sched_notrace();		\
245 		}							\
246 	}								\
247 	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
248 			    PARAMS(cond))				\
249 	static inline int						\
250 	register_trace_##name(void (*probe)(data_proto), void *data)	\
251 	{								\
252 		return tracepoint_probe_register(&__tracepoint_##name,	\
253 						(void *)probe, data);	\
254 	}								\
255 	static inline int						\
256 	register_trace_prio_##name(void (*probe)(data_proto), void *data,\
257 				   int prio)				\
258 	{								\
259 		return tracepoint_probe_register_prio(&__tracepoint_##name, \
260 					      (void *)probe, data, prio); \
261 	}								\
262 	static inline int						\
263 	unregister_trace_##name(void (*probe)(data_proto), void *data)	\
264 	{								\
265 		return tracepoint_probe_unregister(&__tracepoint_##name,\
266 						(void *)probe, data);	\
267 	}								\
268 	static inline void						\
269 	check_trace_callback_type_##name(void (*cb)(data_proto))	\
270 	{								\
271 	}								\
272 	static inline bool						\
273 	trace_##name##_enabled(void)					\
274 	{								\
275 		return static_key_false(&__tracepoint_##name.key);	\
276 	}
277 
278 /*
279  * We have no guarantee that gcc and the linker won't up-align the tracepoint
280  * structures, so we create an array of pointers that will be used for iteration
281  * on the tracepoints.
282  */
/*
 * DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) emits the single
 * definition of a tracepoint:
 *
 *  - its name string, placed in the "__tracepoints_strings" section;
 *  - struct tracepoint __tracepoint_<_name> in the "__tracepoints" section,
 *    initialised with the static key off, no probes (.funcs = NULL), the
 *    static call key/trampoline, the iterator, and @_reg/@_unreg as
 *    .regfunc/.unregfunc;
 *  - its "__tracepoints_ptrs" entry via __TRACEPOINT_ENTRY();
 *  - the iterator __traceiter_<_name>(): if the funcs array is non-NULL it
 *    calls every element's func with that element's data followed by @args,
 *    stopping at the first element whose func is NULL; always returns 0;
 *  - the static call tp_func_<_name>, initially targeting the iterator.
 *
 * The expansion already ends in ';'.
 */
283 #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)		\
284 	static const char __tpstrtab_##_name[]				\
285 	__section("__tracepoints_strings") = #_name;			\
286 	extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name);	\
287 	int __traceiter_##_name(void *__data, proto);			\
288 	struct tracepoint __tracepoint_##_name	__used			\
289 	__section("__tracepoints") = {					\
290 		.name = __tpstrtab_##_name,				\
291 		.key = STATIC_KEY_INIT_FALSE,				\
292 		.static_call_key = &STATIC_CALL_KEY(tp_func_##_name),	\
293 		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
294 		.iterator = &__traceiter_##_name,			\
295 		.regfunc = _reg,					\
296 		.unregfunc = _unreg,					\
297 		.funcs = NULL };					\
298 	__TRACEPOINT_ENTRY(_name);					\
299 	int __traceiter_##_name(void *__data, proto)			\
300 	{								\
301 		struct tracepoint_func *it_func_ptr;			\
302 		void *it_func;						\
303 									\
304 		it_func_ptr =						\
305 			rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
306 		if (it_func_ptr) {					\
307 			do {						\
308 				it_func = READ_ONCE((it_func_ptr)->func); \
309 				__data = (it_func_ptr)->data;		\
310 				((void(*)(void *, proto))(it_func))(__data, args); \
311 			} while ((++it_func_ptr)->func);		\
312 		}							\
313 		return 0;						\
314 	}								\
315 	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
316 
/*
 * DEFINE_TRACE(name, proto, args) - DEFINE_TRACE_FN() with no registration
 * callbacks (both passed as NULL). The expansion already ends in ';'.
 */
317 #define DEFINE_TRACE(name, proto, args)		\
318 	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
319 
/*
 * Export everything belonging to tracepoint @name: the struct tracepoint,
 * the iterator function and the static call. The _GPL variant uses the
 * GPL-only export macros. The last statement has no trailing ';' -- the
 * user of the macro supplies it.
 */
320 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
321 	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
322 	EXPORT_SYMBOL_GPL(__traceiter_##name);				\
323 	EXPORT_STATIC_CALL_GPL(tp_func_##name)
324 #define EXPORT_TRACEPOINT_SYMBOL(name)					\
325 	EXPORT_SYMBOL(__tracepoint_##name);				\
326 	EXPORT_SYMBOL(__traceiter_##name);				\
327 	EXPORT_STATIC_CALL(tp_func_##name)
328 
329 
330 #else /* !TRACEPOINTS_ENABLED */
/*
 * Tracepoints disabled (CONFIG_TRACEPOINTS off or NOTRACE defined):
 * __DECLARE_TRACE() still provides the per-tracepoint functions so callers
 * compile, but trace_<name>() and trace_<name>_rcuidle() are empty, the
 * (un)register helpers fail with -ENOSYS and trace_<name>_enabled() is
 * always false. The DEFINE_ and EXPORT_ macros expand to nothing.
 *
 * NOTE(review): unlike the enabled variant, no register_trace_prio_<name>()
 * stub is generated here, so a caller of it does not compile when
 * tracepoints are disabled -- consider adding a matching -ENOSYS stub.
 */
331 #define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
332 	static inline void trace_##name(proto)				\
333 	{ }								\
334 	static inline void trace_##name##_rcuidle(proto)		\
335 	{ }								\
336 	static inline int						\
337 	register_trace_##name(void (*probe)(data_proto),		\
338 			      void *data)				\
339 	{								\
340 		return -ENOSYS;						\
341 	}								\
342 	static inline int						\
343 	unregister_trace_##name(void (*probe)(data_proto),		\
344 				void *data)				\
345 	{								\
346 		return -ENOSYS;						\
347 	}								\
348 	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
349 	{								\
350 	}								\
351 	static inline bool						\
352 	trace_##name##_enabled(void)					\
353 	{								\
354 		return false;						\
355 	}
356 
357 #define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
358 #define DEFINE_TRACE(name, proto, args)
359 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
360 #define EXPORT_TRACEPOINT_SYMBOL(name)
361 
362 #endif /* TRACEPOINTS_ENABLED */
363 
364 #ifdef CONFIG_TRACING
365 /**
366  * tracepoint_string - register constant persistent string to trace system
367  * @str: a constant persistent string that will be referenced in tracepoints
368  *
369  * If constant strings are being used in tracepoints, it is faster and
370  * more efficient to just save the pointer to the string and reference
371  * that with a printf "%s" instead of saving the string in the ring buffer
372  * and wasting space and time.
373  *
374  * The problem with the above approach is that userspace tools that read
375  * the binary output of the trace buffers do not have access to the string.
376  * Instead they just show the address of the string which is not very
377  * useful to users.
378  *
379  * With tracepoint_string(), the string will be registered to the tracing
380  * system and exported to userspace via the debugfs/tracing/printk_formats
381  * file that maps the string address to the string text. This way userspace
382  * tools that read the binary buffers have a way to map the pointers to
383  * the ASCII strings they represent.
384  *
385  * The @str used must be a constant string and persistent as it would not
386  * make sense to show a string that no longer exists. But it is still fine
387  * to be used with modules, because when modules are unloaded, if they
388  * had tracepoints, the ring buffers are cleared too. As long as the string
389  * does not change during the life of the module, it is fine to use
390  * tracepoint_string() within a module.
391  */
/*
 * Implementation: a statement expression that stores @str in a static
 * pointer placed in the "__tracepoint_str" section and evaluates to that
 * pointer.
 */
392 #define tracepoint_string(str)						\
393 	({								\
394 		static const char *___tp_str __tracepoint_string = str; \
395 		___tp_str;						\
396 	})
/* Annotation used above; defined after its use, which is fine for macros. */
397 #define __tracepoint_string	__used __section("__tracepoint_str")
398 #else
399 /*
400  * tracepoint_string() is used to save the string address for userspace
401  * tracing tools. When tracing isn't configured, there's no need to save
402  * anything.
403  */
404 # define tracepoint_string(str) str
405 # define __tracepoint_string
406 #endif
407 
/*
 * DECLARE_TRACE(name, proto, args) - declare a tracepoint through
 * __DECLARE_TRACE(). The condition passed down is
 * cpu_online(raw_smp_processor_id()), i.e. probes are only called while the
 * executing CPU is online. Probe callbacks take "void *__data" followed by
 * @proto.
 */
408 #define DECLARE_TRACE(name, proto, args)				\
409 	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
410 			cpu_online(raw_smp_processor_id()),		\
411 			PARAMS(void *__data, proto))
412 
/*
 * DECLARE_TRACE_CONDITION(name, proto, args, cond) - like DECLARE_TRACE(),
 * with the caller's @cond ANDed onto the CPU-online check.
 */
413 #define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
414 	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
415 			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
416 			PARAMS(void *__data, proto))
417 
/* In this header both of these expand to nothing. */
418 #define TRACE_EVENT_FLAGS(event, flag)
419 
420 #define TRACE_EVENT_PERF_PERM(event, expr...)
421 
422 #endif /* DECLARE_TRACE */
423 
424 #ifndef TRACE_EVENT
425 /*
426  * For use with the TRACE_EVENT macro:
427  *
428  * We define a tracepoint, its arguments, its printk format
429  * and its 'fast binary record' layout.
430  *
431  * Firstly, name your tracepoint via TRACE_EVENT(name : the
432  * 'subsystem_event' notation is fine.
433  *
434  * Think about this whole construct as the
435  * 'trace_sched_switch() function' from now on.
436  *
437  *
438  *  TRACE_EVENT(sched_switch,
439  *
440  *	*
441  *	* A function has a regular function argument
442  *	* prototype; declare it via TP_PROTO():
443  *	*
444  *
445  *	TP_PROTO(struct rq *rq, struct task_struct *prev,
446  *		 struct task_struct *next),
447  *
448  *	*
449  *	* Define the call signature of the 'function'.
450  *	* (Design sidenote: we use this instead of a
451  *	*  TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
452  *	*
453  *
454  *	TP_ARGS(rq, prev, next),
455  *
456  *	*
457  *	* Fast binary tracing: define the trace record via
458  *	* TP_STRUCT__entry(). You can think about it like a
459  *	* regular C structure local variable definition.
460  *	*
461  *	* This is how the trace record is structured and will
462  *	* be saved into the ring buffer. These are the fields
463  *	* that will be exposed to user-space in
464  *	* /sys/kernel/debug/tracing/events/<*>/format.
465  *	*
466  *	* The declared 'local variable' is called '__entry'
467  *	*
468  *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
469  *	*
470  *	*	pid_t	prev_pid;
471  *	*
472  *	* __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
473  *	*
474  *	*	char	prev_comm[TASK_COMM_LEN];
475  *	*
476  *
477  *	TP_STRUCT__entry(
478  *		__array(	char,	prev_comm,	TASK_COMM_LEN	)
479  *		__field(	pid_t,	prev_pid			)
480  *		__field(	int,	prev_prio			)
481  *		__array(	char,	next_comm,	TASK_COMM_LEN	)
482  *		__field(	pid_t,	next_pid			)
483  *		__field(	int,	next_prio			)
484  *	),
485  *
486  *	*
487  *	* Assign the entry into the trace record, by embedding
488  *	* a full C statement block into TP_fast_assign(). You
489  *	* can refer to the trace record as '__entry' -
490  *	* otherwise you can put arbitrary C code in here.
491  *	*
492  *	* Note: this C code will execute every time a trace event
493  *	* happens, on an active tracepoint.
494  *	*
495  *
496  *	TP_fast_assign(
497  *		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
498  *		__entry->prev_pid	= prev->pid;
499  *		__entry->prev_prio	= prev->prio;
500  *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
501  *		__entry->next_pid	= next->pid;
502  *		__entry->next_prio	= next->prio;
503  *	),
504  *
505  *	*
506  *	* Formatted output of a trace record via TP_printk().
507  *	* This is how the tracepoint will appear under ftrace
508  *	* plugins that make use of this tracepoint.
509  *	*
510  *	* (raw-binary tracing won't actually perform this step.)
511  *	*
512  *
513  *	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
514  *		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
515  *		__entry->next_comm, __entry->next_pid, __entry->next_prio),
516  *
517  * );
518  *
519  * This macro construct is thus used for the regular printk format
520  * tracing setup, it is used to construct a function pointer based
521  * tracepoint callback (this is used by programmatic plugins and
522  * can also be used by generic instrumentation like SystemTap), and
523  * it is also used to expose a structured trace record in
524  * /sys/kernel/debug/tracing/events/.
525  *
526  * A set of (un)registration functions can be passed to the variant
527  * TRACE_EVENT_FN to perform any (un)registration work.
528  */
529 
/*
 * Default expansions of the trace event macros, used when TRACE_EVENT has
 * not already been defined by an earlier include. Here every event macro
 * reduces to a plain tracepoint declaration: the record layout, assignment,
 * print format and reg/unreg arguments are dropped, an event class declares
 * nothing, and the *_CONDITION / *_COND forms map to
 * DECLARE_TRACE_CONDITION() while all others map to DECLARE_TRACE().
 */
530 #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
531 #define DEFINE_EVENT(template, name, proto, args)		\
532 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
533 #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\
534 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
535 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
536 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
537 #define DEFINE_EVENT_CONDITION(template, name, proto,		\
538 			       args, cond)			\
539 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
540 				PARAMS(args), PARAMS(cond))
541 
542 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
543 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
544 #define TRACE_EVENT_FN(name, proto, args, struct,		\
545 		assign, print, reg, unreg)			\
546 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
547 #define TRACE_EVENT_FN_COND(name, proto, args, cond, struct,		\
548 		assign, print, reg, unreg)			\
549 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),	\
550 			PARAMS(args), PARAMS(cond))
551 #define TRACE_EVENT_CONDITION(name, proto, args, cond,		\
552 			      struct, assign, print)		\
553 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
554 				PARAMS(args), PARAMS(cond))
555 
/* In this header both of these expand to nothing. */
556 #define TRACE_EVENT_FLAGS(event, flag)
557 
558 #define TRACE_EVENT_PERF_PERM(event, expr...)
559 
/*
 * NOP event variants: declare an event whose trace_<name>() is an empty
 * function and whose trace_<name>_enabled() always returns false. No
 * tracepoint, registration helpers or _rcuidle variant are generated, and
 * the @args parameter is unused.
 */
560 #define DECLARE_EVENT_NOP(name, proto, args)				\
561 	static inline void trace_##name(proto)				\
562 	{ }								\
563 	static inline bool trace_##name##_enabled(void)			\
564 	{								\
565 		return false;						\
566 	}
567 
568 #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)	\
569 	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
570 
571 #define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)
572 #define DEFINE_EVENT_NOP(template, name, proto, args)			\
573 	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
574 
575 #endif /* ifndef TRACE_EVENT (see note above) */
576