xref: /linux-6.15/include/linux/tracepoint.h (revision bbaf1ff0)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 #ifndef _LINUX_TRACEPOINT_H
3 #define _LINUX_TRACEPOINT_H
4 
5 /*
6  * Kernel Tracepoint API.
7  *
8  * See Documentation/trace/tracepoints.rst.
9  *
10  * Copyright (C) 2008-2014 Mathieu Desnoyers <[email protected]>
11  *
12  * Heavily inspired from the Linux Kernel Markers.
13  */
14 
15 #include <linux/smp.h>
16 #include <linux/srcu.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/cpumask.h>
20 #include <linux/rcupdate.h>
21 #include <linux/tracepoint-defs.h>
22 #include <linux/static_call.h>
23 
24 struct module;
25 struct tracepoint;
26 struct notifier_block;
27 
/*
 * struct trace_eval_map - one record tying a string to an unsigned long.
 *
 * Nothing in this header reads or writes the fields, so the roles below are
 * inferred from the member names only -- TODO confirm against the users:
 *   system      - presumably the name of the owning trace system
 *   eval_string - presumably the textual spelling of the value
 *   eval_value  - presumably the numeric value that spelling stands for
 */
28 struct trace_eval_map {
29 	const char		*system;
30 	const char		*eval_string;
31 	unsigned long		eval_value;
32 };
33 
/*
 * Priority that tracepoint_probe_register_may_exist() passes on behalf of
 * callers that do not choose one themselves.
 */
34 #define TRACEPOINT_DEFAULT_PRIO	10
35 
/*
 * SRCU domain used by this header: __DO_TRACE() takes its read lock for
 * rcuidle callers and tracepoint_synchronize_unregister() waits on it.
 * Defined outside this header.
 */
36 extern struct srcu_struct tracepoint_srcu;
37 
/*
 * Probe (un)registration entry points, implemented outside this header.
 *
 * @tp:    tracepoint to attach to / detach from.
 * @probe: probe function, passed as an untyped pointer; the typed
 *         register_trace_<name>() wrappers generated by __DECLARE_TRACE()
 *         cast to void * before calling in here.
 * @data:  opaque pointer stored with the probe; the iterator generated by
 *         DEFINE_TRACE_FN() hands it to the probe as its first argument.
 * @prio:  priority, for the _prio variants only.
 *
 * All return int. NOTE(review): presumably 0 on success and a negative errno
 * on failure, and "_may_exist" presumably tolerates an already-registered
 * probe -- neither is visible in this header, confirm in the implementation.
 */
38 extern int
39 tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
40 extern int
41 tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data,
42 			       int prio);
43 extern int
44 tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data,
45 					 int prio);
46 extern int
47 tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);
/*
 * tracepoint_probe_register_may_exist - convenience wrapper that calls
 * tracepoint_probe_register_prio_may_exist() with the priority fixed to
 * TRACEPOINT_DEFAULT_PRIO.
 *
 * @tp, @probe, @data are forwarded unchanged; the callee's return value is
 * returned as-is.
 */
48 static inline int
49 tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe,
50 				    void *data)
51 {
52 	return tracepoint_probe_register_prio_may_exist(tp, probe, data,
53 							TRACEPOINT_DEFAULT_PRIO);
54 }
/*
 * for_each_kernel_tracepoint - implemented outside this header.
 *
 * @fct:  callback taking a tracepoint and the caller's @priv pointer.
 * @priv: opaque pointer handed to @fct.
 *
 * NOTE(review): going by the name, @fct is invoked once per tracepoint built
 * into the kernel; iteration order and locking are not visible here.
 */
55 extern void
56 for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
57 		void *priv);
58 
59 #ifdef CONFIG_MODULES
/*
 * struct tp_module - list node pairing list linkage with a module pointer.
 * Only declared when CONFIG_MODULES is set.
 * NOTE(review): which list these nodes are linked on is not visible in this
 * header -- confirm in the tracepoint implementation.
 */
60 struct tp_module {
61 	struct list_head list;
62 	struct module *mod;
63 };
64 
/*
 * Module-aware API, declared when CONFIG_MODULES is set and implemented
 * outside this header. Inline stubs with the same signatures are provided in
 * the #else branch for CONFIG_MODULES=n builds.
 * NOTE(review): the taint criteria and the events delivered to @nb are not
 * visible here.
 */
65 bool trace_module_has_bad_taint(struct module *mod);
66 extern int register_tracepoint_module_notifier(struct notifier_block *nb);
67 extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
68 #else
/* CONFIG_MODULES=n stub: @mod is ignored and false is always returned. */
69 static inline bool trace_module_has_bad_taint(struct module *mod)
70 {
71 	return false;
72 }
/* CONFIG_MODULES=n stub: @nb is ignored; does nothing and returns 0. */
73 static inline
74 int register_tracepoint_module_notifier(struct notifier_block *nb)
75 {
76 	return 0;
77 }
/* CONFIG_MODULES=n stub: @nb is ignored; does nothing and returns 0. */
78 static inline
79 int unregister_tracepoint_module_notifier(struct notifier_block *nb)
80 {
81 	return 0;
82 }
83 #endif /* CONFIG_MODULES */
84 
85 /*
86  * tracepoint_synchronize_unregister must be called between the last tracepoint
87  * probe unregistration and the end of module exit to make sure there is no
88  * caller executing a probe when it is freed.
89  */
90 #ifdef CONFIG_TRACEPOINTS
/*
 * CONFIG_TRACEPOINTS variant: waits first for an SRCU grace period on
 * tracepoint_srcu and then for an RCU grace period. Both are waited for
 * because __DO_TRACE() runs probes under the tracepoint_srcu read lock for
 * rcuidle callers and with preemption disabled for all callers.
 */
91 static inline void tracepoint_synchronize_unregister(void)
92 {
93 	synchronize_srcu(&tracepoint_srcu);
94 	synchronize_rcu();
95 }
96 #else
/* CONFIG_TRACEPOINTS=n stub: there are no probes to wait for, so a no-op. */
97 static inline void tracepoint_synchronize_unregister(void)
98 { }
99 #endif
100 
101 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
/*
 * Declared only when CONFIG_HAVE_SYSCALL_TRACEPOINTS is set; implemented
 * outside this header.
 * NOTE(review): the signatures match what DEFINE_TRACE_FN() stores in
 * .regfunc/.unregfunc, so these are presumably meant to be passed as its
 * reg/unreg arguments for the syscall tracepoints -- confirm at the users.
 */
102 extern int syscall_regfunc(void);
103 extern void syscall_unregfunc(void);
104 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
105 
106 #ifndef PARAMS
/*
 * PARAMS() expands to its arguments unchanged. It lets a comma-separated
 * list (a prototype or argument list) travel through another macro as a
 * single argument.
 */
107 #define PARAMS(args...) args
108 #endif
109 
/*
 * Both expand to nothing in this header.
 * NOTE(review): presumably redefined with real bodies by the headers that
 * generate trace events -- not visible here.
 */
110 #define TRACE_DEFINE_ENUM(x)
111 #define TRACE_DEFINE_SIZEOF(x)
112 
113 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
/*
 * CONFIG_HAVE_ARCH_PREL32_RELOCATIONS variant: entries in __tracepoints_ptrs
 * are emitted by __TRACEPOINT_ENTRY() as the offset "__tracepoint_<name> - .",
 * so the tracepoint address is recovered with offset_to_ptr().
 *
 * @p: address of one __tracepoints_ptrs entry.
 * Returns the struct tracepoint that entry refers to.
 */
114 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
115 {
116 	return offset_to_ptr(p);
117 }
118 
/*
 * __TRACEPOINT_ENTRY(name), relative-reference variant: emits, via top-level
 * asm, a 4-byte aligned .long into section __tracepoints_ptrs holding the
 * distance from the entry itself to __tracepoint_<name>, then switches back
 * to the previous section.
 */
119 #define __TRACEPOINT_ENTRY(name)					\
120 	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
121 	    "	.balign 4					\n"	\
122 	    "	.long 	__tracepoint_" #name " - .		\n"	\
123 	    "	.previous					\n")
124 #else
/*
 * Absolute-pointer variant: each __tracepoints_ptrs entry already holds the
 * tracepoint's address, so a plain load is enough.
 *
 * @p: address of one __tracepoints_ptrs entry.
 * Returns the struct tracepoint pointer stored in that entry.
 */
125 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
126 {
127 	return *p;
128 }
129 
/*
 * __TRACEPOINT_ENTRY(name), absolute-pointer variant: defines a static
 * tracepoint_ptr_t named __tracepoint_ptr_<name>, placed in section
 * __tracepoints_ptrs, marked __used and initialised with the address of
 * __tracepoint_<name>. The expansion has no trailing semicolon; the user
 * supplies it.
 */
130 #define __TRACEPOINT_ENTRY(name)					 \
131 	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
132 	__section("__tracepoints_ptrs") = &__tracepoint_##name
133 #endif
134 
135 #endif /* _LINUX_TRACEPOINT_H */
136 
137 /*
138  * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
139  *  file ifdef protection.
140  *  This is due to the way trace events work. If a file includes two
141  *  trace event headers under one "CREATE_TRACE_POINTS" the first include
142  *  will override the TRACE_EVENT and break the second include.
143  */
144 
145 #ifndef DECLARE_TRACE
146 
/*
 * Pass-through wrappers: each expands to its arguments unchanged. They label
 * the prototype, argument list and condition at tracepoint definition sites
 * and keep each comma-separated list together as one macro argument.
 */
147 #define TP_PROTO(args...)	args
148 #define TP_ARGS(args...)	args
149 #define TP_CONDITION(args...)	args
150 
151 /*
152  * Individual subsystems may have a separate configuration to
153  * enable their tracepoints. By default, this file will create
154  * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem
155  * wants to be able to disable its tracepoints from being created
156  * it can define NOTRACE before including the tracepoint headers.
157  */
/*
 * TRACEPOINTS_ENABLED is defined only when CONFIG_TRACEPOINTS is set and the
 * including file has not defined NOTRACE. It selects between the real macro
 * set and the empty stub set that follow.
 */
158 #if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE)
159 #define TRACEPOINTS_ENABLED
160 #endif
161 
162 #ifdef TRACEPOINTS_ENABLED
163 
164 #ifdef CONFIG_HAVE_STATIC_CALL
/*
 * __DO_TRACE_CALL(name, args), static-call variant.
 *
 * Loads __tracepoint_<name>.funcs with rcu_dereference_raw(). If the array
 * is present, the first entry's data pointer is read and the static call
 * tp_func_<name> is invoked with that pointer followed by @args. If the
 * array is NULL nothing is called.
 */
165 #define __DO_TRACE_CALL(name, args)					\
166 	do {								\
167 		struct tracepoint_func *it_func_ptr;			\
168 		void *__data;						\
169 		it_func_ptr =						\
170 			rcu_dereference_raw((&__tracepoint_##name)->funcs); \
171 		if (it_func_ptr) {					\
172 			__data = (it_func_ptr)->data;			\
173 			static_call(tp_func_##name)(__data, args);	\
174 		}							\
175 	} while (0)
176 #else
/*
 * __DO_TRACE_CALL(name, args) without static calls: call the iterator
 * __traceiter_<name>() directly. NULL is passed as the data pointer because
 * the iterator generated by DEFINE_TRACE_FN() reloads the data pointer for
 * every probe itself.
 */
177 #define __DO_TRACE_CALL(name, args)	__traceiter_##name(NULL, args)
178 #endif /* CONFIG_HAVE_STATIC_CALL */
179 
180 /*
181  * ARCH_WANTS_NO_INSTR archs are expected to have sanitized entry and idle
182  * code that disallow any/all tracing/instrumentation when RCU isn't watching.
183  */
/*
 * RCUIDLE_COND(rcuidle) - the condition under which __DO_TRACE() warns once
 * and returns without running any probe. With CONFIG_ARCH_WANTS_NO_INSTR
 * that is every rcuidle call; otherwise only an rcuidle call made while
 * in_nmi() is true.
 * The argument is not parenthesised inside the && form; the callers in this
 * header only ever pass the literals 0 or 1.
 */
184 #ifdef CONFIG_ARCH_WANTS_NO_INSTR
185 #define RCUIDLE_COND(rcuidle)	(rcuidle)
186 #else
187 /* srcu can't be used from NMI */
188 #define RCUIDLE_COND(rcuidle)	(rcuidle && in_nmi())
189 #endif
190 
191 /*
192  * it_func[0] is never NULL because there is at least one element in the array
193  * when the array itself is non NULL.
194  */
/*
 * __DO_TRACE - body executed by trace_<name>() and trace_<name>_rcuidle()
 * once the tracepoint's static key is enabled.
 *
 * @name:    tracepoint name, forwarded to __DO_TRACE_CALL().
 * @args:    argument list forwarded to the probes.
 * @cond:    expression; when it is false the enclosing function returns
 *           immediately.
 * @rcuidle: non-zero for the _rcuidle variant.
 *
 * The expansion contains bare "return;" statements, so it may only be used
 * inside a function returning void.
 *
 * Sequence: return if @cond is false; warn once and return if
 * RCUIDLE_COND(@rcuidle) holds; disable preemption; for rcuidle callers
 * additionally take the tracepoint_srcu read lock and call
 * ct_irq_enter_irqson(); invoke the probes through __DO_TRACE_CALL(); then
 * undo each of those steps in reverse order.
 */
195 #define __DO_TRACE(name, args, cond, rcuidle)				\
196 	do {								\
197 		int __maybe_unused __idx = 0;				\
198 									\
199 		if (!(cond))						\
200 			return;						\
201 									\
202 		if (WARN_ON_ONCE(RCUIDLE_COND(rcuidle)))		\
203 			return;						\
204 									\
205 		/* keep srcu and sched-rcu usage consistent */		\
206 		preempt_disable_notrace();				\
207 									\
208 		/*							\
209 		 * For rcuidle callers, use srcu since sched-rcu	\
210 		 * doesn't work from the idle path.			\
211 		 */							\
212 		if (rcuidle) {						\
213 			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
214 			ct_irq_enter_irqson();				\
215 		}							\
216 									\
217 		__DO_TRACE_CALL(name, TP_ARGS(args));			\
218 									\
219 		if (rcuidle) {						\
220 			ct_irq_exit_irqson();				\
221 			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
222 		}							\
223 									\
224 		preempt_enable_notrace();				\
225 	} while (0)
226 
227 #ifndef MODULE
/*
 * __DECLARE_TRACE_RCU, used when MODULE is not defined: generates
 * trace_<name>_rcuidle(proto). When the tracepoint's static key is enabled
 * it runs __DO_TRACE() with rcuidle set to 1; otherwise it does nothing.
 */
228 #define __DECLARE_TRACE_RCU(name, proto, args, cond)			\
229 	static inline void trace_##name##_rcuidle(proto)		\
230 	{								\
231 		if (static_key_false(&__tracepoint_##name.key))		\
232 			__DO_TRACE(name,				\
233 				TP_ARGS(args),				\
234 				TP_CONDITION(cond), 1);			\
235 	}
236 #else
/*
 * __DECLARE_TRACE_RCU when MODULE is defined: expands to nothing, so no
 * trace_<name>_rcuidle() function is generated in that case.
 */
237 #define __DECLARE_TRACE_RCU(name, proto, args, cond)
238 #endif
239 
240 /*
241  * Make sure the alignment of the structure in the __tracepoints section will
242  * not add unwanted padding between the beginning of the section and the
243  * structure. Force alignment to the same alignment as the section start.
244  *
245  * When lockdep is enabled, we make sure to always test if RCU is
246  * "watching" regardless if the tracepoint is enabled or not. Tracepoints
247  * require RCU to be active, and it should always warn at the tracepoint
248  * site if it is not watching, as it will need to be active when the
249  * tracepoint is enabled.
250  */
/*
 * __DECLARE_TRACE - declarations and inline helpers for one tracepoint
 * (TRACEPOINTS_ENABLED variant).
 *
 * @name:       tracepoint name.
 * @proto:      parameter list of trace_<name>().
 * @args:       the matching argument names.
 * @cond:       condition gating probe invocation. It can be evaluated twice
 *              per call: once inside __DO_TRACE() and once for the lockdep
 *              check.
 * @data_proto: probe prototype; DECLARE_TRACE() passes "void *__data"
 *              followed by @proto.
 *
 * Generates:
 *   - extern declarations of __traceiter_<name>(), the static call
 *     tp_func_<name> and struct tracepoint __tracepoint_<name>;
 *   - trace_<name>(): runs __DO_TRACE() when the static key is on. With
 *     CONFIG_LOCKDEP and @cond true it also warns once if RCU is not
 *     watching, whether or not the key is on;
 *   - trace_<name>_rcuidle() through __DECLARE_TRACE_RCU();
 *   - register_trace_<name>(), register_trace_prio_<name>() and
 *     unregister_trace_<name>(): typed wrappers that cast the probe to
 *     void * and forward to the tracepoint_probe_*() functions;
 *   - check_trace_callback_type_<name>(): empty body; presumably exists so
 *     that a callback can be type-checked against the probe prototype;
 *   - trace_<name>_enabled(): returns the state of the static key.
 */
251 #define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
252 	extern int __traceiter_##name(data_proto);			\
253 	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
254 	extern struct tracepoint __tracepoint_##name;			\
255 	static inline void trace_##name(proto)				\
256 	{								\
257 		if (static_key_false(&__tracepoint_##name.key))		\
258 			__DO_TRACE(name,				\
259 				TP_ARGS(args),				\
260 				TP_CONDITION(cond), 0);			\
261 		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
262 			WARN_ON_ONCE(!rcu_is_watching());		\
263 		}							\
264 	}								\
265 	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
266 			    PARAMS(cond))				\
267 	static inline int						\
268 	register_trace_##name(void (*probe)(data_proto), void *data)	\
269 	{								\
270 		return tracepoint_probe_register(&__tracepoint_##name,	\
271 						(void *)probe, data);	\
272 	}								\
273 	static inline int						\
274 	register_trace_prio_##name(void (*probe)(data_proto), void *data,\
275 				   int prio)				\
276 	{								\
277 		return tracepoint_probe_register_prio(&__tracepoint_##name, \
278 					      (void *)probe, data, prio); \
279 	}								\
280 	static inline int						\
281 	unregister_trace_##name(void (*probe)(data_proto), void *data)	\
282 	{								\
283 		return tracepoint_probe_unregister(&__tracepoint_##name,\
284 						(void *)probe, data);	\
285 	}								\
286 	static inline void						\
287 	check_trace_callback_type_##name(void (*cb)(data_proto))	\
288 	{								\
289 	}								\
290 	static inline bool						\
291 	trace_##name##_enabled(void)					\
292 	{								\
293 		return static_key_false(&__tracepoint_##name.key);	\
294 	}
295 
296 /*
297  * We have no guarantee that gcc and the linker won't up-align the tracepoint
298  * structures, so we create an array of pointers that will be used for iteration
299  * on the tracepoints.
300  */
/*
 * DEFINE_TRACE_FN - emit the storage and the iterator for one tracepoint.
 *
 * @_name:  tracepoint name.
 * @_reg:   value stored in .regfunc.
 * @_unreg: value stored in .unregfunc.
 * @proto:  probe parameters that follow the data pointer.
 * @args:   the matching argument names.
 *
 * Emits, in order:
 *   - the name string, placed in section __tracepoints_strings;
 *   - struct tracepoint __tracepoint_<_name> in section __tracepoints, with
 *     the static key initialised false and .funcs set to NULL;
 *   - a __tracepoints_ptrs entry through __TRACEPOINT_ENTRY();
 *   - __traceiter_<_name>(): if .funcs is non-NULL it calls every probe in
 *     the array with that entry's data pointer followed by @args, stopping
 *     at the first entry whose func is NULL. It always returns 0;
 *   - the static call tp_func_<_name>, defined with the iterator as its
 *     function.
 */
301 #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)		\
302 	static const char __tpstrtab_##_name[]				\
303 	__section("__tracepoints_strings") = #_name;			\
304 	extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name);	\
305 	int __traceiter_##_name(void *__data, proto);			\
306 	struct tracepoint __tracepoint_##_name	__used			\
307 	__section("__tracepoints") = {					\
308 		.name = __tpstrtab_##_name,				\
309 		.key = STATIC_KEY_INIT_FALSE,				\
310 		.static_call_key = &STATIC_CALL_KEY(tp_func_##_name),	\
311 		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
312 		.iterator = &__traceiter_##_name,			\
313 		.regfunc = _reg,					\
314 		.unregfunc = _unreg,					\
315 		.funcs = NULL };					\
316 	__TRACEPOINT_ENTRY(_name);					\
317 	int __traceiter_##_name(void *__data, proto)			\
318 	{								\
319 		struct tracepoint_func *it_func_ptr;			\
320 		void *it_func;						\
321 									\
322 		it_func_ptr =						\
323 			rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
324 		if (it_func_ptr) {					\
325 			do {						\
326 				it_func = READ_ONCE((it_func_ptr)->func); \
327 				__data = (it_func_ptr)->data;		\
328 				((void(*)(void *, proto))(it_func))(__data, args); \
329 			} while ((++it_func_ptr)->func);		\
330 		}							\
331 		return 0;						\
332 	}								\
333 	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
334 
/*
 * DEFINE_TRACE - DEFINE_TRACE_FN() with no registration callbacks (both
 * passed as NULL). The expansion already ends in a semicolon.
 */
335 #define DEFINE_TRACE(name, proto, args)		\
336 	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
337 
/*
 * EXPORT_TRACEPOINT_SYMBOL_GPL(name) - export the three objects that make up
 * tracepoint <name> with the _GPL export macros: the struct tracepoint, the
 * iterator function and the static call. The last statement has no trailing
 * semicolon; the user supplies it.
 */
338 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
339 	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
340 	EXPORT_SYMBOL_GPL(__traceiter_##name);				\
341 	EXPORT_STATIC_CALL_GPL(tp_func_##name)
/*
 * EXPORT_TRACEPOINT_SYMBOL(name) - same three exports as the _GPL form
 * (struct tracepoint, iterator, static call) using the non-GPL export
 * macros. The last statement has no trailing semicolon; the user supplies it.
 */
342 #define EXPORT_TRACEPOINT_SYMBOL(name)					\
343 	EXPORT_SYMBOL(__tracepoint_##name);				\
344 	EXPORT_SYMBOL(__traceiter_##name);				\
345 	EXPORT_STATIC_CALL(tp_func_##name)
346 
347 
348 #else /* !TRACEPOINTS_ENABLED */
/*
 * __DECLARE_TRACE, stub variant used when TRACEPOINTS_ENABLED is not
 * defined. @args and @cond are unused.
 *
 * Generates:
 *   - trace_<name>() and trace_<name>_rcuidle() with empty bodies;
 *   - register_trace_<name>() and unregister_trace_<name>() returning
 *     -ENOSYS;
 *   - check_trace_callback_type_<name>() with an empty body;
 *   - trace_<name>_enabled() returning false.
 * No register_trace_prio_<name>() stub is generated by this variant.
 */
349 #define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
350 	static inline void trace_##name(proto)				\
351 	{ }								\
352 	static inline void trace_##name##_rcuidle(proto)		\
353 	{ }								\
354 	static inline int						\
355 	register_trace_##name(void (*probe)(data_proto),		\
356 			      void *data)				\
357 	{								\
358 		return -ENOSYS;						\
359 	}								\
360 	static inline int						\
361 	unregister_trace_##name(void (*probe)(data_proto),		\
362 				void *data)				\
363 	{								\
364 		return -ENOSYS;						\
365 	}								\
366 	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
367 	{								\
368 	}								\
369 	static inline bool						\
370 	trace_##name##_enabled(void)					\
371 	{								\
372 		return false;						\
373 	}
374 
/*
 * With tracepoints disabled the definition and export macros expand to
 * nothing, so no tracepoint storage, iterator or exported symbol is emitted.
 */
375 #define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
376 #define DEFINE_TRACE(name, proto, args)
377 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
378 #define EXPORT_TRACEPOINT_SYMBOL(name)
379 
380 #endif /* TRACEPOINTS_ENABLED */
381 
382 #ifdef CONFIG_TRACING
383 /**
384  * tracepoint_string - register constant persistent string to trace system
385  * @str: a constant persistent string that will be referenced in tracepoints
386  *
387  * If constant strings are being used in tracepoints, it is faster and
388  * more efficient to just save the pointer to the string and reference
389  * that with a printf "%s" instead of saving the string in the ring buffer
390  * and wasting space and time.
391  *
392  * The problem with the above approach is that userspace tools that read
393  * the binary output of the trace buffers do not have access to the string.
394  * Instead they just show the address of the string which is not very
395  * useful to users.
396  *
397  * With tracepoint_string(), the string will be registered to the tracing
398  * system and exported to userspace via the debugfs/tracing/printk_formats
399  * file that maps the string address to the string text. This way userspace
400  * tools that read the binary buffers have a way to map the pointers to
401  * the ASCII strings they represent.
402  *
403  * The @str used must be a constant string and persistent as it would not
404  * make sense to show a string that no longer exists. But it is still fine
405  * to be used with modules, because when modules are unloaded, if they
406  * had tracepoints, the ring buffers are cleared too. As long as the string
407  * does not change during the life of the module, it is fine to use
408  * tracepoint_string() within a module.
409  */
410 #define tracepoint_string(str)						\
411 	({								\
412 		static const char *___tp_str __tracepoint_string = str; \
413 		___tp_str;						\
414 	})
/*
 * Attributes applied to the static pointer created by tracepoint_string():
 * it is placed in section __tracepoint_str and marked __used.
 */
415 #define __tracepoint_string	__used __section("__tracepoint_str")
416 #else
417 /*
418  * tracepoint_string() is used to save the string address for userspace
419  * tracing tools. When tracing isn't configured, there's no need to save
420  * anything.
421  */
/*
 * CONFIG_TRACING=n: tracepoint_string(str) is just str and the
 * __tracepoint_string attribute set is empty.
 */
422 # define tracepoint_string(str) str
423 # define __tracepoint_string
424 #endif
425 
/*
 * DECLARE_TRACE(name, proto, args) - declare a tracepoint through
 * __DECLARE_TRACE(). The condition passed is
 * cpu_online(raw_smp_processor_id()), so probes are only invoked when the
 * current CPU is online. The probe prototype is "void *__data" followed by
 * @proto.
 */
426 #define DECLARE_TRACE(name, proto, args)				\
427 	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
428 			cpu_online(raw_smp_processor_id()),		\
429 			PARAMS(void *__data, proto))
430 
/*
 * DECLARE_TRACE_CONDITION(name, proto, args, cond) - like DECLARE_TRACE(),
 * but probes are only invoked when the current CPU is online AND @cond is
 * true. @cond is parenthesised in the expansion, so it may be any
 * expression.
 */
431 #define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
432 	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
433 			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
434 			PARAMS(void *__data, proto))
435 
/* Expands to nothing in this header; both arguments are discarded. */
436 #define TRACE_EVENT_FLAGS(event, flag)
437 
/* Expands to nothing in this header; the event and expression are discarded. */
438 #define TRACE_EVENT_PERF_PERM(event, expr...)
439 
440 #endif /* DECLARE_TRACE */
441 
442 #ifndef TRACE_EVENT
443 /*
444  * For use with the TRACE_EVENT macro:
445  *
446  * We define a tracepoint, its arguments, its printk format
447  * and its 'fast binary record' layout.
448  *
449  * Firstly, name your tracepoint via TRACE_EVENT(name, ...): the
450  * 'subsystem_event' notation is fine.
451  *
452  * Think about this whole construct as the
453  * 'trace_sched_switch() function' from now on.
454  *
455  *
456  *  TRACE_EVENT(sched_switch,
457  *
458  *	*
459  *	* A function has a regular function-argument
460  *	* prototype; declare it via TP_PROTO():
461  *	*
462  *
463  *	TP_PROTO(struct rq *rq, struct task_struct *prev,
464  *		 struct task_struct *next),
465  *
466  *	*
467  *	* Define the call signature of the 'function'.
468  *	* (Design sidenote: we use this instead of a
469  *	*  TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
470  *	*
471  *
472  *	TP_ARGS(rq, prev, next),
473  *
474  *	*
475  *	* Fast binary tracing: define the trace record via
476  *	* TP_STRUCT__entry(). You can think about it like a
477  *	* regular C structure local variable definition.
478  *	*
479  *	* This is how the trace record is structured and will
480  *	* be saved into the ring buffer. These are the fields
481  *	* that will be exposed to user-space in
482  *	* /sys/kernel/tracing/events/<*>/format.
483  *	*
484  *	* The declared 'local variable' is called '__entry'
485  *	*
486  *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
487  *	*
488  *	*	pid_t	prev_pid;
489  *	*
490  *	* __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
491  *	*
492  *	*	char	prev_comm[TASK_COMM_LEN];
493  *	*
494  *
495  *	TP_STRUCT__entry(
496  *		__array(	char,	prev_comm,	TASK_COMM_LEN	)
497  *		__field(	pid_t,	prev_pid			)
498  *		__field(	int,	prev_prio			)
499  *		__array(	char,	next_comm,	TASK_COMM_LEN	)
500  *		__field(	pid_t,	next_pid			)
501  *		__field(	int,	next_prio			)
502  *	),
503  *
504  *	*
505  *	* Assign the entry into the trace record, by embedding
506  *	* a full C statement block into TP_fast_assign(). You
507  *	* can refer to the trace record as '__entry' -
508  *	* otherwise you can put arbitrary C code in here.
509  *	*
510  *	* Note: this C code will execute every time a trace event
511  *	* happens, on an active tracepoint.
512  *	*
513  *
514  *	TP_fast_assign(
515  *		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
516  *		__entry->prev_pid	= prev->pid;
517  *		__entry->prev_prio	= prev->prio;
518  *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
519  *		__entry->next_pid	= next->pid;
520  *		__entry->next_prio	= next->prio;
521  *	),
522  *
523  *	*
524  *	* Formatted output of a trace record via TP_printk().
525  *	* This is how the tracepoint will appear under ftrace
526  *	* plugins that make use of this tracepoint.
527  *	*
528  *	* (raw-binary tracing won't actually perform this step.)
529  *	*
530  *
531  *	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
532  *		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
533  *		__entry->next_comm, __entry->next_pid, __entry->next_prio),
534  *
535  * );
536  *
537  * This macro construct is thus used for the regular printk format
538  * tracing setup, it is used to construct a function pointer based
539  * tracepoint callback (this is used by programmatic plugins and
540  * can also be used by generic instrumentation like SystemTap), and
541  * it is also used to expose a structured trace record in
542  * /sys/kernel/tracing/events/.
543  *
544  * A set of (un)registration functions can be passed to the variant
545  * TRACE_EVENT_FN to perform any (un)registration work.
546  */
547 
/*
 * Event-class macros as seen through this header: only the tracepoint
 * declaration is produced. The template, tstruct, assign, print, reg and
 * unreg arguments are all ignored here.
 */
/* An event class by itself declares no tracepoint: expands to nothing. */
548 #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
/* The three DEFINE_EVENT forms below all reduce to DECLARE_TRACE(). */
549 #define DEFINE_EVENT(template, name, proto, args)		\
550 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
551 #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\
552 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
553 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
554 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
/* The conditional form forwards @cond to DECLARE_TRACE_CONDITION(). */
555 #define DEFINE_EVENT_CONDITION(template, name, proto,		\
556 			       args, cond)			\
557 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
558 				PARAMS(args), PARAMS(cond))
559 
/*
 * TRACE_EVENT family as seen through this header: only the tracepoint
 * declaration is produced. The struct, assign, print, reg and unreg
 * arguments are ignored here. The unconditional forms reduce to
 * DECLARE_TRACE(); the _COND/_CONDITION forms reduce to
 * DECLARE_TRACE_CONDITION() with @cond forwarded.
 */
560 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
561 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
562 #define TRACE_EVENT_FN(name, proto, args, struct,		\
563 		assign, print, reg, unreg)			\
564 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
565 #define TRACE_EVENT_FN_COND(name, proto, args, cond, struct,		\
566 		assign, print, reg, unreg)			\
567 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),	\
568 			PARAMS(args), PARAMS(cond))
569 #define TRACE_EVENT_CONDITION(name, proto, args, cond,		\
570 			      struct, assign, print)		\
571 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
572 				PARAMS(args), PARAMS(cond))
573 
/*
 * Expands to nothing. Textually identical to the definition in the
 * DECLARE_TRACE section of this header, so defining it again is harmless.
 */
574 #define TRACE_EVENT_FLAGS(event, flag)
575 
/*
 * Expands to nothing. Textually identical to the definition in the
 * DECLARE_TRACE section of this header, so defining it again is harmless.
 */
576 #define TRACE_EVENT_PERF_PERM(event, expr...)
577 
/*
 * DECLARE_EVENT_NOP(name, proto, args) - declare a tracepoint that never
 * does anything: trace_<name>() has an empty body and
 * trace_<name>_enabled() always returns false. @args is unused. No
 * register/unregister helpers and no _rcuidle variant are generated.
 */
578 #define DECLARE_EVENT_NOP(name, proto, args)				\
579 	static inline void trace_##name(proto)				\
580 	{ }								\
581 	static inline bool trace_##name##_enabled(void)			\
582 	{								\
583 		return false;						\
584 	}
585 
/*
 * TRACE_EVENT_NOP - takes the same argument list as TRACE_EVENT() but
 * reduces to DECLARE_EVENT_NOP(); struct, assign and print are ignored.
 */
586 #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)	\
587 	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
588 
/*
 * No-op counterparts of DECLARE_EVENT_CLASS() and DEFINE_EVENT(): the class
 * form expands to nothing and the event form reduces to DECLARE_EVENT_NOP(),
 * ignoring @template.
 */
589 #define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)
590 #define DEFINE_EVENT_NOP(template, name, proto, args)			\
591 	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
592 
593 #endif /* ifndef TRACE_EVENT (see note above) */
594