xref: /linux-6.15/include/linux/tracepoint.h (revision c42e2f07)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 #ifndef _LINUX_TRACEPOINT_H
3 #define _LINUX_TRACEPOINT_H
4 
5 /*
6  * Kernel Tracepoint API.
7  *
8  * See Documentation/trace/tracepoints.rst.
9  *
10  * Copyright (C) 2008-2014 Mathieu Desnoyers <[email protected]>
11  *
12  * Heavily inspired from the Linux Kernel Markers.
13  */
14 
15 #include <linux/smp.h>
16 #include <linux/srcu.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/rcupdate.h>
20 #include <linux/tracepoint-defs.h>
21 #include <linux/static_call.h>
22 
23 struct module;
24 struct tracepoint;
25 struct notifier_block;
26 
/*
 * One name-to-value mapping record.
 * NOTE(review): presumably consumed by the trace event code to resolve
 * symbolic names in event formats - the users are not visible in this header.
 */
27 struct trace_eval_map {
28 	const char		*system;	/* presumably the owning trace system - confirm */
29 	const char		*eval_string;	/* textual name being mapped */
30 	unsigned long		eval_value;	/* numeric value the name stands for */
31 };
32 
33 #define TRACEPOINT_DEFAULT_PRIO	10
34 
35 extern struct srcu_struct tracepoint_srcu;
36 
37 extern int
38 tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
39 extern int
40 tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data,
41 			       int prio);
42 extern int
43 tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data,
44 					 int prio);
45 extern int
46 tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);
47 static inline int
48 tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe,
49 				    void *data)
50 {
51 	return tracepoint_probe_register_prio_may_exist(tp, probe, data,
52 							TRACEPOINT_DEFAULT_PRIO);
53 }
54 extern void
55 for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
56 		void *priv);
57 
58 #ifdef CONFIG_MODULES
/*
 * List node that refers to one module.
 * NOTE(review): presumably one entry per loaded module that carries
 * tracepoints; the list head lives outside this header - confirm.
 */
59 struct tp_module {
60 	struct list_head list;	/* linkage into the owning list */
61 	struct module *mod;	/* the module this entry refers to */
62 };
63 
64 bool trace_module_has_bad_taint(struct module *mod);
65 extern int register_tracepoint_module_notifier(struct notifier_block *nb);
66 extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
67 #else
/* !CONFIG_MODULES stub: @mod is ignored and no bad taint is ever reported. */
static inline bool
trace_module_has_bad_taint(struct module *mod)
{
	return false;
}
/* !CONFIG_MODULES stub: @nb is ignored and success (0) is reported. */
static inline int
register_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
/* !CONFIG_MODULES stub: @nb is ignored and success (0) is reported. */
static inline int
unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
82 #endif /* CONFIG_MODULES */
83 
84 /*
85  * tracepoint_synchronize_unregister must be called between the last tracepoint
86  * probe unregistration and the end of module exit to make sure there is no
87  * caller executing a probe when it is freed.
88  */
89 #ifdef CONFIG_TRACEPOINTS
90 static inline void tracepoint_synchronize_unregister(void)
91 {
	/* Wait out rcuidle callers: __DO_TRACE runs them under tracepoint_srcu. */
92 	synchronize_srcu(&tracepoint_srcu);
	/* Wait for an RCU grace period too; __DO_TRACE runs probes with preemption disabled. */
93 	synchronize_rcu();
94 }
95 #else
/* Tracepoints are compiled out, so there is nothing to wait for. */
96 static inline void tracepoint_synchronize_unregister(void)
97 { }
98 #endif
99 
100 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
101 extern int syscall_regfunc(void);
102 extern void syscall_unregfunc(void);
103 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
104 
/*
 * PARAMS() expands to its arguments unchanged. Wrapping a comma-separated list
 * in it lets the list be handed to another macro as a single argument.
 */
105 #ifndef PARAMS
106 #define PARAMS(args...) args
107 #endif
108 
/* Both expand to nothing in this header. */
109 #define TRACE_DEFINE_ENUM(x)
110 #define TRACE_DEFINE_SIZEOF(x)
111 
112 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
113 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
114 {
115 	return offset_to_ptr(p);
116 }
117 
/*
 * Emit a 4-byte, 4-byte-aligned entry into the "__tracepoints_ptrs" section
 * holding the distance from the entry itself to __tracepoint_<name>
 * (symbol - .); tracepoint_ptr_deref() turns it back into a pointer.
 */
118 #define __TRACEPOINT_ENTRY(name)					\
119 	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
120 	    "	.balign 4					\n"	\
121 	    "	.long 	__tracepoint_" #name " - .		\n"	\
122 	    "	.previous					\n")
123 #else
124 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
125 {
126 	return *p;
127 }
128 
/*
 * Store the address of __tracepoint_<name> in a static pointer placed in the
 * "__tracepoints_ptrs" section. The pointer is marked __used because nothing
 * in the translation unit references it by name.
 */
129 #define __TRACEPOINT_ENTRY(name)					 \
130 	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
131 	__section("__tracepoints_ptrs") = &__tracepoint_##name
132 #endif
133 
134 #endif /* _LINUX_TRACEPOINT_H */
135 
136 /*
137  * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
138  *  file ifdef protection.
139  *  This is due to the way trace events work. If a file includes two
140  *  trace event headers under one "CREATE_TRACE_POINTS" the first include
141  *  will override the TRACE_EVENT and break the second include.
142  */
143 
144 #ifndef DECLARE_TRACE
145 
/*
 * Pass-through wrappers: each expands to its arguments unchanged. They label
 * the prototype, argument and condition lists where a tracepoint is declared.
 */
146 #define TP_PROTO(args...)	args
147 #define TP_ARGS(args...)	args
148 #define TP_CONDITION(args...)	args
149 
150 /*
151  * Individual subsystems may have a separate configuration to
152  * enable their tracepoints. By default, this file will create
153  * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem
154  * wants to be able to disable its tracepoints from being created
155  * it can define NOTRACE before including the tracepoint headers.
156  */
157 #if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE)
158 #define TRACEPOINTS_ENABLED
159 #endif
160 
161 #ifdef TRACEPOINTS_ENABLED
162 
163 #ifdef CONFIG_HAVE_STATIC_CALL
/*
 * Invoke the probes of tracepoint @name with @args.
 *
 * Static-call variant: load the probe array once; if it is non-NULL, take the
 * data pointer of its first entry and hand it, together with @args, to the
 * tp_func_<name> static call. A NULL array means there is nothing to call.
 */
164 #define __DO_TRACE_CALL(name, args)					\
165 	do {								\
166 		struct tracepoint_func *it_func_ptr;			\
167 		void *__data;						\
168 		it_func_ptr =						\
169 			rcu_dereference_raw((&__tracepoint_##name)->funcs); \
170 		if (it_func_ptr) {					\
171 			__data = (it_func_ptr)->data;			\
172 			static_call(tp_func_##name)(__data, args);	\
173 		}							\
174 	} while (0)
175 #else
/*
 * Fallback variant: always call the iterator. The NULL data argument is a
 * placeholder; __traceiter_<name>() loads the probe array and each entry's
 * data pointer itself.
 */
176 #define __DO_TRACE_CALL(name, args)	__traceiter_##name(NULL, args)
177 #endif /* CONFIG_HAVE_STATIC_CALL */
178 
179 /*
180  * ARCH_WANTS_NO_INSTR archs are expected to have sanitized entry and idle
181  * code that disallow any/all tracing/instrumentation when RCU isn't watching.
182  */
/*
 * Non-zero when an rcuidle tracepoint call must be refused; __DO_TRACE warns
 * once and returns without calling any probe in that case.
 */
183 #ifdef CONFIG_ARCH_WANTS_NO_INSTR
184 #define RCUIDLE_COND(rcuidle)	(rcuidle)
185 #else
186 /* srcu can't be used from NMI */
187 #define RCUIDLE_COND(rcuidle)	(rcuidle && in_nmi())
188 #endif
189 
190 /*
191  * it_func[0] is never NULL because there is at least one element in the array
192  * when the array itself is non NULL.
193  */
/*
 * Body of a trace_<name>() hook, used once the static key reports the
 * tracepoint as enabled.
 *
 * @cond:    runtime condition; when it is false the enclosing function returns.
 * @rcuidle: 0 or 1 as passed by the trace_<name>() / trace_<name>_rcuidle()
 *           wrappers; 1 additionally takes tracepoint_srcu and brackets the
 *           probe call with ct_irq_enter_irqson()/ct_irq_exit_irqson().
 *
 * The expansion contains bare "return" statements, so it can only be used
 * inside a function returning void.
 */
194 #define __DO_TRACE(name, args, cond, rcuidle)				\
195 	do {								\
196 		int __maybe_unused __idx = 0;				\
197 									\
198 		if (!(cond))						\
199 			return;						\
200 									\
201 		if (WARN_ONCE(RCUIDLE_COND(rcuidle),			\
202 			      "Bad RCU usage for tracepoint"))		\
203 			return;						\
204 									\
205 		/* keep srcu and sched-rcu usage consistent */		\
206 		preempt_disable_notrace();				\
207 									\
208 		/*							\
209 		 * For rcuidle callers, use srcu since sched-rcu	\
210 		 * doesn't work from the idle path.			\
211 		 */							\
212 		if (rcuidle) {						\
213 			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
214 			ct_irq_enter_irqson();				\
215 		}							\
216 									\
217 		__DO_TRACE_CALL(name, TP_ARGS(args));			\
218 									\
219 		if (rcuidle) {						\
220 			ct_irq_exit_irqson();				\
221 			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
222 		}							\
223 									\
224 		preempt_enable_notrace();				\
225 	} while (0)
226 
/*
 * Generate trace_<name>_rcuidle(): like trace_<name>() but runs __DO_TRACE
 * with rcuidle = 1. When MODULE is defined the macro expands to nothing, so
 * no _rcuidle hook is generated in that case.
 */
227 #ifndef MODULE
228 #define __DECLARE_TRACE_RCU(name, proto, args, cond)			\
229 	static inline void trace_##name##_rcuidle(proto)		\
230 	{								\
231 		if (static_key_false(&__tracepoint_##name.key))		\
232 			__DO_TRACE(name,				\
233 				TP_ARGS(args),				\
234 				TP_CONDITION(cond), 1);			\
235 	}
236 #else
237 #define __DECLARE_TRACE_RCU(name, proto, args, cond)
238 #endif
239 
240 /*
241  * Make sure the alignment of the structure in the __tracepoints section will
242  * not add unwanted padding between the beginning of the section and the
243  * structure. Force alignment to the same alignment as the section start.
244  *
245  * When lockdep is enabled, we make sure to always test if RCU is
246  * "watching" regardless if the tracepoint is enabled or not. Tracepoints
247  * require RCU to be active, and it should always warn at the tracepoint
248  * site if it is not watching, as it will need to be active when the
249  * tracepoint is enabled.
250  */
/*
 * Declare everything a user of tracepoint @name needs:
 *
 *   trace_<name>()                     - the hook placed at the call site
 *   trace_<name>_rcuidle()             - via __DECLARE_TRACE_RCU()
 *   register_trace_<name>()            - tracepoint_probe_register() wrapper
 *   register_trace_prio_<name>()       - tracepoint_probe_register_prio() wrapper
 *   unregister_trace_<name>()          - tracepoint_probe_unregister() wrapper
 *   check_trace_callback_type_<name>() - empty; only type-checks a callback
 *   trace_<name>_enabled()             - tests the tracepoint's static key
 *
 * @proto and @args are the hook's parameter list and the matching argument
 * names, @cond gates the probe call, and @data_proto is the probe prototype.
 */
251 #define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
252 	extern int __traceiter_##name(data_proto);			\
253 	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
254 	extern struct tracepoint __tracepoint_##name;			\
255 	static inline void trace_##name(proto)				\
256 	{								\
257 		if (static_key_false(&__tracepoint_##name.key))		\
258 			__DO_TRACE(name,				\
259 				TP_ARGS(args),				\
260 				TP_CONDITION(cond), 0);			\
261 		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
262 			WARN_ONCE(!rcu_is_watching(),			\
263 				  "RCU not watching for tracepoint");	\
264 		}							\
265 	}								\
266 	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
267 			    PARAMS(cond))				\
268 	static inline int						\
269 	register_trace_##name(void (*probe)(data_proto), void *data)	\
270 	{								\
271 		return tracepoint_probe_register(&__tracepoint_##name,	\
272 						(void *)probe, data);	\
273 	}								\
274 	static inline int						\
275 	register_trace_prio_##name(void (*probe)(data_proto), void *data,\
276 				   int prio)				\
277 	{								\
278 		return tracepoint_probe_register_prio(&__tracepoint_##name, \
279 					      (void *)probe, data, prio); \
280 	}								\
281 	static inline int						\
282 	unregister_trace_##name(void (*probe)(data_proto), void *data)	\
283 	{								\
284 		return tracepoint_probe_unregister(&__tracepoint_##name,\
285 						(void *)probe, data);	\
286 	}								\
287 	static inline void						\
288 	check_trace_callback_type_##name(void (*cb)(data_proto))	\
289 	{								\
290 	}								\
291 	static inline bool						\
292 	trace_##name##_enabled(void)					\
293 	{								\
294 		return static_key_false(&__tracepoint_##name.key);	\
295 	}
296 
297 /*
298  * We have no guarantee that gcc and the linker won't up-align the tracepoint
299  * structures, so we create an array of pointers that will be used for iteration
300  * on the tracepoints.
301  */
/*
 * Instantiate tracepoint @_name. This defines:
 *
 *   - its name string, in the "__tracepoints_strings" section;
 *   - struct tracepoint __tracepoint_<name>, in the "__tracepoints" section,
 *     with @_reg/@_unreg stored in .regfunc/.unregfunc and no probes attached;
 *   - its "__tracepoints_ptrs" entry, via __TRACEPOINT_ENTRY();
 *   - __traceiter_<name>(), which calls every probe in the array with that
 *     probe's own data pointer, stopping at the first entry whose func is
 *     NULL, and returns 0;
 *   - an empty __probestub_<name>();
 *   - the tp_func_<name> static call, defined against the iterator.
 */
302 #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)		\
303 	static const char __tpstrtab_##_name[]				\
304 	__section("__tracepoints_strings") = #_name;			\
305 	extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name);	\
306 	int __traceiter_##_name(void *__data, proto);			\
307 	void __probestub_##_name(void *__data, proto);			\
308 	struct tracepoint __tracepoint_##_name	__used			\
309 	__section("__tracepoints") = {					\
310 		.name = __tpstrtab_##_name,				\
311 		.key = STATIC_KEY_INIT_FALSE,				\
312 		.static_call_key = &STATIC_CALL_KEY(tp_func_##_name),	\
313 		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
314 		.iterator = &__traceiter_##_name,			\
315 		.probestub = &__probestub_##_name,			\
316 		.regfunc = _reg,					\
317 		.unregfunc = _unreg,					\
318 		.funcs = NULL };					\
319 	__TRACEPOINT_ENTRY(_name);					\
320 	int __traceiter_##_name(void *__data, proto)			\
321 	{								\
322 		struct tracepoint_func *it_func_ptr;			\
323 		void *it_func;						\
324 									\
325 		it_func_ptr =						\
326 			rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
327 		if (it_func_ptr) {					\
328 			do {						\
329 				it_func = READ_ONCE((it_func_ptr)->func); \
330 				__data = (it_func_ptr)->data;		\
331 				((void(*)(void *, proto))(it_func))(__data, args); \
332 			} while ((++it_func_ptr)->func);		\
333 		}							\
334 		return 0;						\
335 	}								\
336 	void __probestub_##_name(void *__data, proto)			\
337 	{								\
338 	}								\
339 	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
340 
/* DEFINE_TRACE_FN() with no register/unregister callbacks (both NULL). */
341 #define DEFINE_TRACE(name, proto, args)		\
342 	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
343 
/*
 * Export the three symbols that make up tracepoint @name: its struct
 * tracepoint, its iterator and its static call. The _GPL form uses the
 * GPL-only export macros. Neither expansion ends in a semicolon; the caller
 * supplies it.
 */
344 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
345 	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
346 	EXPORT_SYMBOL_GPL(__traceiter_##name);				\
347 	EXPORT_STATIC_CALL_GPL(tp_func_##name)
348 #define EXPORT_TRACEPOINT_SYMBOL(name)					\
349 	EXPORT_SYMBOL(__tracepoint_##name);				\
350 	EXPORT_SYMBOL(__traceiter_##name);				\
351 	EXPORT_STATIC_CALL(tp_func_##name)
352 
353 
354 #else /* !TRACEPOINTS_ENABLED */
/*
 * Tracepoints are compiled out: provide the same helpers as the enabled
 * __DECLARE_TRACE() so that callers build unchanged.
 *
 *   trace_<name>(), trace_<name>_rcuidle()   - empty
 *   register_trace_<name>(),
 *   register_trace_prio_<name>(),
 *   unregister_trace_<name>()                - fail with -ENOSYS
 *   check_trace_callback_type_<name>()       - empty; only type-checks @cb
 *   trace_<name>_enabled()                   - always false
 *
 * register_trace_prio_<name>() is provided here as well so that code using
 * the priority-aware registration still compiles when tracepoints are off.
 *
 * @args and @cond are accepted for signature compatibility and ignored.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
	static inline void trace_##name(proto)				\
	{ }								\
	static inline void trace_##name##_rcuidle(proto)		\
	{ }								\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto),		\
			      void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto),		\
				   void *data, int prio)		\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto),		\
				void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return false;						\
	}
380 
381 #define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
382 #define DEFINE_TRACE(name, proto, args)
383 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
384 #define EXPORT_TRACEPOINT_SYMBOL(name)
385 
386 #endif /* TRACEPOINTS_ENABLED */
387 
388 #ifdef CONFIG_TRACING
389 /**
390  * tracepoint_string - register constant persistent string to trace system
391  * @str: a constant persistent string that will be referenced in tracepoints
392  *
393  * If constant strings are being used in tracepoints, it is faster and
394  * more efficient to just save the pointer to the string and reference
395  * that with a printf "%s" instead of saving the string in the ring buffer
396  * and wasting space and time.
397  *
398  * The problem with the above approach is that userspace tools that read
399  * the binary output of the trace buffers do not have access to the string.
400  * Instead they just show the address of the string which is not very
401  * useful to users.
402  *
403  * With tracepoint_string(), the string will be registered to the tracing
404  * system and exported to userspace via the /sys/kernel/tracing/printk_formats
405  * file that maps the string address to the string text. This way userspace
406  * tools that read the binary buffers have a way to map the pointers to
407  * the ASCII strings they represent.
408  *
409  * The @str used must be a constant string and persistent as it would not
410  * make sense to show a string that no longer exists. But it is still fine
411  * to be used with modules, because when modules are unloaded, if they
412  * had tracepoints, the ring buffers are cleared too. As long as the string
413  * does not change during the life of the module, it is fine to use
414  * tracepoint_string() within a module.
415  */
416 #define tracepoint_string(str)						\
417 	({								\
418 		static const char *___tp_str __tracepoint_string = str; \
419 		___tp_str;						\
420 	})
/* Places the pointer above in the "__tracepoint_str" section and marks it __used. */
421 #define __tracepoint_string	__used __section("__tracepoint_str")
422 #else
423 /*
424  * tracepoint_string() is used to save the string address for userspace
425  * tracing tools. When tracing isn't configured, there's no need to save
426  * anything.
427  */
428 # define tracepoint_string(str) str
429 # define __tracepoint_string
430 #endif
431 
/*
 * Declare tracepoint @name. Its condition is that the current CPU is online,
 * and its probe prototype is @proto preceded by a "void *__data" parameter.
 */
432 #define DECLARE_TRACE(name, proto, args)				\
433 	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
434 			cpu_online(raw_smp_processor_id()),		\
435 			PARAMS(void *__data, proto))
436 
/*
 * Like DECLARE_TRACE(), but the condition additionally requires @cond to be
 * true; the CPU-online test is evaluated first.
 */
437 #define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
438 	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
439 			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
440 			PARAMS(void *__data, proto))
441 
442 #define TRACE_EVENT_FLAGS(event, flag)
443 
444 #define TRACE_EVENT_PERF_PERM(event, expr...)
445 
446 #endif /* DECLARE_TRACE */
447 
448 #ifndef TRACE_EVENT
449 /*
450  * For use with the TRACE_EVENT macro:
451  *
452  * We define a tracepoint, its arguments, its printk format
453  * and its 'fast binary record' layout.
454  *
455  * Firstly, name your tracepoint via TRACE_EVENT(name : the
456  * 'subsystem_event' notation is fine.
457  *
458  * Think about this whole construct as the
459  * 'trace_sched_switch() function' from now on.
460  *
461  *
462  *  TRACE_EVENT(sched_switch,
463  *
464  *	*
465  *	* A function has a regular function arguments
466  *	* prototype, declare it via TP_PROTO():
467  *	*
468  *
469  *	TP_PROTO(struct rq *rq, struct task_struct *prev,
470  *		 struct task_struct *next),
471  *
472  *	*
473  *	* Define the call signature of the 'function'.
474  *	* (Design sidenote: we use this instead of a
475  *	*  TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
476  *	*
477  *
478  *	TP_ARGS(rq, prev, next),
479  *
480  *	*
481  *	* Fast binary tracing: define the trace record via
482  *	* TP_STRUCT__entry(). You can think about it like a
483  *	* regular C structure local variable definition.
484  *	*
485  *	* This is how the trace record is structured and will
486  *	* be saved into the ring buffer. These are the fields
487  *	* that will be exposed to user-space in
488  *	* /sys/kernel/tracing/events/<*>/format.
489  *	*
490  *	* The declared 'local variable' is called '__entry'
491  *	*
492  *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
493  *	*
494  *	*	pid_t	prev_pid;
495  *	*
496  *	* __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
497  *	*
498  *	*	char	prev_comm[TASK_COMM_LEN];
499  *	*
500  *
501  *	TP_STRUCT__entry(
502  *		__array(	char,	prev_comm,	TASK_COMM_LEN	)
503  *		__field(	pid_t,	prev_pid			)
504  *		__field(	int,	prev_prio			)
505  *		__array(	char,	next_comm,	TASK_COMM_LEN	)
506  *		__field(	pid_t,	next_pid			)
507  *		__field(	int,	next_prio			)
508  *	),
509  *
510  *	*
511  *	* Assign the entry into the trace record, by embedding
512  *	* a full C statement block into TP_fast_assign(). You
513  *	* can refer to the trace record as '__entry' -
514  *	* otherwise you can put arbitrary C code in here.
515  *	*
516  *	* Note: this C code will execute every time a trace event
517  *	* happens, on an active tracepoint.
518  *	*
519  *
520  *	TP_fast_assign(
521  *		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
522  *		__entry->prev_pid	= prev->pid;
523  *		__entry->prev_prio	= prev->prio;
524  *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
525  *		__entry->next_pid	= next->pid;
526  *		__entry->next_prio	= next->prio;
527  *	),
528  *
529  *	*
530  *	* Formatted output of a trace record via TP_printk().
531  *	* This is how the tracepoint will appear under ftrace
532  *	* plugins that make use of this tracepoint.
533  *	*
534  *	* (raw-binary tracing won't actually perform this step.)
535  *	*
536  *
537  *	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
538  *		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
539  *		__entry->next_comm, __entry->next_pid, __entry->next_prio),
540  *
541  * );
542  *
543  * This macro construct is thus used for the regular printk format
544  * tracing setup, it is used to construct a function pointer based
545  * tracepoint callback (this is used by programmatic plugins and
546  * can also be used by generic instrumentation like SystemTap), and
547  * it is also used to expose a structured trace record in
548  * /sys/kernel/tracing/events/.
549  *
550  * A set of (un)registration functions can be passed to the variant
551  * TRACE_EVENT_FN to perform any (un)registration work.
552  */
553 
/*
 * Expansions used when TRACE_EVENT has not already been defined by the
 * includer: an event class generates nothing, and every DEFINE_EVENT* variant
 * only declares the tracepoint. The template, print, reg and unreg arguments
 * are ignored here.
 */
554 #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
555 #define DEFINE_EVENT(template, name, proto, args)		\
556 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
557 #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\
558 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
559 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
560 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
561 #define DEFINE_EVENT_CONDITION(template, name, proto,		\
562 			       args, cond)			\
563 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
564 				PARAMS(args), PARAMS(cond))
565 
/*
 * Here the TRACE_EVENT* family only declares the tracepoint: the struct,
 * assign, print, reg and unreg arguments are dropped. The _CONDITION/_COND
 * forms forward @cond to DECLARE_TRACE_CONDITION().
 */
566 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
567 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
568 #define TRACE_EVENT_FN(name, proto, args, struct,		\
569 		assign, print, reg, unreg)			\
570 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
571 #define TRACE_EVENT_FN_COND(name, proto, args, cond, struct,		\
572 		assign, print, reg, unreg)			\
573 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),	\
574 			PARAMS(args), PARAMS(cond))
575 #define TRACE_EVENT_CONDITION(name, proto, args, cond,		\
576 			      struct, assign, print)		\
577 	DECLARE_TRACE_CONDITION(name, PARAMS(proto),		\
578 				PARAMS(args), PARAMS(cond))
579 
580 #define TRACE_EVENT_FLAGS(event, flag)
581 
582 #define TRACE_EVENT_PERF_PERM(event, expr...)
583 
/*
 * Declare a tracepoint that is never there: trace_<name>() is an empty inline
 * and trace_<name>_enabled() always returns false. No register/unregister
 * helpers are generated, and @args is unused.
 */
584 #define DECLARE_EVENT_NOP(name, proto, args)				\
585 	static inline void trace_##name(proto)				\
586 	{ }								\
587 	static inline bool trace_##name##_enabled(void)			\
588 	{								\
589 		return false;						\
590 	}
591 
/* TRACE_EVENT()-shaped front end for DECLARE_EVENT_NOP(); struct/assign/print are ignored. */
592 #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)	\
593 	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
594 
/*
 * No-op counterparts of DECLARE_EVENT_CLASS()/DEFINE_EVENT(): the class
 * expands to nothing and each event becomes a DECLARE_EVENT_NOP().
 */
595 #define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)
596 #define DEFINE_EVENT_NOP(template, name, proto, args)			\
597 	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
598 
599 #endif /* ifndef TRACE_EVENT (see note above) */
600