xref: /linux-6.15/include/linux/tracepoint.h (revision 48bcda68)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 #ifndef _LINUX_TRACEPOINT_H
3 #define _LINUX_TRACEPOINT_H
4 
5 /*
6  * Kernel Tracepoint API.
7  *
8  * See Documentation/trace/tracepoints.rst.
9  *
10  * Copyright (C) 2008-2014 Mathieu Desnoyers <[email protected]>
11  *
12  * Heavily inspired from the Linux Kernel Markers.
13  */
14 
15 #include <linux/smp.h>
16 #include <linux/srcu.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/rcupdate.h>
20 #include <linux/tracepoint-defs.h>
21 #include <linux/static_call.h>
22 
/* Forward declarations for types named in the prototypes further down. */
struct module;
struct tracepoint;
struct notifier_block;

/*
 * One eval-map record: two strings and an unsigned long value.
 * NOTE(review): presumably ties the textual name of an enum/sizeof
 * expression to its numeric value for a given trace system - confirm
 * against the users of TRACE_DEFINE_ENUM().
 */
struct trace_eval_map {
	const char *system;		/* system name string */
	const char *eval_string;	/* textual form of the value */
	unsigned long eval_value;	/* numeric form of the value */
};
32 
/* Priority handed on by tracepoint_probe_register_may_exist(). */
#define TRACEPOINT_DEFAULT_PRIO	10

/* SRCU domain waited on by tracepoint_synchronize_unregister(). */
extern struct srcu_struct tracepoint_srcu;

/*
 * Probe (un)registration entry points; the definitions live outside this
 * header.  Each takes the tracepoint, the probe as an untyped pointer and
 * the data pointer that goes with it; the *_prio variants additionally
 * take an integer priority.  All of them return an int status.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
				   void *data, int prio);
int tracepoint_probe_register_prio_may_exist(struct tracepoint *tp,
					     void *probe, void *data,
					     int prio);
int tracepoint_probe_unregister(struct tracepoint *tp, void *probe,
				void *data);
47 static inline int
48 tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe,
49 				    void *data)
50 {
51 	return tracepoint_probe_register_prio_may_exist(tp, probe, data,
52 							TRACEPOINT_DEFAULT_PRIO);
53 }
/*
 * Tracepoint walker, defined outside this header.  @fct takes a tracepoint
 * pointer plus a void pointer.
 * NOTE(review): presumably @fct runs once per built-in tracepoint with
 * @priv passed through untouched - confirm in the implementation.
 */
void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
				void *priv);
57 
58 #ifdef CONFIG_MODULES
59 struct tp_module {
60 	struct list_head list;
61 	struct module *mod;
62 };
63 
/*
 * Module-aware tracepoint services (CONFIG_MODULES=y); the definitions live
 * outside this header.  Inline no-op versions with the same signatures are
 * provided further down for CONFIG_MODULES=n.
 */
bool trace_module_has_bad_taint(struct module *mod);
int register_tracepoint_module_notifier(struct notifier_block *nb);
int unregister_tracepoint_module_notifier(struct notifier_block *nb);
void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp,
					    struct module *mod,
					    void *priv),
				void *priv);
void for_each_tracepoint_in_module(struct module *mod,
				   void (*fct)(struct tracepoint *tp,
					       struct module *mod,
					       void *priv),
				   void *priv);
74 #else
/* CONFIG_MODULES=n stub: always answers false, @mod is not looked at. */
static inline
bool trace_module_has_bad_taint(struct module *mod)
{
	return false;
}
/* CONFIG_MODULES=n stub: does nothing with @nb and returns 0. */
static inline int register_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
/* CONFIG_MODULES=n stub: does nothing with @nb and returns 0. */
static inline int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
/* CONFIG_MODULES=n stub: @fct is never called and @priv is left alone. */
static inline void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp,
							  struct module *mod,
							  void *priv),
					      void *priv)
{
}
/* CONFIG_MODULES=n stub: @fct is never called; @mod and @priv are ignored. */
static inline void for_each_tracepoint_in_module(struct module *mod,
						 void (*fct)(struct tracepoint *tp,
							     struct module *mod,
							     void *priv),
						 void *priv)
{
}
102 #endif /* CONFIG_MODULES */
103 
/*
 * tracepoint_synchronize_unregister() has to be called after the last
 * tracepoint probe has been unregistered and before module exit finishes,
 * so that no caller is still executing a probe when it is freed.
 */
#ifndef CONFIG_TRACEPOINTS
/* Tracepoints are compiled out: there is nothing to wait for. */
static inline void tracepoint_synchronize_unregister(void)
{
}
#else
/* Waits on tracepoint_srcu first, then on synchronize_rcu(). */
static inline void tracepoint_synchronize_unregister(void)
{
	synchronize_srcu(&tracepoint_srcu);
	synchronize_rcu();
}
#endif /* CONFIG_TRACEPOINTS */
119 
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
/*
 * Hooks for syscall tracepoints, defined outside this header.
 * NOTE(review): presumably meant to be passed as the reg/unreg callbacks of
 * DEFINE_TRACE_FN()/TRACE_EVENT_FN() - confirm at the call sites.
 */
int syscall_regfunc(void);
void syscall_unregfunc(void);
#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
124 
/*
 * PARAMS() expands to its own argument list, unchanged.  It lets a
 * comma-separated list travel through further macro layers as a single
 * argument.  Only defined here when nobody defined it earlier.
 */
#ifndef PARAMS
#define PARAMS(args...) args
#endif

/* In this header both of these expand to nothing. */
#define TRACE_DEFINE_ENUM(x)
#define TRACE_DEFINE_SIZEOF(x)
131 
132 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
133 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
134 {
135 	return offset_to_ptr(p);
136 }
137 
138 #define __TRACEPOINT_ENTRY(name)					\
139 	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
140 	    "	.balign 4					\n"	\
141 	    "	.long 	__tracepoint_" #name " - .		\n"	\
142 	    "	.previous					\n")
143 #else
144 static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
145 {
146 	return *p;
147 }
148 
149 #define __TRACEPOINT_ENTRY(name)					 \
150 	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
151 	__section("__tracepoints_ptrs") = &__tracepoint_##name
152 #endif
153 
154 #endif /* _LINUX_TRACEPOINT_H */
155 
156 /*
157  * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
158  *  file ifdef protection.
159  *  This is due to the way trace events work. If a file includes two
160  *  trace event headers under one "CREATE_TRACE_POINTS" the first include
161  *  will override the TRACE_EVENT and break the second include.
162  */
163 
164 #ifndef DECLARE_TRACE
165 
/*
 * Pass-through wrappers: each expands to exactly the argument list it was
 * given.  They label the prototype, argument and condition parts of a
 * tracepoint declaration.
 */
#define TP_PROTO(args...)	args
#define TP_ARGS(args...)	args
#define TP_CONDITION(args...)	args

/*
 * Individual subsystems may have a separate configuration option to
 * enable their tracepoints.  By default, this file creates the
 * tracepoints whenever CONFIG_TRACEPOINTS is defined.  A subsystem that
 * wants to keep its tracepoints from being created can define NOTRACE
 * before including the tracepoint headers.
 */
#ifdef CONFIG_TRACEPOINTS
#ifndef NOTRACE
#define TRACEPOINTS_ENABLED
#endif
#endif
180 
181 #ifdef TRACEPOINTS_ENABLED
182 
/*
 * __DO_TRACE_CALL - hand @args to the probes of tracepoint @name
 *
 * With CONFIG_HAVE_STATIC_CALL: reads the tracepoint's funcs pointer and,
 * when it is non-NULL, calls the tp_func_<name> static call with the data
 * pointer of the first funcs entry followed by @args.
 *
 * Without it: calls __traceiter_<name>() directly, passing NULL as the
 * data pointer.
 */
#ifdef CONFIG_HAVE_STATIC_CALL
#define __DO_TRACE_CALL(name, args)					\
	do {								\
		struct tracepoint_func *it_func_ptr =			\
			rcu_dereference_raw((&__tracepoint_##name)->funcs); \
									\
		if (it_func_ptr) {					\
			void *__data = (it_func_ptr)->data;		\
									\
			static_call(tp_func_##name)(__data, args);	\
		}							\
	} while (0)
#else /* !CONFIG_HAVE_STATIC_CALL */
#define __DO_TRACE_CALL(name, args)	__traceiter_##name(NULL, args)
#endif /* CONFIG_HAVE_STATIC_CALL */
198 
/*
 * __DO_TRACE - run the probes of tracepoint @name when @cond holds
 * @name: tracepoint name, forwarded to __DO_TRACE_CALL()
 * @args: argument list for the probes, forwarded through TP_ARGS()
 * @cond: expression evaluated once; the probes only run when it is true
 *
 * The probe call is bracketed by preempt_disable_notrace() and
 * preempt_enable_notrace().  When @cond is false the macro does nothing
 * and control simply continues after it; it never returns from the
 * enclosing function on the caller's behalf.
 *
 * it_func[0] is never NULL because there is at least one element in the
 * array when the array itself is non NULL.
 */
#define __DO_TRACE(name, args, cond)					\
	do {								\
		if (cond) {						\
			/* probes run with preemption disabled */	\
			preempt_disable_notrace();			\
									\
			__DO_TRACE_CALL(name, TP_ARGS(args));		\
									\
			preempt_enable_notrace();			\
		}							\
	} while (0)
217 
/*
 * __DECLARE_TRACE - emit the per-tracepoint inline API for @name
 *
 * Declares __traceiter_<name>(), the tp_func_<name> static call and the
 * __tracepoint_<name> object, then defines these static inlines:
 *
 *   trace_<name>()			runs __DO_TRACE() when the static
 *					key of the tracepoint is enabled
 *   register_trace_<name>()		wraps tracepoint_probe_register()
 *   register_trace_prio_<name>()	wraps tracepoint_probe_register_prio()
 *   unregister_trace_<name>()		wraps tracepoint_probe_unregister()
 *   check_trace_callback_type_<name>()	empty body; its parameter type is
 *					the probe prototype
 *   trace_<name>_enabled()		reports the state of the static key
 *
 * Make sure the alignment of the structure in the __tracepoints section will
 * not add unwanted padding between the beginning of the section and the
 * structure. Force alignment to the same alignment as the section start.
 *
 * When lockdep is enabled, we make sure to always test if RCU is
 * "watching" regardless if the tracepoint is enabled or not. Tracepoints
 * require RCU to be active, and it should always warn at the tracepoint
 * site if it is not watching, as it will need to be active when the
 * tracepoint is enabled.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
	int __traceiter_##name(data_proto);				\
	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
	extern struct tracepoint __tracepoint_##name;			\
	static inline void trace_##name(proto)				\
	{								\
		if (static_branch_unlikely(&__tracepoint_##name.key)) {	\
			__DO_TRACE(name,				\
				TP_ARGS(args),				\
				TP_CONDITION(cond));			\
		}							\
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond))		\
			WARN_ONCE(!rcu_is_watching(),			\
				  "RCU not watching for tracepoint");	\
	}								\
	static inline int register_trace_##name(void (*probe)(data_proto), \
						void *data)		\
	{								\
		return tracepoint_probe_register(&__tracepoint_##name,	\
						(void *)probe, data);	\
	}								\
	static inline int register_trace_prio_##name(void (*probe)(data_proto), \
						     void *data, int prio) \
	{								\
		return tracepoint_probe_register_prio(&__tracepoint_##name, \
					      (void *)probe, data, prio); \
	}								\
	static inline int unregister_trace_##name(void (*probe)(data_proto), \
						  void *data)		\
	{								\
		return tracepoint_probe_unregister(&__tracepoint_##name,\
						(void *)probe, data);	\
	}								\
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{								\
	}								\
	static inline bool trace_##name##_enabled(void)			\
	{								\
		return static_branch_unlikely(&__tracepoint_##name.key);\
	}
272 
/*
 * DEFINE_TRACE_FN - emit the storage and the iterator of one tracepoint
 * @_name:  tracepoint name
 * @_reg:   value stored in the regfunc member
 * @_unreg: value stored in the unregfunc member
 * @proto:  probe prototype, without the leading data pointer
 * @args:   argument names matching @proto
 *
 * Emits the name string, the __tracepoint_<_name> object, its pointer-table
 * entry, the __traceiter_<_name>() iterator, an empty __probestub_<_name>()
 * and the tp_func_<_name> static call bound to the iterator.
 *
 * We have no guarantee that gcc and the linker won't up-align the tracepoint
 * structures, so an array of pointers is created (via __TRACEPOINT_ENTRY)
 * and that array is what gets used to iterate over the tracepoints.
 *
 * The iterator walks the funcs array from its first entry and stops at the
 * first entry whose func member is NULL; it always returns 0.
 */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)		\
	static const char __tpstrtab_##_name[]				\
	__section("__tracepoints_strings") = #_name;			\
	extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name);	\
	int __traceiter_##_name(void *__data, proto);			\
	void __probestub_##_name(void *__data, proto);			\
	struct tracepoint __tracepoint_##_name	__used			\
	__section("__tracepoints") = {					\
		.name = __tpstrtab_##_name,				\
		.key = STATIC_KEY_FALSE_INIT,				\
		.static_call_key = &STATIC_CALL_KEY(tp_func_##_name),	\
		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
		.iterator = &__traceiter_##_name,			\
		.probestub = &__probestub_##_name,			\
		.regfunc = _reg,					\
		.unregfunc = _unreg,					\
		.funcs = NULL };					\
	__TRACEPOINT_ENTRY(_name);					\
	int __traceiter_##_name(void *__data, proto)			\
	{								\
		struct tracepoint_func *it_func_ptr =			\
			rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
		void *it_func;						\
									\
		/* no probes attached */				\
		if (!it_func_ptr)					\
			return 0;					\
									\
		do {							\
			it_func = READ_ONCE((it_func_ptr)->func);	\
			__data = (it_func_ptr)->data;			\
			((void(*)(void *, proto))(it_func))(__data, args); \
		} while ((++it_func_ptr)->func);			\
									\
		return 0;						\
	}								\
	void __probestub_##_name(void *__data, proto)			\
	{								\
	}								\
	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
316 
/* DEFINE_TRACE_FN() with NULL for both the reg and the unreg callback. */
#define DEFINE_TRACE(name, proto, args)					\
	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

/*
 * Export the three symbols that belong to tracepoint @name: the tracepoint
 * object, its iterator and its static call.  The _GPL flavour uses the
 * *_GPL export macros throughout.
 */
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
	EXPORT_SYMBOL_GPL(__traceiter_##name);				\
	EXPORT_STATIC_CALL_GPL(tp_func_##name)

#define EXPORT_TRACEPOINT_SYMBOL(name)					\
	EXPORT_SYMBOL(__tracepoint_##name);				\
	EXPORT_SYMBOL(__traceiter_##name);				\
	EXPORT_STATIC_CALL(tp_func_##name)
328 
329 
330 #else /* !TRACEPOINTS_ENABLED */
/*
 * __DECLARE_TRACE - stub flavour, used when TRACEPOINTS_ENABLED is not defined
 *
 * Provides the same inline API as the enabled flavour so that callers build
 * unchanged:
 *
 *   trace_<name>()			empty
 *   register_trace_<name>()		returns -ENOSYS
 *   register_trace_prio_<name>()	returns -ENOSYS
 *   unregister_trace_<name>()		returns -ENOSYS
 *   check_trace_callback_type_<name>()	empty
 *   trace_<name>_enabled()		returns false
 *
 * @args and @cond are accepted for signature parity and are not expanded.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
	static inline void trace_##name(proto)				\
	{ }								\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto),		\
			      void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto),		\
				   void *data, int prio)		\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto),		\
				void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return false;						\
	}
354 
/* Tracepoints disabled: definitions and exports all expand to nothing. */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)
#define DEFINE_TRACE(name, proto, args)
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
#define EXPORT_TRACEPOINT_SYMBOL(name)
359 
360 #endif /* TRACEPOINTS_ENABLED */
361 
362 #ifdef CONFIG_TRACING
363 /**
364  * tracepoint_string - register constant persistent string to trace system
365  * @str: a constant persistent string that will be referenced in tracepoints
366  *
367  * If constant strings are being used in tracepoints, it is faster and
368  * more efficient to just save the pointer to the string and reference
369  * that with a printf "%s" instead of saving the string in the ring buffer
370  * and wasting space and time.
371  *
372  * The problem with the above approach is that userspace tools that read
373  * the binary output of the trace buffers do not have access to the string.
374  * Instead they just show the address of the string which is not very
375  * useful to users.
376  *
377  * With tracepoint_string(), the string will be registered to the tracing
378  * system and exported to userspace via the /sys/kernel/tracing/printk_formats
379  * file that maps the string address to the string text. This way userspace
380  * tools that read the binary buffers have a way to map the pointers to
381  * the ASCII strings they represent.
382  *
383  * The @str used must be a constant string and persistent as it would not
384  * make sense to show a string that no longer exists. But it is still fine
385  * to be used with modules, because when modules are unloaded, if they
386  * had tracepoints, the ring buffers are cleared too. As long as the string
387  * does not change during the life of the module, it is fine to use
388  * tracepoint_string() within a module.
389  */
/* Attributes that keep a variable and place it in "__tracepoint_str". */
#define __tracepoint_string	__used __section("__tracepoint_str")

/*
 * Stores @str in a function-local static pointer tagged with
 * __tracepoint_string and evaluates to that pointer.
 */
#define tracepoint_string(str)						\
	({								\
		static const char *___tp_str __tracepoint_string = str; \
		___tp_str;						\
	})
396 #else
/*
 * tracepoint_string() exists to save the string address for userspace
 * tracing tools.  Without tracing configured there is nothing to save:
 * the macro expands to its argument and the attribute expands to nothing.
 */
#define tracepoint_string(str) str
#define __tracepoint_string
404 #endif
405 
/*
 * DECLARE_TRACE - declare tracepoint @name
 *
 * Expands to __DECLARE_TRACE() with "the current CPU is online" as the
 * condition and with "void *__data" prepended to @proto for the probe
 * prototype.
 */
#define DECLARE_TRACE(name, proto, args)				\
	__DECLARE_TRACE(name,						\
			PARAMS(proto),					\
			PARAMS(args),					\
			cpu_online(raw_smp_processor_id()),		\
			PARAMS(void *__data, proto))

/*
 * DECLARE_TRACE_CONDITION - like DECLARE_TRACE(), but the tracepoint
 * additionally requires @cond to be true.
 */
#define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
	__DECLARE_TRACE(name,						\
			PARAMS(proto),					\
			PARAMS(args),					\
			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
			PARAMS(void *__data, proto))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
419 
420 #endif /* DECLARE_TRACE */
421 
422 #ifndef TRACE_EVENT
423 /*
424  * For use with the TRACE_EVENT macro:
425  *
426  * We define a tracepoint, its arguments, its printk format
427  * and its 'fast binary record' layout.
428  *
429  * Firstly, name your tracepoint via TRACE_EVENT(name, ...): the
430  * 'subsystem_event' notation is fine.
431  *
432  * Think about this whole construct as the
433  * 'trace_sched_switch() function' from now on.
434  *
435  *
436  *  TRACE_EVENT(sched_switch,
437  *
438  *	*
439  *	* A function has a regular function argument
440  *	* prototype; declare it via TP_PROTO():
441  *	*
442  *
443  *	TP_PROTO(struct rq *rq, struct task_struct *prev,
444  *		 struct task_struct *next),
445  *
446  *	*
447  *	* Define the call signature of the 'function'.
448  *	* (Design sidenote: we use this instead of a
449  *	*  TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
450  *	*
451  *
452  *	TP_ARGS(rq, prev, next),
453  *
454  *	*
455  *	* Fast binary tracing: define the trace record via
456  *	* TP_STRUCT__entry(). You can think about it like a
457  *	* regular C structure local variable definition.
458  *	*
459  *	* This is how the trace record is structured and will
460  *	* be saved into the ring buffer. These are the fields
461  *	* that will be exposed to user-space in
462  *	* /sys/kernel/tracing/events/<*>/format.
463  *	*
464  *	* The declared 'local variable' is called '__entry'
465  *	*
466  *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
467  *	*
468  *	*	pid_t	prev_pid;
469  *	*
470  *	* __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
471  *	*
472  *	*	char	prev_comm[TASK_COMM_LEN];
473  *	*
474  *
475  *	TP_STRUCT__entry(
476  *		__array(	char,	prev_comm,	TASK_COMM_LEN	)
477  *		__field(	pid_t,	prev_pid			)
478  *		__field(	int,	prev_prio			)
479  *		__array(	char,	next_comm,	TASK_COMM_LEN	)
480  *		__field(	pid_t,	next_pid			)
481  *		__field(	int,	next_prio			)
482  *	),
483  *
484  *	*
485  *	* Assign the entry into the trace record, by embedding
486  *	* a full C statement block into TP_fast_assign(). You
487  *	* can refer to the trace record as '__entry' -
488  *	* otherwise you can put arbitrary C code in here.
489  *	*
490  *	* Note: this C code will execute every time a trace event
491  *	* happens, on an active tracepoint.
492  *	*
493  *
494  *	TP_fast_assign(
495  *		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
496  *		__entry->prev_pid	= prev->pid;
497  *		__entry->prev_prio	= prev->prio;
498  *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
499  *		__entry->next_pid	= next->pid;
500  *		__entry->next_prio	= next->prio;
501  *	),
502  *
503  *	*
504  *	* Formatted output of a trace record via TP_printk().
505  *	* This is how the tracepoint will appear under ftrace
506  *	* plugins that make use of this tracepoint.
507  *	*
508  *	* (raw-binary tracing won't actually perform this step.)
509  *	*
510  *
511  *	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
512  *		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
513  *		__entry->next_comm, __entry->next_pid, __entry->next_prio),
514  *
515  * );
516  *
517  * This macro construct is thus used for the regular printk format
518  * tracing setup, it is used to construct a function pointer based
519  * tracepoint callback (this is used by programmatic plugins and
520  * can also be used by generic instrumentation like SystemTap), and
521  * it is also used to expose a structured trace record in
522  * /sys/kernel/tracing/events/.
523  *
524  * A set of (un)registration functions can be passed to the variant
525  * TRACE_EVENT_FN to perform any (un)registration work.
526  */
527 
/*
 * Default (declaration-only) meaning of the event-class macros: a class
 * declares nothing, and every event of a class becomes a plain tracepoint
 * declaration.  The template, reg/unreg and print arguments are dropped.
 */
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)

#define DEFINE_EVENT(template, name, proto, args)			\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)	\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define DEFINE_EVENT_PRINT(template, name, proto, args, print)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond)	\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))
539 
/*
 * Default (declaration-only) meaning of the TRACE_EVENT family: each one
 * becomes a tracepoint declaration, conditional when a @cond is supplied.
 * The tstruct, assign, print and reg/unreg arguments are dropped.
 */
#define TRACE_EVENT(name, proto, args, tstruct, assign, print)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define TRACE_EVENT_FN(name, proto, args, tstruct,			\
		       assign, print, reg, unreg)			\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct,		\
			    assign, print, reg, unreg)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))

#define TRACE_EVENT_CONDITION(name, proto, args, cond,			\
			      tstruct, assign, print)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
557 
/*
 * DECLARE_EVENT_NOP - declare a tracepoint that never does anything
 *
 * Emits an empty trace_<name>() and a trace_<name>_enabled() that always
 * returns false.  @args is accepted but not expanded.
 */
#define DECLARE_EVENT_NOP(name, proto, args)				\
	static inline void trace_##name(proto) { }			\
	static inline bool trace_##name##_enabled(void)			\
	{								\
		return false;						\
	}

/* NOP counterparts of TRACE_EVENT, DECLARE_EVENT_CLASS and DEFINE_EVENT. */
#define TRACE_EVENT_NOP(name, proto, args, tstruct, assign, print)	\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))

#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)

#define DEFINE_EVENT_NOP(template, name, proto, args)			\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
572 
573 #endif /* ifndef TRACE_EVENT (see note above) */
574