xref: /linux-6.15/kernel/trace/trace_syscalls.c (revision cdb537ac)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
247788c58SFrederic Weisbecker #include <trace/syscall.h>
31c569f02SJosh Stone #include <trace/events/syscalls.h>
4f431b634SSteven Rostedt #include <linux/syscalls.h>
55a0e3ad6STejun Heo #include <linux/slab.h>
6ee08c6ecSFrederic Weisbecker #include <linux/kernel.h>
756d82e00SPaul Gortmaker #include <linux/module.h>	/* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
8fb34a08cSJason Baron #include <linux/ftrace.h>
9cdd6c482SIngo Molnar #include <linux/perf_event.h>
100e242208SHassan Naveed #include <linux/xarray.h>
11ee08c6ecSFrederic Weisbecker #include <asm/syscall.h>
12ee08c6ecSFrederic Weisbecker 
13ee08c6ecSFrederic Weisbecker #include "trace_output.h"
14ee08c6ecSFrederic Weisbecker #include "trace.h"
15ee08c6ecSFrederic Weisbecker 
/*
 * Taken by the syscall event (un)registration helpers in this file while
 * they update a trace array's syscall refcount and per-syscall file slot.
 */
static DEFINE_MUTEX(syscall_trace_lock);

/*
 * Forward declarations of the ->reg callbacks referenced by the syscall
 * enter/exit event classes in this file.
 */
static int syscall_enter_register(struct trace_event_call *event,
				  enum trace_reg type, void *data);
static int syscall_exit_register(struct trace_event_call *event,
				 enum trace_reg type, void *data);
222239291aSSteven Rostedt 
232e33af02SSteven Rostedt static struct list_head *
syscall_get_enter_fields(struct trace_event_call * call)242425bcb9SSteven Rostedt (Red Hat) syscall_get_enter_fields(struct trace_event_call *call)
252e33af02SSteven Rostedt {
262e33af02SSteven Rostedt 	struct syscall_metadata *entry = call->data;
272e33af02SSteven Rostedt 
282e33af02SSteven Rostedt 	return &entry->enter_fields;
292e33af02SSteven Rostedt }
302e33af02SSteven Rostedt 
/*
 * Start and stop markers of the array of syscall_metadata pointers that
 * find_syscall_meta() walks. Both symbols are defined outside this file.
 */
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];

/*
 * Syscall number -> metadata lookup. The xarray is used when
 * CONFIG_HAVE_SPARSE_SYSCALL_NR is enabled, the flat table otherwise.
 */
static DEFINE_XARRAY(syscalls_metadata_sparse);
static struct syscall_metadata **syscalls_metadata;
36c44fc770SFrederic Weisbecker 
#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
/*
 * Default symbol/metadata name comparison.
 *
 * The first three characters of both strings are skipped before
 * comparing: archs using syscall wrappers may expose aliases that start
 * with ".SyS" or ".sys" rather than "sys", and comparing the prefix
 * would produce an unwanted mismatch.
 *
 * Both strings must be at least three characters long.
 * Returns true when the remainders are identical.
 */
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
	const char *sym_tail = &sym[3];
	const char *name_tail = &name[3];

	return strcmp(sym_tail, name_tail) == 0;
}
#endif
49b2d55496SIan Munsie 
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * On some architectures a 64bit kernel that runs 32bit applications
 * does not map syscalls for the 32bit tasks the same way it does for
 * 64bit tasks (x86 being the well known example).
 *
 * Rather than report the wrong syscall for such tasks, ignore them.
 *
 * An architecture opts in by defining ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
 * and by providing arch_trace_is_compat_syscall(), which tells the
 * tracing code which syscalls to ignore.
 *
 * Returns -1 for a compat syscall, the syscall number otherwise.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	if (likely(!arch_trace_is_compat_syscall(regs)))
		return syscall_get_nr(task, regs);

	return -1;
}
#else
/* No compat filtering: hand back whatever syscall_get_nr() reports. */
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	int nr = syscall_get_nr(task, regs);

	return nr;
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
81f431b634SSteven Rostedt 
823d56e331SSteven Rostedt static __init struct syscall_metadata *
find_syscall_meta(unsigned long syscall)833d56e331SSteven Rostedt find_syscall_meta(unsigned long syscall)
84c44fc770SFrederic Weisbecker {
853d56e331SSteven Rostedt 	struct syscall_metadata **start;
863d56e331SSteven Rostedt 	struct syscall_metadata **stop;
87c44fc770SFrederic Weisbecker 	char str[KSYM_SYMBOL_LEN];
88c44fc770SFrederic Weisbecker 
89c44fc770SFrederic Weisbecker 
903d56e331SSteven Rostedt 	start = __start_syscalls_metadata;
913d56e331SSteven Rostedt 	stop = __stop_syscalls_metadata;
92c44fc770SFrederic Weisbecker 	kallsyms_lookup(syscall, NULL, NULL, NULL, str);
93c44fc770SFrederic Weisbecker 
94ae07f551SIan Munsie 	if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
95ae07f551SIan Munsie 		return NULL;
96ae07f551SIan Munsie 
97c44fc770SFrederic Weisbecker 	for ( ; start < stop; start++) {
98b2d55496SIan Munsie 		if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
993d56e331SSteven Rostedt 			return *start;
100c44fc770SFrederic Weisbecker 	}
101c44fc770SFrederic Weisbecker 	return NULL;
102c44fc770SFrederic Weisbecker }
103c44fc770SFrederic Weisbecker 
syscall_nr_to_meta(int nr)104c44fc770SFrederic Weisbecker static struct syscall_metadata *syscall_nr_to_meta(int nr)
105c44fc770SFrederic Weisbecker {
1060e242208SHassan Naveed 	if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR))
1070e242208SHassan Naveed 		return xa_load(&syscalls_metadata_sparse, (unsigned long)nr);
1080e242208SHassan Naveed 
109c44fc770SFrederic Weisbecker 	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
110c44fc770SFrederic Weisbecker 		return NULL;
111c44fc770SFrederic Weisbecker 
112c44fc770SFrederic Weisbecker 	return syscalls_metadata[nr];
113c44fc770SFrederic Weisbecker }
114c44fc770SFrederic Weisbecker 
get_syscall_name(int syscall)115dbfeaa7aSTom Zanussi const char *get_syscall_name(int syscall)
116dbfeaa7aSTom Zanussi {
117dbfeaa7aSTom Zanussi 	struct syscall_metadata *entry;
118dbfeaa7aSTom Zanussi 
119dbfeaa7aSTom Zanussi 	entry = syscall_nr_to_meta(syscall);
120dbfeaa7aSTom Zanussi 	if (!entry)
121dbfeaa7aSTom Zanussi 		return NULL;
122dbfeaa7aSTom Zanussi 
123dbfeaa7aSTom Zanussi 	return entry->name;
124dbfeaa7aSTom Zanussi }
125dbfeaa7aSTom Zanussi 
1266aea49cbSFengguang Wu static enum print_line_t
print_syscall_enter(struct trace_iterator * iter,int flags,struct trace_event * event)127a9a57763SSteven Rostedt print_syscall_enter(struct trace_iterator *iter, int flags,
128a9a57763SSteven Rostedt 		    struct trace_event *event)
129bed1ffcaSFrederic Weisbecker {
130983f938aSSteven Rostedt (Red Hat) 	struct trace_array *tr = iter->tr;
131bed1ffcaSFrederic Weisbecker 	struct trace_seq *s = &iter->seq;
132bed1ffcaSFrederic Weisbecker 	struct trace_entry *ent = iter->ent;
133bed1ffcaSFrederic Weisbecker 	struct syscall_trace_enter *trace;
134bed1ffcaSFrederic Weisbecker 	struct syscall_metadata *entry;
135183742f0SSteven Rostedt (Red Hat) 	int i, syscall;
136bed1ffcaSFrederic Weisbecker 
13764c12e04SJason Baron 	trace = (typeof(trace))ent;
138bed1ffcaSFrederic Weisbecker 	syscall = trace->nr;
139bed1ffcaSFrederic Weisbecker 	entry = syscall_nr_to_meta(syscall);
14064c12e04SJason Baron 
141bed1ffcaSFrederic Weisbecker 	if (!entry)
142bed1ffcaSFrederic Weisbecker 		goto end;
143bed1ffcaSFrederic Weisbecker 
14432c0edaeSSteven Rostedt 	if (entry->enter_event->event.type != ent->type) {
14564c12e04SJason Baron 		WARN_ON_ONCE(1);
14664c12e04SJason Baron 		goto end;
14764c12e04SJason Baron 	}
14864c12e04SJason Baron 
149183742f0SSteven Rostedt (Red Hat) 	trace_seq_printf(s, "%s(", entry->name);
150bed1ffcaSFrederic Weisbecker 
151bed1ffcaSFrederic Weisbecker 	for (i = 0; i < entry->nb_args; i++) {
152183742f0SSteven Rostedt (Red Hat) 
153183742f0SSteven Rostedt (Red Hat) 		if (trace_seq_has_overflowed(s))
154183742f0SSteven Rostedt (Red Hat) 			goto end;
155183742f0SSteven Rostedt (Red Hat) 
156bed1ffcaSFrederic Weisbecker 		/* parameter types */
157cb1c45fbSJeff Xie 		if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
158183742f0SSteven Rostedt (Red Hat) 			trace_seq_printf(s, "%s ", entry->types[i]);
159183742f0SSteven Rostedt (Red Hat) 
160bed1ffcaSFrederic Weisbecker 		/* parameter values */
161183742f0SSteven Rostedt (Red Hat) 		trace_seq_printf(s, "%s: %lx%s", entry->args[i],
162bed1ffcaSFrederic Weisbecker 				 trace->args[i],
1634539f077SLi Zefan 				 i == entry->nb_args - 1 ? "" : ", ");
164bed1ffcaSFrederic Weisbecker 	}
165bed1ffcaSFrederic Weisbecker 
166183742f0SSteven Rostedt (Red Hat) 	trace_seq_putc(s, ')');
167bed1ffcaSFrederic Weisbecker end:
168183742f0SSteven Rostedt (Red Hat) 	trace_seq_putc(s, '\n');
1694539f077SLi Zefan 
170183742f0SSteven Rostedt (Red Hat) 	return trace_handle_return(s);
171bed1ffcaSFrederic Weisbecker }
172bed1ffcaSFrederic Weisbecker 
1736aea49cbSFengguang Wu static enum print_line_t
print_syscall_exit(struct trace_iterator * iter,int flags,struct trace_event * event)174a9a57763SSteven Rostedt print_syscall_exit(struct trace_iterator *iter, int flags,
175a9a57763SSteven Rostedt 		   struct trace_event *event)
176bed1ffcaSFrederic Weisbecker {
177bed1ffcaSFrederic Weisbecker 	struct trace_seq *s = &iter->seq;
178bed1ffcaSFrederic Weisbecker 	struct trace_entry *ent = iter->ent;
179bed1ffcaSFrederic Weisbecker 	struct syscall_trace_exit *trace;
180bed1ffcaSFrederic Weisbecker 	int syscall;
181bed1ffcaSFrederic Weisbecker 	struct syscall_metadata *entry;
182bed1ffcaSFrederic Weisbecker 
18364c12e04SJason Baron 	trace = (typeof(trace))ent;
184bed1ffcaSFrederic Weisbecker 	syscall = trace->nr;
185bed1ffcaSFrederic Weisbecker 	entry = syscall_nr_to_meta(syscall);
18664c12e04SJason Baron 
187bed1ffcaSFrederic Weisbecker 	if (!entry) {
188146c3442Szhangwei(Jovi) 		trace_seq_putc(s, '\n');
189183742f0SSteven Rostedt (Red Hat) 		goto out;
190bed1ffcaSFrederic Weisbecker 	}
191bed1ffcaSFrederic Weisbecker 
19232c0edaeSSteven Rostedt 	if (entry->exit_event->event.type != ent->type) {
19364c12e04SJason Baron 		WARN_ON_ONCE(1);
19464c12e04SJason Baron 		return TRACE_TYPE_UNHANDLED;
19564c12e04SJason Baron 	}
19664c12e04SJason Baron 
197183742f0SSteven Rostedt (Red Hat) 	trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
198bed1ffcaSFrederic Weisbecker 				trace->ret);
199bed1ffcaSFrederic Weisbecker 
200183742f0SSteven Rostedt (Red Hat)  out:
201183742f0SSteven Rostedt (Red Hat) 	return trace_handle_return(s);
202bed1ffcaSFrederic Weisbecker }
203bed1ffcaSFrederic Weisbecker 
/*
 * Initializer for one fixed struct trace_event_fields entry: type and
 * name are stringified, size/alignment/signedness are derived from
 * @_type, and the filter type is always FILTER_OTHER.
 */
#define SYSCALL_FIELD(_type, _name) {					\
	.type = #_type,							\
	.name = #_name,							\
	.size = sizeof(_type),						\
	.align = __alignof__(_type),					\
	.is_signed = is_signed_type(_type),				\
	.filter_type = FILTER_OTHER }
208e6971969SLi Zefan 
2093ddc77f6SLi Zefan static int __init
__set_enter_print_fmt(struct syscall_metadata * entry,char * buf,int len)2103ddc77f6SLi Zefan __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
21150307a45SLai Jiangshan {
21250307a45SLai Jiangshan 	int i;
21350307a45SLai Jiangshan 	int pos = 0;
21450307a45SLai Jiangshan 
21550307a45SLai Jiangshan 	/* When len=0, we just calculate the needed length */
21650307a45SLai Jiangshan #define LEN_OR_ZERO (len ? len - pos : 0)
21750307a45SLai Jiangshan 
21850307a45SLai Jiangshan 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
21950307a45SLai Jiangshan 	for (i = 0; i < entry->nb_args; i++) {
22050307a45SLai Jiangshan 		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
22150307a45SLai Jiangshan 				entry->args[i], sizeof(unsigned long),
22250307a45SLai Jiangshan 				i == entry->nb_args - 1 ? "" : ", ");
22350307a45SLai Jiangshan 	}
22450307a45SLai Jiangshan 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
22550307a45SLai Jiangshan 
22650307a45SLai Jiangshan 	for (i = 0; i < entry->nb_args; i++) {
22750307a45SLai Jiangshan 		pos += snprintf(buf + pos, LEN_OR_ZERO,
22850307a45SLai Jiangshan 				", ((unsigned long)(REC->%s))", entry->args[i]);
22950307a45SLai Jiangshan 	}
23050307a45SLai Jiangshan 
23150307a45SLai Jiangshan #undef LEN_OR_ZERO
23250307a45SLai Jiangshan 
23350307a45SLai Jiangshan 	/* return the length of print_fmt */
23450307a45SLai Jiangshan 	return pos;
23550307a45SLai Jiangshan }
23650307a45SLai Jiangshan 
set_syscall_print_fmt(struct trace_event_call * call)2372425bcb9SSteven Rostedt (Red Hat) static int __init set_syscall_print_fmt(struct trace_event_call *call)
23850307a45SLai Jiangshan {
23950307a45SLai Jiangshan 	char *print_fmt;
24050307a45SLai Jiangshan 	int len;
24150307a45SLai Jiangshan 	struct syscall_metadata *entry = call->data;
24250307a45SLai Jiangshan 
24350307a45SLai Jiangshan 	if (entry->enter_event != call) {
24450307a45SLai Jiangshan 		call->print_fmt = "\"0x%lx\", REC->ret";
24550307a45SLai Jiangshan 		return 0;
24650307a45SLai Jiangshan 	}
24750307a45SLai Jiangshan 
24850307a45SLai Jiangshan 	/* First: called with 0 length to calculate the needed length */
24950307a45SLai Jiangshan 	len = __set_enter_print_fmt(entry, NULL, 0);
25050307a45SLai Jiangshan 
25150307a45SLai Jiangshan 	print_fmt = kmalloc(len + 1, GFP_KERNEL);
25250307a45SLai Jiangshan 	if (!print_fmt)
25350307a45SLai Jiangshan 		return -ENOMEM;
25450307a45SLai Jiangshan 
25550307a45SLai Jiangshan 	/* Second: actually write the @print_fmt */
25650307a45SLai Jiangshan 	__set_enter_print_fmt(entry, print_fmt, len + 1);
25750307a45SLai Jiangshan 	call->print_fmt = print_fmt;
25850307a45SLai Jiangshan 
25950307a45SLai Jiangshan 	return 0;
26050307a45SLai Jiangshan }
26150307a45SLai Jiangshan 
free_syscall_print_fmt(struct trace_event_call * call)2622425bcb9SSteven Rostedt (Red Hat) static void __init free_syscall_print_fmt(struct trace_event_call *call)
26350307a45SLai Jiangshan {
26450307a45SLai Jiangshan 	struct syscall_metadata *entry = call->data;
26550307a45SLai Jiangshan 
26650307a45SLai Jiangshan 	if (entry->enter_event == call)
26750307a45SLai Jiangshan 		kfree(call->print_fmt);
26850307a45SLai Jiangshan }
26950307a45SLai Jiangshan 
syscall_enter_define_fields(struct trace_event_call * call)2702425bcb9SSteven Rostedt (Red Hat) static int __init syscall_enter_define_fields(struct trace_event_call *call)
271540b7b8dSLi Zefan {
272540b7b8dSLi Zefan 	struct syscall_trace_enter trace;
27331c16b13SLai Jiangshan 	struct syscall_metadata *meta = call->data;
274540b7b8dSLi Zefan 	int offset = offsetof(typeof(trace), args);
27531537cf8SSteven Rostedt (VMware) 	int ret = 0;
27631537cf8SSteven Rostedt (VMware) 	int i;
2770f1ef51dSLai Jiangshan 
278540b7b8dSLi Zefan 	for (i = 0; i < meta->nb_args; i++) {
279aeaeae11SFrederic Weisbecker 		ret = trace_define_field(call, meta->types[i],
280aeaeae11SFrederic Weisbecker 					 meta->args[i], offset,
28143b51eadSLi Zefan 					 sizeof(unsigned long), 0,
28243b51eadSLi Zefan 					 FILTER_OTHER);
28304ae87a5SPeter Zijlstra 		if (ret)
28404ae87a5SPeter Zijlstra 			break;
285540b7b8dSLi Zefan 		offset += sizeof(unsigned long);
286540b7b8dSLi Zefan 	}
287540b7b8dSLi Zefan 
288540b7b8dSLi Zefan 	return ret;
289540b7b8dSLi Zefan }
290540b7b8dSLi Zefan 
ftrace_syscall_enter(void * data,struct pt_regs * regs,long id)29112ab74eeSSteven Rostedt static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
292ee08c6ecSFrederic Weisbecker {
29312ab74eeSSteven Rostedt 	struct trace_array *tr = data;
2947f1d2f82SSteven Rostedt (Red Hat) 	struct trace_event_file *trace_file;
295bed1ffcaSFrederic Weisbecker 	struct syscall_trace_enter *entry;
296bed1ffcaSFrederic Weisbecker 	struct syscall_metadata *sys_data;
297cb1c45fbSJeff Xie 	struct trace_event_buffer fbuffer;
298d08e4113SSteven Rostedt (Red Hat) 	unsigned long args[6];
299ee08c6ecSFrederic Weisbecker 	int syscall_nr;
300f431b634SSteven Rostedt 	int size;
301ee08c6ecSFrederic Weisbecker 
30213d750c2SMathieu Desnoyers 	/*
30313d750c2SMathieu Desnoyers 	 * Syscall probe called with preemption enabled, but the ring
30413d750c2SMathieu Desnoyers 	 * buffer and per-cpu data require preemption to be disabled.
30513d750c2SMathieu Desnoyers 	 */
306a3204c74SMathieu Desnoyers 	might_fault();
30713d750c2SMathieu Desnoyers 	guard(preempt_notrace)();
30813d750c2SMathieu Desnoyers 
309f431b634SSteven Rostedt 	syscall_nr = trace_get_syscall_nr(current, regs);
310086ba77aSRabin Vincent 	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
311cd0980fcSHendrik Brueckner 		return;
312d562aff9STom Zanussi 
313d562aff9STom Zanussi 	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
3147f1d2f82SSteven Rostedt (Red Hat) 	trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
3157f1d2f82SSteven Rostedt (Red Hat) 	if (!trace_file)
316d562aff9STom Zanussi 		return;
317d562aff9STom Zanussi 
31809a5059aSSteven Rostedt (Red Hat) 	if (trace_trigger_soft_disabled(trace_file))
319fb34a08cSJason Baron 		return;
320ee08c6ecSFrederic Weisbecker 
321bed1ffcaSFrederic Weisbecker 	sys_data = syscall_nr_to_meta(syscall_nr);
322bed1ffcaSFrederic Weisbecker 	if (!sys_data)
323bed1ffcaSFrederic Weisbecker 		return;
324bed1ffcaSFrederic Weisbecker 
325bed1ffcaSFrederic Weisbecker 	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
326bed1ffcaSFrederic Weisbecker 
327cb1c45fbSJeff Xie 	entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
328cb1c45fbSJeff Xie 	if (!entry)
329bed1ffcaSFrederic Weisbecker 		return;
330bed1ffcaSFrederic Weisbecker 
331cb1c45fbSJeff Xie 	entry = ring_buffer_event_data(fbuffer.event);
332bed1ffcaSFrederic Weisbecker 	entry->nr = syscall_nr;
333b35f549dSSteven Rostedt (Red Hat) 	syscall_get_arguments(current, regs, args);
334d08e4113SSteven Rostedt (Red Hat) 	memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
335bed1ffcaSFrederic Weisbecker 
336cb1c45fbSJeff Xie 	trace_event_buffer_commit(&fbuffer);
337ee08c6ecSFrederic Weisbecker }
338ee08c6ecSFrederic Weisbecker 
ftrace_syscall_exit(void * data,struct pt_regs * regs,long ret)33912ab74eeSSteven Rostedt static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
340ee08c6ecSFrederic Weisbecker {
34112ab74eeSSteven Rostedt 	struct trace_array *tr = data;
3427f1d2f82SSteven Rostedt (Red Hat) 	struct trace_event_file *trace_file;
343bed1ffcaSFrederic Weisbecker 	struct syscall_trace_exit *entry;
344bed1ffcaSFrederic Weisbecker 	struct syscall_metadata *sys_data;
345cb1c45fbSJeff Xie 	struct trace_event_buffer fbuffer;
346ee08c6ecSFrederic Weisbecker 	int syscall_nr;
347ee08c6ecSFrederic Weisbecker 
34813d750c2SMathieu Desnoyers 	/*
34913d750c2SMathieu Desnoyers 	 * Syscall probe called with preemption enabled, but the ring
35013d750c2SMathieu Desnoyers 	 * buffer and per-cpu data require preemption to be disabled.
35113d750c2SMathieu Desnoyers 	 */
352a3204c74SMathieu Desnoyers 	might_fault();
35313d750c2SMathieu Desnoyers 	guard(preempt_notrace)();
35413d750c2SMathieu Desnoyers 
355f431b634SSteven Rostedt 	syscall_nr = trace_get_syscall_nr(current, regs);
356086ba77aSRabin Vincent 	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
357cd0980fcSHendrik Brueckner 		return;
358d562aff9STom Zanussi 
359d562aff9STom Zanussi 	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
3607f1d2f82SSteven Rostedt (Red Hat) 	trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
3617f1d2f82SSteven Rostedt (Red Hat) 	if (!trace_file)
362d562aff9STom Zanussi 		return;
363d562aff9STom Zanussi 
36409a5059aSSteven Rostedt (Red Hat) 	if (trace_trigger_soft_disabled(trace_file))
365fb34a08cSJason Baron 		return;
366ee08c6ecSFrederic Weisbecker 
367bed1ffcaSFrederic Weisbecker 	sys_data = syscall_nr_to_meta(syscall_nr);
368bed1ffcaSFrederic Weisbecker 	if (!sys_data)
369bed1ffcaSFrederic Weisbecker 		return;
370bed1ffcaSFrederic Weisbecker 
371cb1c45fbSJeff Xie 	entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry));
372cb1c45fbSJeff Xie 	if (!entry)
373bed1ffcaSFrederic Weisbecker 		return;
374bed1ffcaSFrederic Weisbecker 
375cb1c45fbSJeff Xie 	entry = ring_buffer_event_data(fbuffer.event);
376bed1ffcaSFrederic Weisbecker 	entry->nr = syscall_nr;
377bed1ffcaSFrederic Weisbecker 	entry->ret = syscall_get_return_value(current, regs);
378bed1ffcaSFrederic Weisbecker 
379cb1c45fbSJeff Xie 	trace_event_buffer_commit(&fbuffer);
380ee08c6ecSFrederic Weisbecker }
381ee08c6ecSFrederic Weisbecker 
reg_event_syscall_enter(struct trace_event_file * file,struct trace_event_call * call)3827f1d2f82SSteven Rostedt (Red Hat) static int reg_event_syscall_enter(struct trace_event_file *file,
3832425bcb9SSteven Rostedt (Red Hat) 				   struct trace_event_call *call)
384ee08c6ecSFrederic Weisbecker {
38512ab74eeSSteven Rostedt 	struct trace_array *tr = file->tr;
386fb34a08cSJason Baron 	int ret = 0;
387fb34a08cSJason Baron 	int num;
388ee08c6ecSFrederic Weisbecker 
389c252f657SLai Jiangshan 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
3903773b389SIan Munsie 	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
391fb34a08cSJason Baron 		return -ENOSYS;
392fb34a08cSJason Baron 	mutex_lock(&syscall_trace_lock);
39312ab74eeSSteven Rostedt 	if (!tr->sys_refcount_enter)
39412ab74eeSSteven Rostedt 		ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
3953b8e4273SLi Zefan 	if (!ret) {
396d562aff9STom Zanussi 		rcu_assign_pointer(tr->enter_syscall_files[num], file);
39712ab74eeSSteven Rostedt 		tr->sys_refcount_enter++;
398fb34a08cSJason Baron 	}
399fb34a08cSJason Baron 	mutex_unlock(&syscall_trace_lock);
400fb34a08cSJason Baron 	return ret;
401ee08c6ecSFrederic Weisbecker }
402ee08c6ecSFrederic Weisbecker 
unreg_event_syscall_enter(struct trace_event_file * file,struct trace_event_call * call)4037f1d2f82SSteven Rostedt (Red Hat) static void unreg_event_syscall_enter(struct trace_event_file *file,
4042425bcb9SSteven Rostedt (Red Hat) 				      struct trace_event_call *call)
405ee08c6ecSFrederic Weisbecker {
40612ab74eeSSteven Rostedt 	struct trace_array *tr = file->tr;
407fb34a08cSJason Baron 	int num;
408fb34a08cSJason Baron 
409c252f657SLai Jiangshan 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
4103773b389SIan Munsie 	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
411fb34a08cSJason Baron 		return;
412fb34a08cSJason Baron 	mutex_lock(&syscall_trace_lock);
41312ab74eeSSteven Rostedt 	tr->sys_refcount_enter--;
414fb5a613bSAndreea-Cristina Bernat 	RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
41512ab74eeSSteven Rostedt 	if (!tr->sys_refcount_enter)
41612ab74eeSSteven Rostedt 		unregister_trace_sys_enter(ftrace_syscall_enter, tr);
417fb34a08cSJason Baron 	mutex_unlock(&syscall_trace_lock);
418ee08c6ecSFrederic Weisbecker }
419ee08c6ecSFrederic Weisbecker 
reg_event_syscall_exit(struct trace_event_file * file,struct trace_event_call * call)4207f1d2f82SSteven Rostedt (Red Hat) static int reg_event_syscall_exit(struct trace_event_file *file,
4212425bcb9SSteven Rostedt (Red Hat) 				  struct trace_event_call *call)
422fb34a08cSJason Baron {
42312ab74eeSSteven Rostedt 	struct trace_array *tr = file->tr;
424fb34a08cSJason Baron 	int ret = 0;
425fb34a08cSJason Baron 	int num;
426fb34a08cSJason Baron 
427c252f657SLai Jiangshan 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
4283773b389SIan Munsie 	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
429fb34a08cSJason Baron 		return -ENOSYS;
430fb34a08cSJason Baron 	mutex_lock(&syscall_trace_lock);
43112ab74eeSSteven Rostedt 	if (!tr->sys_refcount_exit)
43212ab74eeSSteven Rostedt 		ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
4333b8e4273SLi Zefan 	if (!ret) {
434d562aff9STom Zanussi 		rcu_assign_pointer(tr->exit_syscall_files[num], file);
43512ab74eeSSteven Rostedt 		tr->sys_refcount_exit++;
436fb34a08cSJason Baron 	}
437fb34a08cSJason Baron 	mutex_unlock(&syscall_trace_lock);
438fb34a08cSJason Baron 	return ret;
439fb34a08cSJason Baron }
440fb34a08cSJason Baron 
unreg_event_syscall_exit(struct trace_event_file * file,struct trace_event_call * call)4417f1d2f82SSteven Rostedt (Red Hat) static void unreg_event_syscall_exit(struct trace_event_file *file,
4422425bcb9SSteven Rostedt (Red Hat) 				     struct trace_event_call *call)
443fb34a08cSJason Baron {
44412ab74eeSSteven Rostedt 	struct trace_array *tr = file->tr;
445fb34a08cSJason Baron 	int num;
446fb34a08cSJason Baron 
447c252f657SLai Jiangshan 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
4483773b389SIan Munsie 	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
449fb34a08cSJason Baron 		return;
450fb34a08cSJason Baron 	mutex_lock(&syscall_trace_lock);
45112ab74eeSSteven Rostedt 	tr->sys_refcount_exit--;
452fb5a613bSAndreea-Cristina Bernat 	RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
45312ab74eeSSteven Rostedt 	if (!tr->sys_refcount_exit)
45412ab74eeSSteven Rostedt 		unregister_trace_sys_exit(ftrace_syscall_exit, tr);
455fb34a08cSJason Baron 	mutex_unlock(&syscall_trace_lock);
456fb34a08cSJason Baron }
457fb34a08cSJason Baron 
init_syscall_trace(struct trace_event_call * call)4582425bcb9SSteven Rostedt (Red Hat) static int __init init_syscall_trace(struct trace_event_call *call)
459a1301da0SLai Jiangshan {
460a1301da0SLai Jiangshan 	int id;
461ba976970SIan Munsie 	int num;
462ba976970SIan Munsie 
463ba976970SIan Munsie 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
464ba976970SIan Munsie 	if (num < 0 || num >= NR_syscalls) {
465ba976970SIan Munsie 		pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
466ba976970SIan Munsie 				((struct syscall_metadata *)call->data)->name);
467ba976970SIan Munsie 		return -ENOSYS;
468ba976970SIan Munsie 	}
469a1301da0SLai Jiangshan 
47050307a45SLai Jiangshan 	if (set_syscall_print_fmt(call) < 0)
47150307a45SLai Jiangshan 		return -ENOMEM;
47250307a45SLai Jiangshan 
473c7ef3a90SSteven Rostedt 	id = trace_event_raw_init(call);
474c7ef3a90SSteven Rostedt 
475c7ef3a90SSteven Rostedt 	if (id < 0) {
47650307a45SLai Jiangshan 		free_syscall_print_fmt(call);
477c7ef3a90SSteven Rostedt 		return id;
47850307a45SLai Jiangshan 	}
479c7ef3a90SSteven Rostedt 
480c7ef3a90SSteven Rostedt 	return id;
481a1301da0SLai Jiangshan }
482a1301da0SLai Jiangshan 
48304ae87a5SPeter Zijlstra static struct trace_event_fields __refdata syscall_enter_fields_array[] = {
48404ae87a5SPeter Zijlstra 	SYSCALL_FIELD(int, __syscall_nr),
48504ae87a5SPeter Zijlstra 	{ .type = TRACE_FUNCTION_TYPE,
48604ae87a5SPeter Zijlstra 	  .define_fields = syscall_enter_define_fields },
48704ae87a5SPeter Zijlstra 	{}
48804ae87a5SPeter Zijlstra };
48904ae87a5SPeter Zijlstra 
4906f86ab9fSVaibhav Nagarnaik struct trace_event_functions enter_syscall_print_funcs = {
4916f86ab9fSVaibhav Nagarnaik 	.trace		= print_syscall_enter,
4926f86ab9fSVaibhav Nagarnaik };
4936f86ab9fSVaibhav Nagarnaik 
4946f86ab9fSVaibhav Nagarnaik struct trace_event_functions exit_syscall_print_funcs = {
4956f86ab9fSVaibhav Nagarnaik 	.trace		= print_syscall_exit,
4966f86ab9fSVaibhav Nagarnaik };
4976f86ab9fSVaibhav Nagarnaik 
4982425bcb9SSteven Rostedt (Red Hat) struct trace_event_class __refdata event_class_syscall_enter = {
4996f86ab9fSVaibhav Nagarnaik 	.system		= "syscalls",
5006f86ab9fSVaibhav Nagarnaik 	.reg		= syscall_enter_register,
50104ae87a5SPeter Zijlstra 	.fields_array	= syscall_enter_fields_array,
5026f86ab9fSVaibhav Nagarnaik 	.get_fields	= syscall_get_enter_fields,
5036f86ab9fSVaibhav Nagarnaik 	.raw_init	= init_syscall_trace,
5046f86ab9fSVaibhav Nagarnaik };
5056f86ab9fSVaibhav Nagarnaik 
5062425bcb9SSteven Rostedt (Red Hat) struct trace_event_class __refdata event_class_syscall_exit = {
5076f86ab9fSVaibhav Nagarnaik 	.system		= "syscalls",
5086f86ab9fSVaibhav Nagarnaik 	.reg		= syscall_exit_register,
50904ae87a5SPeter Zijlstra 	.fields_array	= (struct trace_event_fields[]){
51004ae87a5SPeter Zijlstra 		SYSCALL_FIELD(int, __syscall_nr),
51104ae87a5SPeter Zijlstra 		SYSCALL_FIELD(long, ret),
51204ae87a5SPeter Zijlstra 		{}
51304ae87a5SPeter Zijlstra 	},
5146f86ab9fSVaibhav Nagarnaik 	.fields		= LIST_HEAD_INIT(event_class_syscall_exit.fields),
5156f86ab9fSVaibhav Nagarnaik 	.raw_init	= init_syscall_trace,
5166f86ab9fSVaibhav Nagarnaik };
5176f86ab9fSVaibhav Nagarnaik 
arch_syscall_addr(int nr)518c763ba06SIan Munsie unsigned long __init __weak arch_syscall_addr(int nr)
519e7b8e675SMike Frysinger {
520e7b8e675SMike Frysinger 	return (unsigned long)sys_call_table[nr];
521e7b8e675SMike Frysinger }
522e7b8e675SMike Frysinger 
init_ftrace_syscalls(void)5235f893b26SSteven Rostedt (Red Hat) void __init init_ftrace_syscalls(void)
524c44fc770SFrederic Weisbecker {
525c44fc770SFrederic Weisbecker 	struct syscall_metadata *meta;
526c44fc770SFrederic Weisbecker 	unsigned long addr;
527c44fc770SFrederic Weisbecker 	int i;
5280e242208SHassan Naveed 	void *ret;
529c44fc770SFrederic Weisbecker 
5300e242208SHassan Naveed 	if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
5310e242208SHassan Naveed 		syscalls_metadata = kcalloc(NR_syscalls,
5320e242208SHassan Naveed 					sizeof(*syscalls_metadata),
53347b0edcbSThomas Meyer 					GFP_KERNEL);
534c44fc770SFrederic Weisbecker 		if (!syscalls_metadata) {
535c44fc770SFrederic Weisbecker 			WARN_ON(1);
5365f893b26SSteven Rostedt (Red Hat) 			return;
537c44fc770SFrederic Weisbecker 		}
5380e242208SHassan Naveed 	}
539c44fc770SFrederic Weisbecker 
540c44fc770SFrederic Weisbecker 	for (i = 0; i < NR_syscalls; i++) {
541c44fc770SFrederic Weisbecker 		addr = arch_syscall_addr(i);
542c44fc770SFrederic Weisbecker 		meta = find_syscall_meta(addr);
543c252f657SLai Jiangshan 		if (!meta)
544c252f657SLai Jiangshan 			continue;
545c252f657SLai Jiangshan 
546c252f657SLai Jiangshan 		meta->syscall_nr = i;
5470e242208SHassan Naveed 
5480e242208SHassan Naveed 		if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
549c44fc770SFrederic Weisbecker 			syscalls_metadata[i] = meta;
5500e242208SHassan Naveed 		} else {
5510e242208SHassan Naveed 			ret = xa_store(&syscalls_metadata_sparse, i, meta,
5520e242208SHassan Naveed 					GFP_KERNEL);
5530e242208SHassan Naveed 			WARN(xa_is_err(ret),
5540e242208SHassan Naveed 				"Syscall memory allocation failed\n");
5550e242208SHassan Naveed 		}
5560e242208SHassan Naveed 
557c44fc770SFrederic Weisbecker 	}
558c44fc770SFrederic Weisbecker }
559c44fc770SFrederic Weisbecker 
56007b139c8SLi Zefan #ifdef CONFIG_PERF_EVENTS
56119007a67SFrederic Weisbecker 
/* One bit per syscall number, separately for enter and exit */
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);

/* Counters paired with the two bitmaps above */
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
566f4b5ffccSJason Baron 
perf_call_bpf_enter(struct trace_event_call * call,struct pt_regs * regs,struct syscall_metadata * sys_data,struct syscall_trace_enter * rec)567e87c6bc3SYonghong Song static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
568cf5f5ceaSYonghong Song 			       struct syscall_metadata *sys_data,
569e87c6bc3SYonghong Song 			       struct syscall_trace_enter *rec)
570e87c6bc3SYonghong Song {
571cf5f5ceaSYonghong Song 	struct syscall_tp_t {
572d3c4db86SYauheni Kaliuta 		struct trace_entry ent;
573ba8ea723SArtem Savkov 		int syscall_nr;
574609320c8SYonghong Song 		unsigned long args[SYSCALL_DEFINE_MAXARGS];
575d3c4db86SYauheni Kaliuta 	} __aligned(8) param;
576cf5f5ceaSYonghong Song 	int i;
577cf5f5ceaSYonghong Song 
578d3c4db86SYauheni Kaliuta 	BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *));
579d3c4db86SYauheni Kaliuta 
580d3c4db86SYauheni Kaliuta 	/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
581376bd59eSYonghong Song 	perf_fetch_caller_regs(regs);
582cf5f5ceaSYonghong Song 	*(struct pt_regs **)&param = regs;
583cf5f5ceaSYonghong Song 	param.syscall_nr = rec->nr;
584cf5f5ceaSYonghong Song 	for (i = 0; i < sys_data->nb_args; i++)
585cf5f5ceaSYonghong Song 		param.args[i] = rec->args[i];
586e87c6bc3SYonghong Song 	return trace_call_bpf(call, &param);
587cf5f5ceaSYonghong Song }
588cf5f5ceaSYonghong Song 
perf_syscall_enter(void * ignore,struct pt_regs * regs,long id)58938516ab5SSteven Rostedt static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
590f4b5ffccSJason Baron {
591f4b5ffccSJason Baron 	struct syscall_metadata *sys_data;
59220ab4425SFrederic Weisbecker 	struct syscall_trace_enter *rec;
593376bd59eSYonghong Song 	struct pt_regs *fake_regs;
5941c024ecaSPeter Zijlstra 	struct hlist_head *head;
595d08e4113SSteven Rostedt (Red Hat) 	unsigned long args[6];
596e87c6bc3SYonghong Song 	bool valid_prog_array;
597f4b5ffccSJason Baron 	int syscall_nr;
5984ed7c92dSPeter Zijlstra 	int rctx;
59919007a67SFrederic Weisbecker 	int size;
600f4b5ffccSJason Baron 
60165e7462aSMathieu Desnoyers 	/*
60265e7462aSMathieu Desnoyers 	 * Syscall probe called with preemption enabled, but the ring
60365e7462aSMathieu Desnoyers 	 * buffer and per-cpu data require preemption to be disabled.
60465e7462aSMathieu Desnoyers 	 */
605*cdb537acSMathieu Desnoyers 	might_fault();
60665e7462aSMathieu Desnoyers 	guard(preempt_notrace)();
60765e7462aSMathieu Desnoyers 
608f431b634SSteven Rostedt 	syscall_nr = trace_get_syscall_nr(current, regs);
609086ba77aSRabin Vincent 	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
61060916a93SWill Deacon 		return;
61197d5a220SFrederic Weisbecker 	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
612f4b5ffccSJason Baron 		return;
613f4b5ffccSJason Baron 
614f4b5ffccSJason Baron 	sys_data = syscall_nr_to_meta(syscall_nr);
615f4b5ffccSJason Baron 	if (!sys_data)
616f4b5ffccSJason Baron 		return;
617f4b5ffccSJason Baron 
618421c7860SOleg Nesterov 	head = this_cpu_ptr(sys_data->enter_event->perf_events);
619e87c6bc3SYonghong Song 	valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
620e87c6bc3SYonghong Song 	if (!valid_prog_array && hlist_empty(head))
621421c7860SOleg Nesterov 		return;
622421c7860SOleg Nesterov 
62319007a67SFrederic Weisbecker 	/* get the size after alignment with the u32 buffer size field */
62419007a67SFrederic Weisbecker 	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
62519007a67SFrederic Weisbecker 	size = ALIGN(size + sizeof(u32), sizeof(u64));
62619007a67SFrederic Weisbecker 	size -= sizeof(u32);
62719007a67SFrederic Weisbecker 
628376bd59eSYonghong Song 	rec = perf_trace_buf_alloc(size, &fake_regs, &rctx);
629430ad5a6SXiao Guangrong 	if (!rec)
630430ad5a6SXiao Guangrong 		return;
63120ab4425SFrederic Weisbecker 
632e6971969SLi Zefan 	rec->nr = syscall_nr;
633b35f549dSSteven Rostedt (Red Hat) 	syscall_get_arguments(current, regs, args);
634d08e4113SSteven Rostedt (Red Hat) 	memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
635cf5f5ceaSYonghong Song 
636e87c6bc3SYonghong Song 	if ((valid_prog_array &&
637376bd59eSYonghong Song 	     !perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) ||
638cf5f5ceaSYonghong Song 	    hlist_empty(head)) {
639cf5f5ceaSYonghong Song 		perf_swevent_put_recursion_context(rctx);
640cf5f5ceaSYonghong Song 		return;
641cf5f5ceaSYonghong Song 	}
642cf5f5ceaSYonghong Song 
6431e1dcd93SAlexei Starovoitov 	perf_trace_buf_submit(rec, size, rctx,
6441e1dcd93SAlexei Starovoitov 			      sys_data->enter_event->event.type, 1, regs,
6458fd0fbbeSPeter Zijlstra 			      head, NULL);
646f4b5ffccSJason Baron }
647f4b5ffccSJason Baron 
perf_sysenter_enable(struct trace_event_call * call)6482425bcb9SSteven Rostedt (Red Hat) static int perf_sysenter_enable(struct trace_event_call *call)
649f4b5ffccSJason Baron {
650f4b5ffccSJason Baron 	int ret = 0;
651f4b5ffccSJason Baron 	int num;
652f4b5ffccSJason Baron 
6533bbe84e9SLai Jiangshan 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
654f4b5ffccSJason Baron 
655f4b5ffccSJason Baron 	mutex_lock(&syscall_trace_lock);
65697d5a220SFrederic Weisbecker 	if (!sys_perf_refcount_enter)
65738516ab5SSteven Rostedt 		ret = register_trace_sys_enter(perf_syscall_enter, NULL);
658f4b5ffccSJason Baron 	if (ret) {
659d282b9c0SColin Ian King 		pr_info("event trace: Could not activate syscall entry trace point");
660f4b5ffccSJason Baron 	} else {
66197d5a220SFrederic Weisbecker 		set_bit(num, enabled_perf_enter_syscalls);
66297d5a220SFrederic Weisbecker 		sys_perf_refcount_enter++;
663f4b5ffccSJason Baron 	}
664f4b5ffccSJason Baron 	mutex_unlock(&syscall_trace_lock);
665f4b5ffccSJason Baron 	return ret;
666f4b5ffccSJason Baron }
667f4b5ffccSJason Baron 
perf_sysenter_disable(struct trace_event_call * call)6682425bcb9SSteven Rostedt (Red Hat) static void perf_sysenter_disable(struct trace_event_call *call)
669f4b5ffccSJason Baron {
670f4b5ffccSJason Baron 	int num;
671f4b5ffccSJason Baron 
6723bbe84e9SLai Jiangshan 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
673f4b5ffccSJason Baron 
674f4b5ffccSJason Baron 	mutex_lock(&syscall_trace_lock);
67597d5a220SFrederic Weisbecker 	sys_perf_refcount_enter--;
67697d5a220SFrederic Weisbecker 	clear_bit(num, enabled_perf_enter_syscalls);
67797d5a220SFrederic Weisbecker 	if (!sys_perf_refcount_enter)
67838516ab5SSteven Rostedt 		unregister_trace_sys_enter(perf_syscall_enter, NULL);
679f4b5ffccSJason Baron 	mutex_unlock(&syscall_trace_lock);
680f4b5ffccSJason Baron }
681f4b5ffccSJason Baron 
perf_call_bpf_exit(struct trace_event_call * call,struct pt_regs * regs,struct syscall_trace_exit * rec)682e87c6bc3SYonghong Song static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
683e87c6bc3SYonghong Song 			      struct syscall_trace_exit *rec)
684e87c6bc3SYonghong Song {
685cf5f5ceaSYonghong Song 	struct syscall_tp_t {
686d3c4db86SYauheni Kaliuta 		struct trace_entry ent;
687ba8ea723SArtem Savkov 		int syscall_nr;
688cf5f5ceaSYonghong Song 		unsigned long ret;
689d3c4db86SYauheni Kaliuta 	} __aligned(8) param;
690cf5f5ceaSYonghong Song 
691d3c4db86SYauheni Kaliuta 	/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
692376bd59eSYonghong Song 	perf_fetch_caller_regs(regs);
693cf5f5ceaSYonghong Song 	*(struct pt_regs **)&param = regs;
694cf5f5ceaSYonghong Song 	param.syscall_nr = rec->nr;
695cf5f5ceaSYonghong Song 	param.ret = rec->ret;
696e87c6bc3SYonghong Song 	return trace_call_bpf(call, &param);
697cf5f5ceaSYonghong Song }
698cf5f5ceaSYonghong Song 
perf_syscall_exit(void * ignore,struct pt_regs * regs,long ret)69938516ab5SSteven Rostedt static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
700f4b5ffccSJason Baron {
701f4b5ffccSJason Baron 	struct syscall_metadata *sys_data;
70220ab4425SFrederic Weisbecker 	struct syscall_trace_exit *rec;
703376bd59eSYonghong Song 	struct pt_regs *fake_regs;
7041c024ecaSPeter Zijlstra 	struct hlist_head *head;
705e87c6bc3SYonghong Song 	bool valid_prog_array;
706f4b5ffccSJason Baron 	int syscall_nr;
7074ed7c92dSPeter Zijlstra 	int rctx;
70820ab4425SFrederic Weisbecker 	int size;
709f4b5ffccSJason Baron 
71065e7462aSMathieu Desnoyers 	/*
71165e7462aSMathieu Desnoyers 	 * Syscall probe called with preemption enabled, but the ring
71265e7462aSMathieu Desnoyers 	 * buffer and per-cpu data require preemption to be disabled.
71365e7462aSMathieu Desnoyers 	 */
714*cdb537acSMathieu Desnoyers 	might_fault();
71565e7462aSMathieu Desnoyers 	guard(preempt_notrace)();
71665e7462aSMathieu Desnoyers 
717f431b634SSteven Rostedt 	syscall_nr = trace_get_syscall_nr(current, regs);
718086ba77aSRabin Vincent 	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
71960916a93SWill Deacon 		return;
72097d5a220SFrederic Weisbecker 	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
721f4b5ffccSJason Baron 		return;
722f4b5ffccSJason Baron 
723f4b5ffccSJason Baron 	sys_data = syscall_nr_to_meta(syscall_nr);
724f4b5ffccSJason Baron 	if (!sys_data)
725f4b5ffccSJason Baron 		return;
726f4b5ffccSJason Baron 
727421c7860SOleg Nesterov 	head = this_cpu_ptr(sys_data->exit_event->perf_events);
728e87c6bc3SYonghong Song 	valid_prog_array = bpf_prog_array_valid(sys_data->exit_event);
729e87c6bc3SYonghong Song 	if (!valid_prog_array && hlist_empty(head))
730421c7860SOleg Nesterov 		return;
731421c7860SOleg Nesterov 
73220ab4425SFrederic Weisbecker 	/* We can probably do that at build time */
73320ab4425SFrederic Weisbecker 	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
73420ab4425SFrederic Weisbecker 	size -= sizeof(u32);
73519007a67SFrederic Weisbecker 
736376bd59eSYonghong Song 	rec = perf_trace_buf_alloc(size, &fake_regs, &rctx);
737430ad5a6SXiao Guangrong 	if (!rec)
738430ad5a6SXiao Guangrong 		return;
739ce71b9dfSFrederic Weisbecker 
74020ab4425SFrederic Weisbecker 	rec->nr = syscall_nr;
74120ab4425SFrederic Weisbecker 	rec->ret = syscall_get_return_value(current, regs);
742cf5f5ceaSYonghong Song 
743e87c6bc3SYonghong Song 	if ((valid_prog_array &&
744376bd59eSYonghong Song 	     !perf_call_bpf_exit(sys_data->exit_event, fake_regs, rec)) ||
745cf5f5ceaSYonghong Song 	    hlist_empty(head)) {
746cf5f5ceaSYonghong Song 		perf_swevent_put_recursion_context(rctx);
747cf5f5ceaSYonghong Song 		return;
748cf5f5ceaSYonghong Song 	}
749cf5f5ceaSYonghong Song 
7501e1dcd93SAlexei Starovoitov 	perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
7518fd0fbbeSPeter Zijlstra 			      1, regs, head, NULL);
752f4b5ffccSJason Baron }
753f4b5ffccSJason Baron 
perf_sysexit_enable(struct trace_event_call * call)7542425bcb9SSteven Rostedt (Red Hat) static int perf_sysexit_enable(struct trace_event_call *call)
755f4b5ffccSJason Baron {
756f4b5ffccSJason Baron 	int ret = 0;
757f4b5ffccSJason Baron 	int num;
758f4b5ffccSJason Baron 
7593bbe84e9SLai Jiangshan 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
760f4b5ffccSJason Baron 
761f4b5ffccSJason Baron 	mutex_lock(&syscall_trace_lock);
76297d5a220SFrederic Weisbecker 	if (!sys_perf_refcount_exit)
76338516ab5SSteven Rostedt 		ret = register_trace_sys_exit(perf_syscall_exit, NULL);
764f4b5ffccSJason Baron 	if (ret) {
765d282b9c0SColin Ian King 		pr_info("event trace: Could not activate syscall exit trace point");
766f4b5ffccSJason Baron 	} else {
76797d5a220SFrederic Weisbecker 		set_bit(num, enabled_perf_exit_syscalls);
76897d5a220SFrederic Weisbecker 		sys_perf_refcount_exit++;
769f4b5ffccSJason Baron 	}
770f4b5ffccSJason Baron 	mutex_unlock(&syscall_trace_lock);
771f4b5ffccSJason Baron 	return ret;
772f4b5ffccSJason Baron }
773f4b5ffccSJason Baron 
perf_sysexit_disable(struct trace_event_call * call)7742425bcb9SSteven Rostedt (Red Hat) static void perf_sysexit_disable(struct trace_event_call *call)
775f4b5ffccSJason Baron {
776f4b5ffccSJason Baron 	int num;
777f4b5ffccSJason Baron 
7783bbe84e9SLai Jiangshan 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
779f4b5ffccSJason Baron 
780f4b5ffccSJason Baron 	mutex_lock(&syscall_trace_lock);
78197d5a220SFrederic Weisbecker 	sys_perf_refcount_exit--;
78297d5a220SFrederic Weisbecker 	clear_bit(num, enabled_perf_exit_syscalls);
78397d5a220SFrederic Weisbecker 	if (!sys_perf_refcount_exit)
78438516ab5SSteven Rostedt 		unregister_trace_sys_exit(perf_syscall_exit, NULL);
785f4b5ffccSJason Baron 	mutex_unlock(&syscall_trace_lock);
786f4b5ffccSJason Baron }
787f4b5ffccSJason Baron 
78807b139c8SLi Zefan #endif /* CONFIG_PERF_EVENTS */
789f4b5ffccSJason Baron 
syscall_enter_register(struct trace_event_call * event,enum trace_reg type,void * data)7902425bcb9SSteven Rostedt (Red Hat) static int syscall_enter_register(struct trace_event_call *event,
791ceec0b6fSJiri Olsa 				 enum trace_reg type, void *data)
7922239291aSSteven Rostedt {
7937f1d2f82SSteven Rostedt (Red Hat) 	struct trace_event_file *file = data;
79412ab74eeSSteven Rostedt 
7952239291aSSteven Rostedt 	switch (type) {
7962239291aSSteven Rostedt 	case TRACE_REG_REGISTER:
79712ab74eeSSteven Rostedt 		return reg_event_syscall_enter(file, event);
7982239291aSSteven Rostedt 	case TRACE_REG_UNREGISTER:
79912ab74eeSSteven Rostedt 		unreg_event_syscall_enter(file, event);
8002239291aSSteven Rostedt 		return 0;
8012239291aSSteven Rostedt 
8022239291aSSteven Rostedt #ifdef CONFIG_PERF_EVENTS
8032239291aSSteven Rostedt 	case TRACE_REG_PERF_REGISTER:
8042239291aSSteven Rostedt 		return perf_sysenter_enable(event);
8052239291aSSteven Rostedt 	case TRACE_REG_PERF_UNREGISTER:
8062239291aSSteven Rostedt 		perf_sysenter_disable(event);
8072239291aSSteven Rostedt 		return 0;
808ceec0b6fSJiri Olsa 	case TRACE_REG_PERF_OPEN:
809ceec0b6fSJiri Olsa 	case TRACE_REG_PERF_CLOSE:
810489c75c3SJiri Olsa 	case TRACE_REG_PERF_ADD:
811489c75c3SJiri Olsa 	case TRACE_REG_PERF_DEL:
812ceec0b6fSJiri Olsa 		return 0;
8132239291aSSteven Rostedt #endif
8142239291aSSteven Rostedt 	}
8152239291aSSteven Rostedt 	return 0;
8162239291aSSteven Rostedt }
8172239291aSSteven Rostedt 
syscall_exit_register(struct trace_event_call * event,enum trace_reg type,void * data)8182425bcb9SSteven Rostedt (Red Hat) static int syscall_exit_register(struct trace_event_call *event,
819ceec0b6fSJiri Olsa 				 enum trace_reg type, void *data)
8202239291aSSteven Rostedt {
8217f1d2f82SSteven Rostedt (Red Hat) 	struct trace_event_file *file = data;
82212ab74eeSSteven Rostedt 
8232239291aSSteven Rostedt 	switch (type) {
8242239291aSSteven Rostedt 	case TRACE_REG_REGISTER:
82512ab74eeSSteven Rostedt 		return reg_event_syscall_exit(file, event);
8262239291aSSteven Rostedt 	case TRACE_REG_UNREGISTER:
82712ab74eeSSteven Rostedt 		unreg_event_syscall_exit(file, event);
8282239291aSSteven Rostedt 		return 0;
8292239291aSSteven Rostedt 
8302239291aSSteven Rostedt #ifdef CONFIG_PERF_EVENTS
8312239291aSSteven Rostedt 	case TRACE_REG_PERF_REGISTER:
8322239291aSSteven Rostedt 		return perf_sysexit_enable(event);
8332239291aSSteven Rostedt 	case TRACE_REG_PERF_UNREGISTER:
8342239291aSSteven Rostedt 		perf_sysexit_disable(event);
8352239291aSSteven Rostedt 		return 0;
836ceec0b6fSJiri Olsa 	case TRACE_REG_PERF_OPEN:
837ceec0b6fSJiri Olsa 	case TRACE_REG_PERF_CLOSE:
838489c75c3SJiri Olsa 	case TRACE_REG_PERF_ADD:
839489c75c3SJiri Olsa 	case TRACE_REG_PERF_DEL:
840ceec0b6fSJiri Olsa 		return 0;
8412239291aSSteven Rostedt #endif
8422239291aSSteven Rostedt 	}
8432239291aSSteven Rostedt 	return 0;
8442239291aSSteven Rostedt }
845