1 #include <trace/syscall.h> 2 #include <trace/events/syscalls.h> 3 #include <linux/kernel.h> 4 #include <linux/ftrace.h> 5 #include <linux/perf_event.h> 6 #include <asm/syscall.h> 7 8 #include "trace_output.h" 9 #include "trace.h" 10 11 static DEFINE_MUTEX(syscall_trace_lock); 12 static int sys_refcount_enter; 13 static int sys_refcount_exit; 14 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 15 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 16 17 extern unsigned long __start_syscalls_metadata[]; 18 extern unsigned long __stop_syscalls_metadata[]; 19 20 static struct syscall_metadata **syscalls_metadata; 21 22 static struct syscall_metadata *find_syscall_meta(unsigned long syscall) 23 { 24 struct syscall_metadata *start; 25 struct syscall_metadata *stop; 26 char str[KSYM_SYMBOL_LEN]; 27 28 29 start = (struct syscall_metadata *)__start_syscalls_metadata; 30 stop = (struct syscall_metadata *)__stop_syscalls_metadata; 31 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 32 33 for ( ; start < stop; start++) { 34 /* 35 * Only compare after the "sys" prefix. Archs that use 36 * syscall wrappers may have syscalls symbols aliases prefixed 37 * with "SyS" instead of "sys", leading to an unwanted 38 * mismatch. 39 */ 40 if (start->name && !strcmp(start->name + 3, str + 3)) 41 return start; 42 } 43 return NULL; 44 } 45 46 static struct syscall_metadata *syscall_nr_to_meta(int nr) 47 { 48 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) 49 return NULL; 50 51 return syscalls_metadata[nr]; 52 } 53 54 enum print_line_t 55 print_syscall_enter(struct trace_iterator *iter, int flags) 56 { 57 struct trace_seq *s = &iter->seq; 58 struct trace_entry *ent = iter->ent; 59 struct syscall_trace_enter *trace; 60 struct syscall_metadata *entry; 61 int i, ret, syscall; 62 63 trace = (typeof(trace))ent; 64 syscall = trace->nr; 65 entry = syscall_nr_to_meta(syscall); 66 67 if (!entry) 68 goto end; 69 70 if (entry->enter_event->id != ent->type) { 71 WARN_ON_ONCE(1); 72 goto end; 73 } 74 75 ret = trace_seq_printf(s, "%s(", entry->name); 76 if (!ret) 77 return TRACE_TYPE_PARTIAL_LINE; 78 79 for (i = 0; i < entry->nb_args; i++) { 80 /* parameter types */ 81 if (trace_flags & TRACE_ITER_VERBOSE) { 82 ret = trace_seq_printf(s, "%s ", entry->types[i]); 83 if (!ret) 84 return TRACE_TYPE_PARTIAL_LINE; 85 } 86 /* parameter values */ 87 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], 88 trace->args[i], 89 i == entry->nb_args - 1 ? "" : ", "); 90 if (!ret) 91 return TRACE_TYPE_PARTIAL_LINE; 92 } 93 94 ret = trace_seq_putc(s, ')'); 95 if (!ret) 96 return TRACE_TYPE_PARTIAL_LINE; 97 98 end: 99 ret = trace_seq_putc(s, '\n'); 100 if (!ret) 101 return TRACE_TYPE_PARTIAL_LINE; 102 103 return TRACE_TYPE_HANDLED; 104 } 105 106 enum print_line_t 107 print_syscall_exit(struct trace_iterator *iter, int flags) 108 { 109 struct trace_seq *s = &iter->seq; 110 struct trace_entry *ent = iter->ent; 111 struct syscall_trace_exit *trace; 112 int syscall; 113 struct syscall_metadata *entry; 114 int ret; 115 116 trace = (typeof(trace))ent; 117 syscall = trace->nr; 118 entry = syscall_nr_to_meta(syscall); 119 120 if (!entry) { 121 trace_seq_printf(s, "\n"); 122 return TRACE_TYPE_HANDLED; 123 } 124 125 if (entry->exit_event->id != ent->type) { 126 WARN_ON_ONCE(1); 127 return TRACE_TYPE_UNHANDLED; 128 } 129 130 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 131 trace->ret); 132 if (!ret) 133 return TRACE_TYPE_PARTIAL_LINE; 134 135 return TRACE_TYPE_HANDLED; 136 } 137 138 extern char *__bad_type_size(void); 139 140 #define SYSCALL_FIELD(type, name) \ 141 sizeof(type) != sizeof(trace.name) ? \ 142 __bad_type_size() : \ 143 #type, #name, offsetof(typeof(trace), name), \ 144 sizeof(trace.name), is_signed_type(type) 145 146 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 147 { 148 int i; 149 int ret; 150 struct syscall_metadata *entry = call->data; 151 struct syscall_trace_enter trace; 152 int offset = offsetof(struct syscall_trace_enter, args); 153 154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 155 "\tsigned:%u;\n", 156 SYSCALL_FIELD(int, nr)); 157 if (!ret) 158 return 0; 159 160 for (i = 0; i < entry->nb_args; i++) { 161 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 162 entry->args[i]); 163 if (!ret) 164 return 0; 165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" 166 "\tsigned:%u;\n", offset, 167 sizeof(unsigned long), 168 is_signed_type(unsigned long)); 169 if (!ret) 170 return 0; 171 offset += sizeof(unsigned long); 172 } 173 174 trace_seq_puts(s, "\nprint fmt: \""); 175 for (i = 0; i < entry->nb_args; i++) { 176 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 177 sizeof(unsigned long), 178 i == entry->nb_args - 1 ? "" : ", "); 179 if (!ret) 180 return 0; 181 } 182 trace_seq_putc(s, '"'); 183 184 for (i = 0; i < entry->nb_args; i++) { 185 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))", 186 entry->args[i]); 187 if (!ret) 188 return 0; 189 } 190 191 return trace_seq_putc(s, '\n'); 192 } 193 194 int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 195 { 196 int ret; 197 struct syscall_trace_exit trace; 198 199 ret = trace_seq_printf(s, 200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 201 "\tsigned:%u;\n" 202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 203 "\tsigned:%u;\n", 204 SYSCALL_FIELD(int, nr), 205 SYSCALL_FIELD(long, ret)); 206 if (!ret) 207 return 0; 208 209 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 210 } 211 212 int syscall_enter_define_fields(struct ftrace_event_call *call) 213 { 214 struct syscall_trace_enter trace; 215 struct syscall_metadata *meta = call->data; 216 int ret; 217 int i; 218 int offset = offsetof(typeof(trace), args); 219 220 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 221 if (ret) 222 return ret; 223 224 for (i = 0; i < meta->nb_args; i++) { 225 ret = trace_define_field(call, meta->types[i], 226 meta->args[i], offset, 227 sizeof(unsigned long), 0, 228 FILTER_OTHER); 229 offset += sizeof(unsigned long); 230 } 231 232 return ret; 233 } 234 235 int syscall_exit_define_fields(struct ftrace_event_call *call) 236 { 237 struct syscall_trace_exit trace; 238 int ret; 239 240 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 241 if (ret) 242 return ret; 243 244 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 245 FILTER_OTHER); 246 247 return ret; 248 } 249 250 void ftrace_syscall_enter(struct pt_regs *regs, long id) 251 { 252 struct syscall_trace_enter *entry; 253 struct syscall_metadata *sys_data; 254 struct ring_buffer_event *event; 255 struct ring_buffer *buffer; 256 int size; 257 int syscall_nr; 258 259 syscall_nr = syscall_get_nr(current, regs); 260 if (syscall_nr < 0) 261 return; 262 if (!test_bit(syscall_nr, enabled_enter_syscalls)) 263 return; 264 265 sys_data = syscall_nr_to_meta(syscall_nr); 266 if (!sys_data) 267 return; 268 269 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 270 271 event = trace_current_buffer_lock_reserve(&buffer, 272 sys_data->enter_event->id, size, 0, 0); 273 if (!event) 274 return; 275 276 entry = ring_buffer_event_data(event); 277 entry->nr = syscall_nr; 278 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 279 280 if (!filter_current_check_discard(buffer, sys_data->enter_event, 281 entry, event)) 282 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 283 } 284 285 void ftrace_syscall_exit(struct pt_regs *regs, long ret) 286 { 287 struct syscall_trace_exit *entry; 288 struct syscall_metadata *sys_data; 289 struct ring_buffer_event *event; 290 struct ring_buffer *buffer; 291 int syscall_nr; 292 293 syscall_nr = syscall_get_nr(current, regs); 294 if (syscall_nr < 0) 295 return; 296 if (!test_bit(syscall_nr, enabled_exit_syscalls)) 297 return; 298 299 sys_data = syscall_nr_to_meta(syscall_nr); 300 if (!sys_data) 301 return; 302 303 event = trace_current_buffer_lock_reserve(&buffer, 304 sys_data->exit_event->id, sizeof(*entry), 0, 0); 305 if (!event) 306 return; 307 308 entry = ring_buffer_event_data(event); 309 entry->nr = syscall_nr; 310 entry->ret = syscall_get_return_value(current, regs); 311 312 if (!filter_current_check_discard(buffer, sys_data->exit_event, 313 entry, event)) 314 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 315 } 316 317 int reg_event_syscall_enter(struct ftrace_event_call *call) 318 { 319 int ret = 0; 320 int num; 321 322 num = ((struct syscall_metadata *)call->data)->syscall_nr; 323 if (num < 0 || num >= NR_syscalls) 324 return -ENOSYS; 325 mutex_lock(&syscall_trace_lock); 326 if (!sys_refcount_enter) 327 ret = register_trace_sys_enter(ftrace_syscall_enter); 328 if (!ret) { 329 set_bit(num, enabled_enter_syscalls); 330 sys_refcount_enter++; 331 } 332 mutex_unlock(&syscall_trace_lock); 333 return ret; 334 } 335 336 void unreg_event_syscall_enter(struct ftrace_event_call *call) 337 { 338 int num; 339 340 num = ((struct syscall_metadata *)call->data)->syscall_nr; 341 if (num < 0 || num >= NR_syscalls) 342 return; 343 mutex_lock(&syscall_trace_lock); 344 sys_refcount_enter--; 345 clear_bit(num, enabled_enter_syscalls); 346 if (!sys_refcount_enter) 347 unregister_trace_sys_enter(ftrace_syscall_enter); 348 mutex_unlock(&syscall_trace_lock); 349 } 350 351 int reg_event_syscall_exit(struct ftrace_event_call *call) 352 { 353 int ret = 0; 354 int num; 355 356 num = ((struct syscall_metadata *)call->data)->syscall_nr; 357 if (num < 0 || num >= NR_syscalls) 358 return -ENOSYS; 359 mutex_lock(&syscall_trace_lock); 360 if (!sys_refcount_exit) 361 ret = register_trace_sys_exit(ftrace_syscall_exit); 362 if (!ret) { 363 set_bit(num, enabled_exit_syscalls); 364 sys_refcount_exit++; 365 } 366 mutex_unlock(&syscall_trace_lock); 367 return ret; 368 } 369 370 void unreg_event_syscall_exit(struct ftrace_event_call *call) 371 { 372 int num; 373 374 num = ((struct syscall_metadata *)call->data)->syscall_nr; 375 if (num < 0 || num >= NR_syscalls) 376 return; 377 mutex_lock(&syscall_trace_lock); 378 sys_refcount_exit--; 379 clear_bit(num, enabled_exit_syscalls); 380 if (!sys_refcount_exit) 381 unregister_trace_sys_exit(ftrace_syscall_exit); 382 mutex_unlock(&syscall_trace_lock); 383 } 384 385 int init_syscall_trace(struct ftrace_event_call *call) 386 { 387 int id; 388 389 id = register_ftrace_event(call->event); 390 if (!id) 391 return -ENODEV; 392 call->id = id; 393 INIT_LIST_HEAD(&call->fields); 394 return 0; 395 } 396 397 int __init init_ftrace_syscalls(void) 398 { 399 struct syscall_metadata *meta; 400 unsigned long addr; 401 int i; 402 403 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * 404 NR_syscalls, GFP_KERNEL); 405 if (!syscalls_metadata) { 406 WARN_ON(1); 407 return -ENOMEM; 408 } 409 410 for (i = 0; i < NR_syscalls; i++) { 411 addr = arch_syscall_addr(i); 412 meta = find_syscall_meta(addr); 413 if (!meta) 414 continue; 415 416 meta->syscall_nr = i; 417 syscalls_metadata[i] = meta; 418 } 419 420 return 0; 421 } 422 core_initcall(init_ftrace_syscalls); 423 424 #ifdef CONFIG_PERF_EVENTS 425 426 static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 427 static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 428 static int sys_prof_refcount_enter; 429 static int sys_prof_refcount_exit; 430 431 static void prof_syscall_enter(struct pt_regs *regs, long id) 432 { 433 struct syscall_metadata *sys_data; 434 struct syscall_trace_enter *rec; 435 unsigned long flags; 436 int syscall_nr; 437 int rctx; 438 int size; 439 440 syscall_nr = syscall_get_nr(current, regs); 441 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 442 return; 443 444 sys_data = syscall_nr_to_meta(syscall_nr); 445 if (!sys_data) 446 return; 447 448 /* get the size after alignment with the u32 buffer size field */ 449 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); 450 size = ALIGN(size + sizeof(u32), sizeof(u64)); 451 size -= sizeof(u32); 452 453 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 454 "profile buffer not large enough")) 455 return; 456 457 rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, 458 sys_data->enter_event->id, &rctx, &flags); 459 if (!rec) 460 return; 461 462 rec->nr = syscall_nr; 463 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 464 (unsigned long *)&rec->args); 465 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 466 } 467 468 int prof_sysenter_enable(struct ftrace_event_call *call) 469 { 470 int ret = 0; 471 int num; 472 473 num = ((struct syscall_metadata *)call->data)->syscall_nr; 474 475 mutex_lock(&syscall_trace_lock); 476 if (!sys_prof_refcount_enter) 477 ret = register_trace_sys_enter(prof_syscall_enter); 478 if (ret) { 479 pr_info("event trace: Could not activate" 480 "syscall entry trace point"); 481 } else { 482 set_bit(num, enabled_prof_enter_syscalls); 483 sys_prof_refcount_enter++; 484 } 485 mutex_unlock(&syscall_trace_lock); 486 return ret; 487 } 488 489 void prof_sysenter_disable(struct ftrace_event_call *call) 490 { 491 int num; 492 493 num = ((struct syscall_metadata *)call->data)->syscall_nr; 494 495 mutex_lock(&syscall_trace_lock); 496 sys_prof_refcount_enter--; 497 clear_bit(num, enabled_prof_enter_syscalls); 498 if (!sys_prof_refcount_enter) 499 unregister_trace_sys_enter(prof_syscall_enter); 500 mutex_unlock(&syscall_trace_lock); 501 } 502 503 static void prof_syscall_exit(struct pt_regs *regs, long ret) 504 { 505 struct syscall_metadata *sys_data; 506 struct syscall_trace_exit *rec; 507 unsigned long flags; 508 int syscall_nr; 509 int rctx; 510 int size; 511 512 syscall_nr = syscall_get_nr(current, regs); 513 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 514 return; 515 516 sys_data = syscall_nr_to_meta(syscall_nr); 517 if (!sys_data) 518 return; 519 520 /* We can probably do that at build time */ 521 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); 522 size -= sizeof(u32); 523 524 /* 525 * Impossible, but be paranoid with the future 526 * How to put this check outside runtime? 527 */ 528 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 529 "exit event has grown above profile buffer size")) 530 return; 531 532 rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, 533 sys_data->exit_event->id, &rctx, &flags); 534 if (!rec) 535 return; 536 537 rec->nr = syscall_nr; 538 rec->ret = syscall_get_return_value(current, regs); 539 540 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 541 } 542 543 int prof_sysexit_enable(struct ftrace_event_call *call) 544 { 545 int ret = 0; 546 int num; 547 548 num = ((struct syscall_metadata *)call->data)->syscall_nr; 549 550 mutex_lock(&syscall_trace_lock); 551 if (!sys_prof_refcount_exit) 552 ret = register_trace_sys_exit(prof_syscall_exit); 553 if (ret) { 554 pr_info("event trace: Could not activate" 555 "syscall entry trace point"); 556 } else { 557 set_bit(num, enabled_prof_exit_syscalls); 558 sys_prof_refcount_exit++; 559 } 560 mutex_unlock(&syscall_trace_lock); 561 return ret; 562 } 563 564 void prof_sysexit_disable(struct ftrace_event_call *call) 565 { 566 int num; 567 568 num = ((struct syscall_metadata *)call->data)->syscall_nr; 569 570 mutex_lock(&syscall_trace_lock); 571 sys_prof_refcount_exit--; 572 clear_bit(num, enabled_prof_exit_syscalls); 573 if (!sys_prof_refcount_exit) 574 unregister_trace_sys_exit(prof_syscall_exit); 575 mutex_unlock(&syscall_trace_lock); 576 } 577 578 #endif /* CONFIG_PERF_EVENTS */ 579 580