#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;

/* Map the symbol at @syscall to its compile-time syscall metadata entry. */
static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
	struct syscall_metadata *start;
	struct syscall_metadata *stop;
	char str[KSYM_SYMBOL_LEN];

	start = (struct syscall_metadata *)__start_syscalls_metadata;
	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

	for ( ; start < stop; start++) {
		/*
		 * Only compare after the "sys" prefix. Archs that use
		 * syscall wrappers may have syscall symbol aliases prefixed
		 * with "SyS" instead of "sys", leading to an unwanted
		 * mismatch.
		 */
		if (start->name && !strcmp(start->name + 3, str + 3))
			return start;
	}
	return NULL;
}

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
		return NULL;

	return syscalls_metadata[nr];
}

/* Resolve a syscall name to its syscall number; returns -1 if not found. */
int syscall_name_to_nr(char *name)
{
	int i;

	if (!syscalls_metadata)
		return -1;

	for (i = 0; i < NR_syscalls; i++) {
		if (syscalls_metadata[i]) {
			if (!strcmp(syscalls_metadata[i]->name, name))
				return i;
		}
	}
	return -1;
}

void set_syscall_enter_id(int num, int id)
{
	syscalls_metadata[num]->enter_id = id;
}

void set_syscall_exit_id(int num, int id)
{
	syscalls_metadata[num]->exit_id = id;
}

enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry)
		goto end;

	if (entry->enter_id != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types */
		if (trace_flags & TRACE_ITER_VERBOSE) {
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				       trace->args[i],
				       i == entry->nb_args - 1 ?
"" : ", "); 116 if (!ret) 117 return TRACE_TYPE_PARTIAL_LINE; 118 } 119 120 ret = trace_seq_putc(s, ')'); 121 if (!ret) 122 return TRACE_TYPE_PARTIAL_LINE; 123 124 end: 125 ret = trace_seq_putc(s, '\n'); 126 if (!ret) 127 return TRACE_TYPE_PARTIAL_LINE; 128 129 return TRACE_TYPE_HANDLED; 130 } 131 132 enum print_line_t 133 print_syscall_exit(struct trace_iterator *iter, int flags) 134 { 135 struct trace_seq *s = &iter->seq; 136 struct trace_entry *ent = iter->ent; 137 struct syscall_trace_exit *trace; 138 int syscall; 139 struct syscall_metadata *entry; 140 int ret; 141 142 trace = (typeof(trace))ent; 143 syscall = trace->nr; 144 entry = syscall_nr_to_meta(syscall); 145 146 if (!entry) { 147 trace_seq_printf(s, "\n"); 148 return TRACE_TYPE_HANDLED; 149 } 150 151 if (entry->exit_id != ent->type) { 152 WARN_ON_ONCE(1); 153 return TRACE_TYPE_UNHANDLED; 154 } 155 156 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 157 trace->ret); 158 if (!ret) 159 return TRACE_TYPE_PARTIAL_LINE; 160 161 return TRACE_TYPE_HANDLED; 162 } 163 164 extern char *__bad_type_size(void); 165 166 #define SYSCALL_FIELD(type, name) \ 167 sizeof(type) != sizeof(trace.name) ? \ 168 __bad_type_size() : \ 169 #type, #name, offsetof(typeof(trace), name), \ 170 sizeof(trace.name), is_signed_type(type) 171 172 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 173 { 174 int i; 175 int nr; 176 int ret; 177 struct syscall_metadata *entry; 178 struct syscall_trace_enter trace; 179 int offset = offsetof(struct syscall_trace_enter, args); 180 181 nr = syscall_name_to_nr(call->data); 182 entry = syscall_nr_to_meta(nr); 183 184 if (!entry) 185 return 0; 186 187 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 188 "\tsigned:%u;\n", 189 SYSCALL_FIELD(int, nr)); 190 if (!ret) 191 return 0; 192 193 for (i = 0; i < entry->nb_args; i++) { 194 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 195 entry->args[i]); 196 if (!ret) 197 return 0; 198 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" 199 "\tsigned:%u;\n", offset, 200 sizeof(unsigned long), 201 is_signed_type(unsigned long)); 202 if (!ret) 203 return 0; 204 offset += sizeof(unsigned long); 205 } 206 207 trace_seq_puts(s, "\nprint fmt: \""); 208 for (i = 0; i < entry->nb_args; i++) { 209 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 210 sizeof(unsigned long), 211 i == entry->nb_args - 1 ? 
"" : ", "); 212 if (!ret) 213 return 0; 214 } 215 trace_seq_putc(s, '"'); 216 217 for (i = 0; i < entry->nb_args; i++) { 218 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))", 219 entry->args[i]); 220 if (!ret) 221 return 0; 222 } 223 224 return trace_seq_putc(s, '\n'); 225 } 226 227 int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 228 { 229 int ret; 230 struct syscall_trace_exit trace; 231 232 ret = trace_seq_printf(s, 233 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 234 "\tsigned:%u;\n" 235 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 236 "\tsigned:%u;\n", 237 SYSCALL_FIELD(int, nr), 238 SYSCALL_FIELD(long, ret)); 239 if (!ret) 240 return 0; 241 242 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 243 } 244 245 int syscall_enter_define_fields(struct ftrace_event_call *call) 246 { 247 struct syscall_trace_enter trace; 248 struct syscall_metadata *meta; 249 int ret; 250 int nr; 251 int i; 252 int offset = offsetof(typeof(trace), args); 253 254 nr = syscall_name_to_nr(call->data); 255 meta = syscall_nr_to_meta(nr); 256 257 if (!meta) 258 return 0; 259 260 ret = trace_define_common_fields(call); 261 if (ret) 262 return ret; 263 264 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 265 if (ret) 266 return ret; 267 268 for (i = 0; i < meta->nb_args; i++) { 269 ret = trace_define_field(call, meta->types[i], 270 meta->args[i], offset, 271 sizeof(unsigned long), 0, 272 FILTER_OTHER); 273 offset += sizeof(unsigned long); 274 } 275 276 return ret; 277 } 278 279 int syscall_exit_define_fields(struct ftrace_event_call *call) 280 { 281 struct syscall_trace_exit trace; 282 int ret; 283 284 ret = trace_define_common_fields(call); 285 if (ret) 286 return ret; 287 288 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 289 if (ret) 290 return ret; 291 292 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 293 FILTER_OTHER); 294 295 return ret; 296 } 297 298 void ftrace_syscall_enter(struct pt_regs *regs, long id) 299 { 300 struct syscall_trace_enter *entry; 301 struct syscall_metadata *sys_data; 302 struct ring_buffer_event *event; 303 struct ring_buffer *buffer; 304 int size; 305 int syscall_nr; 306 307 syscall_nr = syscall_get_nr(current, regs); 308 if (syscall_nr < 0) 309 return; 310 if (!test_bit(syscall_nr, enabled_enter_syscalls)) 311 return; 312 313 sys_data = syscall_nr_to_meta(syscall_nr); 314 if (!sys_data) 315 return; 316 317 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 318 319 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, 320 size, 0, 0); 321 if (!event) 322 return; 323 324 entry = ring_buffer_event_data(event); 325 entry->nr = syscall_nr; 326 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 327 328 if (!filter_current_check_discard(buffer, sys_data->enter_event, 329 entry, event)) 330 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 331 } 332 333 void ftrace_syscall_exit(struct pt_regs *regs, long ret) 334 { 335 struct syscall_trace_exit *entry; 336 struct syscall_metadata *sys_data; 337 struct ring_buffer_event *event; 338 struct ring_buffer *buffer; 339 int syscall_nr; 340 341 syscall_nr = syscall_get_nr(current, regs); 342 if (syscall_nr < 0) 343 return; 344 if (!test_bit(syscall_nr, enabled_exit_syscalls)) 345 return; 346 347 sys_data = syscall_nr_to_meta(syscall_nr); 348 if (!sys_data) 349 return; 350 351 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, 352 sizeof(*entry), 0, 0); 353 if (!event) 354 
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

int reg_event_syscall_enter(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;
	char *name;

	name = (char *)call->data;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
	int num;
	char *name;

	name = (char *)call->data;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_enter--;
	clear_bit(num, enabled_enter_syscalls);
	if (!sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;
	char *name;

	name = call->data;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_exit_syscalls);
		sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
	int num;
	char *name;

	name = call->data;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_exit--;
	clear_bit(num, enabled_exit_syscalls);
	if (!sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

struct trace_event event_syscall_enter = {
	.trace = print_syscall_enter,
};

struct trace_event event_syscall_exit = {
	.trace = print_syscall_exit,
};

/* Populate the syscall number -> metadata table at boot. */
int __init init_ftrace_syscalls(void)
{
	struct syscall_metadata *meta;
	unsigned long addr;
	int i;

	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
				    NR_syscalls, GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	for (i = 0; i < NR_syscalls; i++) {
		addr = arch_syscall_addr(i);
		meta = find_syscall_meta(addr);
		syscalls_metadata[i] = meta;
	}

	return 0;
}
core_initcall(init_ftrace_syscalls);

#ifdef CONFIG_EVENT_PROFILE

static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;

/* Perf profiling probe for syscall entry: emit the record via perf_tp_event(). */
static void prof_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	unsigned long flags;
	char *trace_buf;
	char *raw_data;
	int syscall_nr;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "profile buffer not large enough"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();

	trace_buf = rcu_dereference(perf_trace_buf);

	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_enter *) raw_data;
	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->enter_id;
	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			      (unsigned long *)&rec->args);
	perf_tp_event(sys_data->enter_id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}

int reg_prof_syscall_enter(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_enter)
		ret = register_trace_sys_enter(prof_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_prof_enter_syscalls);
		sys_prof_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_prof_syscall_enter(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_enter--;
	clear_bit(num, enabled_prof_enter_syscalls);
	if (!sys_prof_refcount_enter)
		unregister_trace_sys_enter(prof_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

/* Perf profiling probe for syscall exit: emit the record via perf_tp_event(). */
static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	unsigned long flags;
	int syscall_nr;
	char *trace_buf;
	char *raw_data;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* This size could probably be computed at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	/*
	 * Impossible today, but stay paranoid about future changes:
	 * ideally this check would happen at build time.
	 */
	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "exit event has grown above profile buffer size"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();

	trace_buf = rcu_dereference(perf_trace_buf);

	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_exit *)raw_data;

	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->exit_id;
	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	perf_tp_event(sys_data->exit_id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}

int reg_prof_syscall_exit(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_exit)
		ret = register_trace_sys_exit(prof_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_prof_exit_syscalls);
		sys_prof_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_prof_syscall_exit(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_exit--;
	clear_bit(num, enabled_prof_exit_syscalls);
	if (!sys_prof_refcount_exit)
		unregister_trace_sys_exit(prof_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_EVENT_PROFILE */