// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021, Microsoft Corporation.
 *
 * Authors:
 *   Beau Belgrave <[email protected]>
 */

#include <linux/bitmap.h>
#include <linux/cdev.h>
#include <linux/hashtable.h>
#include <linux/list.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/ioctl.h>
#include <linux/jhash.h>
#include <linux/refcount.h>
#include <linux/trace_events.h>
#include <linux/tracefs.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/user_events.h>
#include "trace_dynevent.h"
#include "trace_output.h"
#include "trace.h"

#define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1)

#define FIELD_DEPTH_TYPE 0
#define FIELD_DEPTH_NAME 1
#define FIELD_DEPTH_SIZE 2

/* Limit how long an event name plus args may be within the subsystem. */
#define MAX_EVENT_DESC 512
#define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
#define MAX_FIELD_ARRAY_SIZE 1024

/*
 * Internal bits (kernel side only) to keep track of connected probes:
 * These are used when status is requested in text form about an event. These
 * bits are compared against an internal byte on the event to determine which
 * probes to print out to the user.
 *
 * These do not reflect the mapped bytes between the user and kernel space.
 */
#define EVENT_STATUS_FTRACE BIT(0)
#define EVENT_STATUS_PERF BIT(1)
#define EVENT_STATUS_OTHER BIT(7)

/*
 * Stores the system name, tables, and locks for a group of events. This
 * allows isolation for events by various means.
 */
struct user_event_group {
	char			*system_name;
	struct hlist_node	node;
	struct mutex		reg_mutex;
	DECLARE_HASHTABLE(register_table, 8);
};

/* Group for init_user_ns mapping, top-most group */
static struct user_event_group *init_group;

/* Max allowed events for the whole system */
static unsigned int max_user_events = 32768;

/* Current number of events on the whole system */
static unsigned int current_user_events;

/*
 * Stores per-event properties. As users register events within a file,
 * a user_event might be created if it does not already exist. These are
 * globally used and their lifetime is tied to the refcnt member. These
 * cannot go away until the refcnt reaches one.
 */
struct user_event {
	struct user_event_group		*group;
	struct tracepoint		tracepoint;
	struct trace_event_call		call;
	struct trace_event_class	class;
	struct dyn_event		devent;
	struct hlist_node		node;
	struct list_head		fields;
	struct list_head		validators;
	refcount_t			refcnt;
	int				min_size;
	char				status;
};

/*
 * Stores per-mm/event properties that enable an address to be
 * updated properly for each task. As tasks are forked, we use
 * these to track enablement sites that are tied to an event.
 */
struct user_event_enabler {
	struct list_head	link;
	struct user_event	*event;
	unsigned long		addr;

	/* Track enable bit, flags, etc. Aligned for bitops.
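	 *
	 * Layout sketch (derived from the ENABLE_VAL_* defines below, given
	 * only as an illustration): bits 0-5 hold the bit number to update in
	 * the user-space word, bit 6 flags an in-flight async fault and bit 7
	 * flags a pending free. An enabler registered for bit 31 therefore
	 * has (values & ENABLE_VAL_BIT_MASK) == 31, and ENABLE_BITOPS() is
	 * what the flag bits are tested and set through.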
*/ 104 unsigned int values; 105 }; 106 107 /* Bits 0-5 are for the bit to update upon enable/disable (0-63 allowed) */ 108 #define ENABLE_VAL_BIT_MASK 0x3F 109 110 /* Bit 6 is for faulting status of enablement */ 111 #define ENABLE_VAL_FAULTING_BIT 6 112 113 /* Bit 7 is for freeing status of enablement */ 114 #define ENABLE_VAL_FREEING_BIT 7 115 116 /* Only duplicate the bit value */ 117 #define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK 118 119 #define ENABLE_BITOPS(e) ((unsigned long *)&(e)->values) 120 121 /* Used for asynchronous faulting in of pages */ 122 struct user_event_enabler_fault { 123 struct work_struct work; 124 struct user_event_mm *mm; 125 struct user_event_enabler *enabler; 126 }; 127 128 static struct kmem_cache *fault_cache; 129 130 /* Global list of memory descriptors using user_events */ 131 static LIST_HEAD(user_event_mms); 132 static DEFINE_SPINLOCK(user_event_mms_lock); 133 134 /* 135 * Stores per-file events references, as users register events 136 * within a file this structure is modified and freed via RCU. 137 * The lifetime of this struct is tied to the lifetime of the file. 138 * These are not shared and only accessible by the file that created it. 139 */ 140 struct user_event_refs { 141 struct rcu_head rcu; 142 int count; 143 struct user_event *events[]; 144 }; 145 146 struct user_event_file_info { 147 struct user_event_group *group; 148 struct user_event_refs *refs; 149 }; 150 151 #define VALIDATOR_ENSURE_NULL (1 << 0) 152 #define VALIDATOR_REL (1 << 1) 153 154 struct user_event_validator { 155 struct list_head link; 156 int offset; 157 int flags; 158 }; 159 160 typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i, 161 void *tpdata, bool *faulted); 162 163 static int user_event_parse(struct user_event_group *group, char *name, 164 char *args, char *flags, 165 struct user_event **newuser); 166 167 static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm); 168 static struct user_event_mm *user_event_mm_get_all(struct user_event *user); 169 static void user_event_mm_put(struct user_event_mm *mm); 170 171 static u32 user_event_key(char *name) 172 { 173 return jhash(name, strlen(name), 0); 174 } 175 176 static void user_event_group_destroy(struct user_event_group *group) 177 { 178 kfree(group->system_name); 179 kfree(group); 180 } 181 182 static char *user_event_group_system_name(struct user_namespace *user_ns) 183 { 184 char *system_name; 185 int len = sizeof(USER_EVENTS_SYSTEM) + 1; 186 187 if (user_ns != &init_user_ns) { 188 /* 189 * Unexpected at this point: 190 * We only currently support init_user_ns. 191 * When we enable more, this will trigger a failure so log. 
192 */ 193 pr_warn("user_events: Namespace other than init_user_ns!\n"); 194 return NULL; 195 } 196 197 system_name = kmalloc(len, GFP_KERNEL); 198 199 if (!system_name) 200 return NULL; 201 202 snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM); 203 204 return system_name; 205 } 206 207 static inline struct user_event_group 208 *user_event_group_from_user_ns(struct user_namespace *user_ns) 209 { 210 if (user_ns == &init_user_ns) 211 return init_group; 212 213 return NULL; 214 } 215 216 static struct user_event_group *current_user_event_group(void) 217 { 218 struct user_namespace *user_ns = current_user_ns(); 219 struct user_event_group *group = NULL; 220 221 while (user_ns) { 222 group = user_event_group_from_user_ns(user_ns); 223 224 if (group) 225 break; 226 227 user_ns = user_ns->parent; 228 } 229 230 return group; 231 } 232 233 static struct user_event_group 234 *user_event_group_create(struct user_namespace *user_ns) 235 { 236 struct user_event_group *group; 237 238 group = kzalloc(sizeof(*group), GFP_KERNEL); 239 240 if (!group) 241 return NULL; 242 243 group->system_name = user_event_group_system_name(user_ns); 244 245 if (!group->system_name) 246 goto error; 247 248 mutex_init(&group->reg_mutex); 249 hash_init(group->register_table); 250 251 return group; 252 error: 253 if (group) 254 user_event_group_destroy(group); 255 256 return NULL; 257 }; 258 259 static void user_event_enabler_destroy(struct user_event_enabler *enabler) 260 { 261 list_del_rcu(&enabler->link); 262 263 /* No longer tracking the event via the enabler */ 264 refcount_dec(&enabler->event->refcnt); 265 266 kfree(enabler); 267 } 268 269 static int user_event_mm_fault_in(struct user_event_mm *mm, unsigned long uaddr) 270 { 271 bool unlocked; 272 int ret; 273 274 mmap_read_lock(mm->mm); 275 276 /* Ensure MM has tasks, cannot use after exit_mm() */ 277 if (refcount_read(&mm->tasks) == 0) { 278 ret = -ENOENT; 279 goto out; 280 } 281 282 ret = fixup_user_fault(mm->mm, uaddr, FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE, 283 &unlocked); 284 out: 285 mmap_read_unlock(mm->mm); 286 287 return ret; 288 } 289 290 static int user_event_enabler_write(struct user_event_mm *mm, 291 struct user_event_enabler *enabler, 292 bool fixup_fault); 293 294 static void user_event_enabler_fault_fixup(struct work_struct *work) 295 { 296 struct user_event_enabler_fault *fault = container_of( 297 work, struct user_event_enabler_fault, work); 298 struct user_event_enabler *enabler = fault->enabler; 299 struct user_event_mm *mm = fault->mm; 300 unsigned long uaddr = enabler->addr; 301 int ret; 302 303 ret = user_event_mm_fault_in(mm, uaddr); 304 305 if (ret && ret != -ENOENT) { 306 struct user_event *user = enabler->event; 307 308 pr_warn("user_events: Fault for mm: 0x%pK @ 0x%llx event: %s\n", 309 mm->mm, (unsigned long long)uaddr, EVENT_NAME(user)); 310 } 311 312 /* Prevent state changes from racing */ 313 mutex_lock(&event_mutex); 314 315 /* User asked for enabler to be removed during fault */ 316 if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) { 317 user_event_enabler_destroy(enabler); 318 goto out; 319 } 320 321 /* 322 * If we managed to get the page, re-issue the write. We do not 323 * want to get into a possible infinite loop, which is why we only 324 * attempt again directly if the page came in. If we couldn't get 325 * the page here, then we will try again the next time the event is 326 * enabled/disabled. 
327 */ 328 clear_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)); 329 330 if (!ret) { 331 mmap_read_lock(mm->mm); 332 user_event_enabler_write(mm, enabler, true); 333 mmap_read_unlock(mm->mm); 334 } 335 out: 336 mutex_unlock(&event_mutex); 337 338 /* In all cases we no longer need the mm or fault */ 339 user_event_mm_put(mm); 340 kmem_cache_free(fault_cache, fault); 341 } 342 343 static bool user_event_enabler_queue_fault(struct user_event_mm *mm, 344 struct user_event_enabler *enabler) 345 { 346 struct user_event_enabler_fault *fault; 347 348 fault = kmem_cache_zalloc(fault_cache, GFP_NOWAIT | __GFP_NOWARN); 349 350 if (!fault) 351 return false; 352 353 INIT_WORK(&fault->work, user_event_enabler_fault_fixup); 354 fault->mm = user_event_mm_get(mm); 355 fault->enabler = enabler; 356 357 /* Don't try to queue in again while we have a pending fault */ 358 set_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)); 359 360 if (!schedule_work(&fault->work)) { 361 /* Allow another attempt later */ 362 clear_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)); 363 364 user_event_mm_put(mm); 365 kmem_cache_free(fault_cache, fault); 366 367 return false; 368 } 369 370 return true; 371 } 372 373 static int user_event_enabler_write(struct user_event_mm *mm, 374 struct user_event_enabler *enabler, 375 bool fixup_fault) 376 { 377 unsigned long uaddr = enabler->addr; 378 unsigned long *ptr; 379 struct page *page; 380 void *kaddr; 381 int ret; 382 383 lockdep_assert_held(&event_mutex); 384 mmap_assert_locked(mm->mm); 385 386 /* Ensure MM has tasks, cannot use after exit_mm() */ 387 if (refcount_read(&mm->tasks) == 0) 388 return -ENOENT; 389 390 if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)) || 391 test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)))) 392 return -EBUSY; 393 394 ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT, 395 &page, NULL, NULL); 396 397 if (unlikely(ret <= 0)) { 398 if (!fixup_fault) 399 return -EFAULT; 400 401 if (!user_event_enabler_queue_fault(mm, enabler)) 402 pr_warn("user_events: Unable to queue fault handler\n"); 403 404 return -EFAULT; 405 } 406 407 kaddr = kmap_local_page(page); 408 ptr = kaddr + (uaddr & ~PAGE_MASK); 409 410 /* Update bit atomically, user tracers must be atomic as well */ 411 if (enabler->event && enabler->event->status) 412 set_bit(enabler->values & ENABLE_VAL_BIT_MASK, ptr); 413 else 414 clear_bit(enabler->values & ENABLE_VAL_BIT_MASK, ptr); 415 416 kunmap_local(kaddr); 417 unpin_user_pages_dirty_lock(&page, 1, true); 418 419 return 0; 420 } 421 422 static bool user_event_enabler_exists(struct user_event_mm *mm, 423 unsigned long uaddr, unsigned char bit) 424 { 425 struct user_event_enabler *enabler; 426 struct user_event_enabler *next; 427 428 list_for_each_entry_safe(enabler, next, &mm->enablers, link) { 429 if (enabler->addr == uaddr && 430 (enabler->values & ENABLE_VAL_BIT_MASK) == bit) 431 return true; 432 } 433 434 return false; 435 } 436 437 static void user_event_enabler_update(struct user_event *user) 438 { 439 struct user_event_enabler *enabler; 440 struct user_event_mm *mm = user_event_mm_get_all(user); 441 struct user_event_mm *next; 442 443 while (mm) { 444 next = mm->next; 445 mmap_read_lock(mm->mm); 446 rcu_read_lock(); 447 448 list_for_each_entry_rcu(enabler, &mm->enablers, link) 449 if (enabler->event == user) 450 user_event_enabler_write(mm, enabler, true); 451 452 rcu_read_unlock(); 453 mmap_read_unlock(mm->mm); 454 user_event_mm_put(mm); 455 mm = next; 456 } 457 } 458 459 static 
bool user_event_enabler_dup(struct user_event_enabler *orig, 460 struct user_event_mm *mm) 461 { 462 struct user_event_enabler *enabler; 463 464 /* Skip pending frees */ 465 if (unlikely(test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(orig)))) 466 return true; 467 468 enabler = kzalloc(sizeof(*enabler), GFP_NOWAIT | __GFP_ACCOUNT); 469 470 if (!enabler) 471 return false; 472 473 enabler->event = orig->event; 474 enabler->addr = orig->addr; 475 476 /* Only dup part of value (ignore future flags, etc) */ 477 enabler->values = orig->values & ENABLE_VAL_DUP_MASK; 478 479 refcount_inc(&enabler->event->refcnt); 480 list_add_rcu(&enabler->link, &mm->enablers); 481 482 return true; 483 } 484 485 static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm) 486 { 487 refcount_inc(&mm->refcnt); 488 489 return mm; 490 } 491 492 static struct user_event_mm *user_event_mm_get_all(struct user_event *user) 493 { 494 struct user_event_mm *found = NULL; 495 struct user_event_enabler *enabler; 496 struct user_event_mm *mm; 497 498 /* 499 * We do not want to block fork/exec while enablements are being 500 * updated, so we use RCU to walk the current tasks that have used 501 * user_events ABI for 1 or more events. Each enabler found in each 502 * task that matches the event being updated has a write to reflect 503 * the kernel state back into the process. Waits/faults must not occur 504 * during this. So we scan the list under RCU for all the mm that have 505 * the event within it. This is needed because mm_read_lock() can wait. 506 * Each user mm returned has a ref inc to handle remove RCU races. 507 */ 508 rcu_read_lock(); 509 510 list_for_each_entry_rcu(mm, &user_event_mms, link) 511 list_for_each_entry_rcu(enabler, &mm->enablers, link) 512 if (enabler->event == user) { 513 mm->next = found; 514 found = user_event_mm_get(mm); 515 break; 516 } 517 518 rcu_read_unlock(); 519 520 return found; 521 } 522 523 static struct user_event_mm *user_event_mm_create(struct task_struct *t) 524 { 525 struct user_event_mm *user_mm; 526 unsigned long flags; 527 528 user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT); 529 530 if (!user_mm) 531 return NULL; 532 533 user_mm->mm = t->mm; 534 INIT_LIST_HEAD(&user_mm->enablers); 535 refcount_set(&user_mm->refcnt, 1); 536 refcount_set(&user_mm->tasks, 1); 537 538 spin_lock_irqsave(&user_event_mms_lock, flags); 539 list_add_rcu(&user_mm->link, &user_event_mms); 540 spin_unlock_irqrestore(&user_event_mms_lock, flags); 541 542 t->user_event_mm = user_mm; 543 544 /* 545 * The lifetime of the memory descriptor can slightly outlast 546 * the task lifetime if a ref to the user_event_mm is taken 547 * between list_del_rcu() and call_rcu(). Therefore we need 548 * to take a reference to it to ensure it can live this long 549 * under this corner case. This can also occur in clones that 550 * outlast the parent. 
551 */ 552 mmgrab(user_mm->mm); 553 554 return user_mm; 555 } 556 557 static struct user_event_mm *current_user_event_mm(void) 558 { 559 struct user_event_mm *user_mm = current->user_event_mm; 560 561 if (user_mm) 562 goto inc; 563 564 user_mm = user_event_mm_create(current); 565 566 if (!user_mm) 567 goto error; 568 inc: 569 refcount_inc(&user_mm->refcnt); 570 error: 571 return user_mm; 572 } 573 574 static void user_event_mm_destroy(struct user_event_mm *mm) 575 { 576 struct user_event_enabler *enabler, *next; 577 578 list_for_each_entry_safe(enabler, next, &mm->enablers, link) 579 user_event_enabler_destroy(enabler); 580 581 mmdrop(mm->mm); 582 kfree(mm); 583 } 584 585 static void user_event_mm_put(struct user_event_mm *mm) 586 { 587 if (mm && refcount_dec_and_test(&mm->refcnt)) 588 user_event_mm_destroy(mm); 589 } 590 591 static void delayed_user_event_mm_put(struct work_struct *work) 592 { 593 struct user_event_mm *mm; 594 595 mm = container_of(to_rcu_work(work), struct user_event_mm, put_rwork); 596 user_event_mm_put(mm); 597 } 598 599 void user_event_mm_remove(struct task_struct *t) 600 { 601 struct user_event_mm *mm; 602 unsigned long flags; 603 604 might_sleep(); 605 606 mm = t->user_event_mm; 607 t->user_event_mm = NULL; 608 609 /* Clone will increment the tasks, only remove if last clone */ 610 if (!refcount_dec_and_test(&mm->tasks)) 611 return; 612 613 /* Remove the mm from the list, so it can no longer be enabled */ 614 spin_lock_irqsave(&user_event_mms_lock, flags); 615 list_del_rcu(&mm->link); 616 spin_unlock_irqrestore(&user_event_mms_lock, flags); 617 618 /* 619 * We need to wait for currently occurring writes to stop within 620 * the mm. This is required since exit_mm() snaps the current rss 621 * stats and clears them. On the final mmdrop(), check_mm() will 622 * report a bug if these increment. 623 * 624 * All writes/pins are done under mmap_read lock, take the write 625 * lock to ensure in-progress faults have completed. Faults that 626 * are pending but yet to run will check the task count and skip 627 * the fault since the mm is going away. 628 */ 629 mmap_write_lock(mm->mm); 630 mmap_write_unlock(mm->mm); 631 632 /* 633 * Put for mm must be done after RCU delay to handle new refs in 634 * between the list_del_rcu() and now. This ensures any get refs 635 * during rcu_read_lock() are accounted for during list removal. 636 * 637 * CPU A | CPU B 638 * --------------------------------------------------------------- 639 * user_event_mm_remove() | rcu_read_lock(); 640 * list_del_rcu() | list_for_each_entry_rcu(); 641 * call_rcu() | refcount_inc(); 642 * . | rcu_read_unlock(); 643 * schedule_work() | . 644 * user_event_mm_put() | . 645 * 646 * mmdrop() cannot be called in the softirq context of call_rcu() 647 * so we use a work queue after call_rcu() to run within. 
 */
	INIT_RCU_WORK(&mm->put_rwork, delayed_user_event_mm_put);
	queue_rcu_work(system_wq, &mm->put_rwork);
}

void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
{
	struct user_event_mm *mm = user_event_mm_create(t);
	struct user_event_enabler *enabler;

	if (!mm)
		return;

	rcu_read_lock();

	list_for_each_entry_rcu(enabler, &old_mm->enablers, link)
		if (!user_event_enabler_dup(enabler, mm))
			goto error;

	rcu_read_unlock();

	return;
error:
	rcu_read_unlock();
	user_event_mm_remove(t);
}

static bool current_user_event_enabler_exists(unsigned long uaddr,
					      unsigned char bit)
{
	struct user_event_mm *user_mm = current_user_event_mm();
	bool exists;

	if (!user_mm)
		return false;

	exists = user_event_enabler_exists(user_mm, uaddr, bit);

	user_event_mm_put(user_mm);

	return exists;
}

static struct user_event_enabler
*user_event_enabler_create(struct user_reg *reg, struct user_event *user,
			   int *write_result)
{
	struct user_event_enabler *enabler;
	struct user_event_mm *user_mm;
	unsigned long uaddr = (unsigned long)reg->enable_addr;

	user_mm = current_user_event_mm();

	if (!user_mm)
		return NULL;

	enabler = kzalloc(sizeof(*enabler), GFP_KERNEL_ACCOUNT);

	if (!enabler)
		goto out;

	enabler->event = user;
	enabler->addr = uaddr;
	enabler->values = reg->enable_bit;
retry:
	/* Prevents state changes from racing with new enablers */
	mutex_lock(&event_mutex);

	/* Attempt to reflect the current state within the process */
	mmap_read_lock(user_mm->mm);
	*write_result = user_event_enabler_write(user_mm, enabler, false);
	mmap_read_unlock(user_mm->mm);

	/*
	 * If the write works, then we will track the enabler. A ref to the
	 * underlying user_event is held by the enabler to prevent it going
	 * away while the enabler is still in use by a process. The ref is
	 * removed when the enabler is destroyed. This means an event cannot
	 * be forcefully deleted from the system until all tasks using it
	 * exit or run exec(), which includes forks and clones.
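	 *
	 * Illustrative sketch (not part of this file): after a successful
	 * DIAG_IOCSREG a process typically checks the registered word before
	 * building a payload, e.g. with a hypothetical 32-bit "enabled"
	 * variable registered at enable_addr:
	 *
	 *	if (enabled & (1 << reg.enable_bit))
	 *		write_event_payload();
	 *
	 * The set_bit()/clear_bit() in user_event_enabler_write() is what
	 * flips that bit as tracers attach to or detach from the event.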
 */
	if (!*write_result) {
		refcount_inc(&enabler->event->refcnt);
		list_add_rcu(&enabler->link, &user_mm->enablers);
	}

	mutex_unlock(&event_mutex);

	if (*write_result) {
		/* Attempt to fault-in and retry if it worked */
		if (!user_event_mm_fault_in(user_mm, uaddr))
			goto retry;

		kfree(enabler);
		enabler = NULL;
	}
out:
	user_event_mm_put(user_mm);

	return enabler;
}

static __always_inline __must_check
bool user_event_last_ref(struct user_event *user)
{
	return refcount_read(&user->refcnt) == 1;
}

static __always_inline __must_check
size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
{
	size_t ret;

	pagefault_disable();

	ret = copy_from_iter_nocache(addr, bytes, i);

	pagefault_enable();

	return ret;
}

static struct list_head *user_event_get_fields(struct trace_event_call *call)
{
	struct user_event *user = (struct user_event *)call->data;

	return &user->fields;
}

/*
 * Parses a register command for user_events
 * Format: event_name[:FLAG1[,FLAG2...]] [field1[;field2...]]
 *
 * Example event named 'test' with a 20 char 'msg' field and an unsigned int
 * 'id' field after:
 * test char[20] msg;unsigned int id
 *
 * NOTE: Offsets are from the user data perspective, they are not from the
 * trace_entry/buffer perspective. We automatically add the common properties
 * sizes to the offset for the user.
 *
 * Upon success user_event has its ref count increased by 1.
 */
static int user_event_parse_cmd(struct user_event_group *group,
				char *raw_command, struct user_event **newuser)
{
	char *name = raw_command;
	char *args = strpbrk(name, " ");
	char *flags;

	if (args)
		*args++ = '\0';

	flags = strpbrk(name, ":");

	if (flags)
		*flags++ = '\0';

	return user_event_parse(group, name, args, flags, newuser);
}

static int user_field_array_size(const char *type)
{
	const char *start = strchr(type, '[');
	char val[8];
	char *bracket;
	int size = 0;

	if (start == NULL)
		return -EINVAL;

	if (strscpy(val, start + 1, sizeof(val)) <= 0)
		return -EINVAL;

	bracket = strchr(val, ']');

	if (!bracket)
		return -EINVAL;

	*bracket = '\0';

	if (kstrtouint(val, 0, &size))
		return -EINVAL;

	if (size > MAX_FIELD_ARRAY_SIZE)
		return -EINVAL;

	return size;
}

static int user_field_size(const char *type)
{
	/* long is not allowed from a user, since it's ambiguous in size */
	if (strcmp(type, "s64") == 0)
		return sizeof(s64);
	if (strcmp(type, "u64") == 0)
		return sizeof(u64);
	if (strcmp(type, "s32") == 0)
		return sizeof(s32);
	if (strcmp(type, "u32") == 0)
		return sizeof(u32);
	if (strcmp(type, "int") == 0)
		return sizeof(int);
	if (strcmp(type, "unsigned int") == 0)
		return sizeof(unsigned int);
	if (strcmp(type, "s16") == 0)
		return sizeof(s16);
	if (strcmp(type, "u16") == 0)
		return sizeof(u16);
	if (strcmp(type, "short") == 0)
		return sizeof(short);
	if (strcmp(type, "unsigned short") == 0)
		return sizeof(unsigned short);
	if (strcmp(type, "s8") == 0)
		return sizeof(s8);
	if (strcmp(type, "u8") == 0)
		return sizeof(u8);
	if (strcmp(type, "char") == 0)
		return sizeof(char);
	if (strcmp(type, "unsigned char") == 0)
		return sizeof(unsigned char);
	if (str_has_prefix(type, "char["))
		return user_field_array_size(type);
	if (str_has_prefix(type, "unsigned char["))
		return user_field_array_size(type);
	if (str_has_prefix(type, "__data_loc "))
		return sizeof(u32);
	if (str_has_prefix(type, "__rel_loc "))
		return sizeof(u32);

	/* Unknown basic type, error */
	return -EINVAL;
}

static void user_event_destroy_validators(struct user_event *user)
{
	struct user_event_validator *validator, *next;
	struct list_head *head = &user->validators;

	list_for_each_entry_safe(validator, next, head, link) {
		list_del(&validator->link);
		kfree(validator);
	}
}

static void user_event_destroy_fields(struct user_event *user)
{
	struct ftrace_event_field *field, *next;
	struct list_head *head = &user->fields;

	list_for_each_entry_safe(field, next, head, link) {
		list_del(&field->link);
		kfree(field);
	}
}

static int user_event_add_field(struct user_event *user, const char *type,
				const char *name, int offset, int size,
				int is_signed, int filter_type)
{
	struct user_event_validator *validator;
	struct ftrace_event_field *field;
	int validator_flags = 0;

	field = kmalloc(sizeof(*field), GFP_KERNEL_ACCOUNT);

	if (!field)
		return -ENOMEM;

	if (str_has_prefix(type, "__data_loc "))
		goto add_validator;

	if (str_has_prefix(type, "__rel_loc ")) {
		validator_flags |= VALIDATOR_REL;
		goto add_validator;
	}

	goto add_field;

add_validator:
	if (strstr(type, "char") != NULL)
		validator_flags |= VALIDATOR_ENSURE_NULL;

	validator = kmalloc(sizeof(*validator), GFP_KERNEL_ACCOUNT);

	if (!validator) {
		kfree(field);
		return -ENOMEM;
	}

	validator->flags = validator_flags;
	validator->offset = offset;

	/* Want sequential access when validating */
	list_add_tail(&validator->link, &user->validators);

add_field:
	field->type = type;
	field->name = name;
	field->offset = offset;
	field->size = size;
	field->is_signed = is_signed;
	field->filter_type = filter_type;

	if (filter_type == FILTER_OTHER)
		field->filter_type = filter_assign_type(type);

	list_add(&field->link, &user->fields);

	/*
	 * Min size from user writes that is required; this does not include
	 * the size of trace_entry (common fields).
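	 *
	 * Worked example (illustrative, using the sample format shown above):
	 * for "test char[20] msg;unsigned int id", msg takes the first 20
	 * bytes of user data and id the next 4, so min_size ends up as 24
	 * bytes of payload beyond the common trace_entry header.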
960 */ 961 user->min_size = (offset + size) - sizeof(struct trace_entry); 962 963 return 0; 964 } 965 966 /* 967 * Parses the values of a field within the description 968 * Format: type name [size] 969 */ 970 static int user_event_parse_field(char *field, struct user_event *user, 971 u32 *offset) 972 { 973 char *part, *type, *name; 974 u32 depth = 0, saved_offset = *offset; 975 int len, size = -EINVAL; 976 bool is_struct = false; 977 978 field = skip_spaces(field); 979 980 if (*field == '\0') 981 return 0; 982 983 /* Handle types that have a space within */ 984 len = str_has_prefix(field, "unsigned "); 985 if (len) 986 goto skip_next; 987 988 len = str_has_prefix(field, "struct "); 989 if (len) { 990 is_struct = true; 991 goto skip_next; 992 } 993 994 len = str_has_prefix(field, "__data_loc unsigned "); 995 if (len) 996 goto skip_next; 997 998 len = str_has_prefix(field, "__data_loc "); 999 if (len) 1000 goto skip_next; 1001 1002 len = str_has_prefix(field, "__rel_loc unsigned "); 1003 if (len) 1004 goto skip_next; 1005 1006 len = str_has_prefix(field, "__rel_loc "); 1007 if (len) 1008 goto skip_next; 1009 1010 goto parse; 1011 skip_next: 1012 type = field; 1013 field = strpbrk(field + len, " "); 1014 1015 if (field == NULL) 1016 return -EINVAL; 1017 1018 *field++ = '\0'; 1019 depth++; 1020 parse: 1021 name = NULL; 1022 1023 while ((part = strsep(&field, " ")) != NULL) { 1024 switch (depth++) { 1025 case FIELD_DEPTH_TYPE: 1026 type = part; 1027 break; 1028 case FIELD_DEPTH_NAME: 1029 name = part; 1030 break; 1031 case FIELD_DEPTH_SIZE: 1032 if (!is_struct) 1033 return -EINVAL; 1034 1035 if (kstrtou32(part, 10, &size)) 1036 return -EINVAL; 1037 break; 1038 default: 1039 return -EINVAL; 1040 } 1041 } 1042 1043 if (depth < FIELD_DEPTH_SIZE || !name) 1044 return -EINVAL; 1045 1046 if (depth == FIELD_DEPTH_SIZE) 1047 size = user_field_size(type); 1048 1049 if (size == 0) 1050 return -EINVAL; 1051 1052 if (size < 0) 1053 return size; 1054 1055 *offset = saved_offset + size; 1056 1057 return user_event_add_field(user, type, name, saved_offset, size, 1058 type[0] != 'u', FILTER_OTHER); 1059 } 1060 1061 static int user_event_parse_fields(struct user_event *user, char *args) 1062 { 1063 char *field; 1064 u32 offset = sizeof(struct trace_entry); 1065 int ret = -EINVAL; 1066 1067 if (args == NULL) 1068 return 0; 1069 1070 while ((field = strsep(&args, ";")) != NULL) { 1071 ret = user_event_parse_field(field, user, &offset); 1072 1073 if (ret) 1074 break; 1075 } 1076 1077 return ret; 1078 } 1079 1080 static struct trace_event_fields user_event_fields_array[1]; 1081 1082 static const char *user_field_format(const char *type) 1083 { 1084 if (strcmp(type, "s64") == 0) 1085 return "%lld"; 1086 if (strcmp(type, "u64") == 0) 1087 return "%llu"; 1088 if (strcmp(type, "s32") == 0) 1089 return "%d"; 1090 if (strcmp(type, "u32") == 0) 1091 return "%u"; 1092 if (strcmp(type, "int") == 0) 1093 return "%d"; 1094 if (strcmp(type, "unsigned int") == 0) 1095 return "%u"; 1096 if (strcmp(type, "s16") == 0) 1097 return "%d"; 1098 if (strcmp(type, "u16") == 0) 1099 return "%u"; 1100 if (strcmp(type, "short") == 0) 1101 return "%d"; 1102 if (strcmp(type, "unsigned short") == 0) 1103 return "%u"; 1104 if (strcmp(type, "s8") == 0) 1105 return "%d"; 1106 if (strcmp(type, "u8") == 0) 1107 return "%u"; 1108 if (strcmp(type, "char") == 0) 1109 return "%d"; 1110 if (strcmp(type, "unsigned char") == 0) 1111 return "%u"; 1112 if (strstr(type, "char[") != NULL) 1113 return "%s"; 1114 1115 /* Unknown, likely struct, allowed treat as 
64-bit */ 1116 return "%llu"; 1117 } 1118 1119 static bool user_field_is_dyn_string(const char *type, const char **str_func) 1120 { 1121 if (str_has_prefix(type, "__data_loc ")) { 1122 *str_func = "__get_str"; 1123 goto check; 1124 } 1125 1126 if (str_has_prefix(type, "__rel_loc ")) { 1127 *str_func = "__get_rel_str"; 1128 goto check; 1129 } 1130 1131 return false; 1132 check: 1133 return strstr(type, "char") != NULL; 1134 } 1135 1136 #define LEN_OR_ZERO (len ? len - pos : 0) 1137 static int user_dyn_field_set_string(int argc, const char **argv, int *iout, 1138 char *buf, int len, bool *colon) 1139 { 1140 int pos = 0, i = *iout; 1141 1142 *colon = false; 1143 1144 for (; i < argc; ++i) { 1145 if (i != *iout) 1146 pos += snprintf(buf + pos, LEN_OR_ZERO, " "); 1147 1148 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", argv[i]); 1149 1150 if (strchr(argv[i], ';')) { 1151 ++i; 1152 *colon = true; 1153 break; 1154 } 1155 } 1156 1157 /* Actual set, advance i */ 1158 if (len != 0) 1159 *iout = i; 1160 1161 return pos + 1; 1162 } 1163 1164 static int user_field_set_string(struct ftrace_event_field *field, 1165 char *buf, int len, bool colon) 1166 { 1167 int pos = 0; 1168 1169 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->type); 1170 pos += snprintf(buf + pos, LEN_OR_ZERO, " "); 1171 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->name); 1172 1173 if (colon) 1174 pos += snprintf(buf + pos, LEN_OR_ZERO, ";"); 1175 1176 return pos + 1; 1177 } 1178 1179 static int user_event_set_print_fmt(struct user_event *user, char *buf, int len) 1180 { 1181 struct ftrace_event_field *field, *next; 1182 struct list_head *head = &user->fields; 1183 int pos = 0, depth = 0; 1184 const char *str_func; 1185 1186 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 1187 1188 list_for_each_entry_safe_reverse(field, next, head, link) { 1189 if (depth != 0) 1190 pos += snprintf(buf + pos, LEN_OR_ZERO, " "); 1191 1192 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s", 1193 field->name, user_field_format(field->type)); 1194 1195 depth++; 1196 } 1197 1198 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 1199 1200 list_for_each_entry_safe_reverse(field, next, head, link) { 1201 if (user_field_is_dyn_string(field->type, &str_func)) 1202 pos += snprintf(buf + pos, LEN_OR_ZERO, 1203 ", %s(%s)", str_func, field->name); 1204 else 1205 pos += snprintf(buf + pos, LEN_OR_ZERO, 1206 ", REC->%s", field->name); 1207 } 1208 1209 return pos + 1; 1210 } 1211 #undef LEN_OR_ZERO 1212 1213 static int user_event_create_print_fmt(struct user_event *user) 1214 { 1215 char *print_fmt; 1216 int len; 1217 1218 len = user_event_set_print_fmt(user, NULL, 0); 1219 1220 print_fmt = kmalloc(len, GFP_KERNEL_ACCOUNT); 1221 1222 if (!print_fmt) 1223 return -ENOMEM; 1224 1225 user_event_set_print_fmt(user, print_fmt, len); 1226 1227 user->call.print_fmt = print_fmt; 1228 1229 return 0; 1230 } 1231 1232 static enum print_line_t user_event_print_trace(struct trace_iterator *iter, 1233 int flags, 1234 struct trace_event *event) 1235 { 1236 return print_event_fields(iter, event); 1237 } 1238 1239 static struct trace_event_functions user_event_funcs = { 1240 .trace = user_event_print_trace, 1241 }; 1242 1243 static int user_event_set_call_visible(struct user_event *user, bool visible) 1244 { 1245 int ret; 1246 const struct cred *old_cred; 1247 struct cred *cred; 1248 1249 cred = prepare_creds(); 1250 1251 if (!cred) 1252 return -ENOMEM; 1253 1254 /* 1255 * While by default tracefs is locked down, systems can be configured 1256 * to allow user_event files to be less 
locked down. The extreme case 1257 * being "other" has read/write access to user_events_data/status. 1258 * 1259 * When not locked down, processes may not have permissions to 1260 * add/remove calls themselves to tracefs. We need to temporarily 1261 * switch to root file permission to allow for this scenario. 1262 */ 1263 cred->fsuid = GLOBAL_ROOT_UID; 1264 1265 old_cred = override_creds(cred); 1266 1267 if (visible) 1268 ret = trace_add_event_call(&user->call); 1269 else 1270 ret = trace_remove_event_call(&user->call); 1271 1272 revert_creds(old_cred); 1273 put_cred(cred); 1274 1275 return ret; 1276 } 1277 1278 static int destroy_user_event(struct user_event *user) 1279 { 1280 int ret = 0; 1281 1282 lockdep_assert_held(&event_mutex); 1283 1284 /* Must destroy fields before call removal */ 1285 user_event_destroy_fields(user); 1286 1287 ret = user_event_set_call_visible(user, false); 1288 1289 if (ret) 1290 return ret; 1291 1292 dyn_event_remove(&user->devent); 1293 hash_del(&user->node); 1294 1295 user_event_destroy_validators(user); 1296 kfree(user->call.print_fmt); 1297 kfree(EVENT_NAME(user)); 1298 kfree(user); 1299 1300 if (current_user_events > 0) 1301 current_user_events--; 1302 else 1303 pr_alert("BUG: Bad current_user_events\n"); 1304 1305 return ret; 1306 } 1307 1308 static struct user_event *find_user_event(struct user_event_group *group, 1309 char *name, u32 *outkey) 1310 { 1311 struct user_event *user; 1312 u32 key = user_event_key(name); 1313 1314 *outkey = key; 1315 1316 hash_for_each_possible(group->register_table, user, node, key) 1317 if (!strcmp(EVENT_NAME(user), name)) { 1318 refcount_inc(&user->refcnt); 1319 return user; 1320 } 1321 1322 return NULL; 1323 } 1324 1325 static int user_event_validate(struct user_event *user, void *data, int len) 1326 { 1327 struct list_head *head = &user->validators; 1328 struct user_event_validator *validator; 1329 void *pos, *end = data + len; 1330 u32 loc, offset, size; 1331 1332 list_for_each_entry(validator, head, link) { 1333 pos = data + validator->offset; 1334 1335 /* Already done min_size check, no bounds check here */ 1336 loc = *(u32 *)pos; 1337 offset = loc & 0xffff; 1338 size = loc >> 16; 1339 1340 if (likely(validator->flags & VALIDATOR_REL)) 1341 pos += offset + sizeof(loc); 1342 else 1343 pos = data + offset; 1344 1345 pos += size; 1346 1347 if (unlikely(pos > end)) 1348 return -EFAULT; 1349 1350 if (likely(validator->flags & VALIDATOR_ENSURE_NULL)) 1351 if (unlikely(*(char *)(pos - 1) != '\0')) 1352 return -EFAULT; 1353 } 1354 1355 return 0; 1356 } 1357 1358 /* 1359 * Writes the user supplied payload out to a trace file. 
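 *
 * Illustrative sketch (not part of this file): each write()/writev() from
 * user space begins with the write_index returned by DIAG_IOCSREG (read back
 * as an int), followed by the payload matching the registered fields, e.g.:
 *
 *	struct iovec io[2] = {
 *		{ &write_index, sizeof(write_index) },
 *		{ &payload, sizeof(payload) },
 *	};
 *	writev(data_fd, io, 2);
 *
 * user_events_write_core() peels off that index and hands the remaining
 * bytes to each attached probe, such as this function.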
 */
static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
			      void *tpdata, bool *faulted)
{
	struct trace_event_file *file;
	struct trace_entry *entry;
	struct trace_event_buffer event_buffer;
	size_t size = sizeof(*entry) + i->count;

	file = (struct trace_event_file *)tpdata;

	if (!file ||
	    !(file->flags & EVENT_FILE_FL_ENABLED) ||
	    trace_trigger_soft_disabled(file))
		return;

	/* Allocates and fills trace_entry, + 1 of this is data payload */
	entry = trace_event_buffer_reserve(&event_buffer, file, size);

	if (unlikely(!entry))
		return;

	if (unlikely(!copy_nofault(entry + 1, i->count, i)))
		goto discard;

	if (!list_empty(&user->validators) &&
	    unlikely(user_event_validate(user, entry, size)))
		goto discard;

	trace_event_buffer_commit(&event_buffer);

	return;
discard:
	*faulted = true;
	__trace_event_discard_commit(event_buffer.buffer,
				     event_buffer.event);
}

#ifdef CONFIG_PERF_EVENTS
/*
 * Writes the user supplied payload out to perf ring buffer.
 */
static void user_event_perf(struct user_event *user, struct iov_iter *i,
			    void *tpdata, bool *faulted)
{
	struct hlist_head *perf_head;

	perf_head = this_cpu_ptr(user->call.perf_events);

	if (perf_head && !hlist_empty(perf_head)) {
		struct trace_entry *perf_entry;
		struct pt_regs *regs;
		size_t size = sizeof(*perf_entry) + i->count;
		int context;

		perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
						  &regs, &context);

		if (unlikely(!perf_entry))
			return;

		perf_fetch_caller_regs(regs);

		if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
			goto discard;

		if (!list_empty(&user->validators) &&
		    unlikely(user_event_validate(user, perf_entry, size)))
			goto discard;

		perf_trace_buf_submit(perf_entry, size, context,
				      user->call.event.type, 1, regs,
				      perf_head, NULL);

		return;
discard:
		*faulted = true;
		perf_swevent_put_recursion_context(context);
	}
}
#endif

/*
 * Update the enabled bit among all user processes.
 */
static void update_enable_bit_for(struct user_event *user)
{
	struct tracepoint *tp = &user->tracepoint;
	char status = 0;

	if (atomic_read(&tp->key.enabled) > 0) {
		struct tracepoint_func *probe_func_ptr;
		user_event_func_t probe_func;

		rcu_read_lock_sched();

		probe_func_ptr = rcu_dereference_sched(tp->funcs);

		if (probe_func_ptr) {
			do {
				probe_func = probe_func_ptr->func;

				if (probe_func == user_event_ftrace)
					status |= EVENT_STATUS_FTRACE;
#ifdef CONFIG_PERF_EVENTS
				else if (probe_func == user_event_perf)
					status |= EVENT_STATUS_PERF;
#endif
				else
					status |= EVENT_STATUS_OTHER;
			} while ((++probe_func_ptr)->func);
		}

		rcu_read_unlock_sched();
	}

	user->status = status;

	user_event_enabler_update(user);
}

/*
 * Register callback for our events from tracing sub-systems.
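 *
 * Illustrative note: this runs when a tracer attaches to or detaches from
 * the event (for example when ftrace or perf enables it); on success the
 * probe is (un)registered and update_enable_bit_for() pushes the new state
 * out to every registered user-space enabler.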
1483 */ 1484 static int user_event_reg(struct trace_event_call *call, 1485 enum trace_reg type, 1486 void *data) 1487 { 1488 struct user_event *user = (struct user_event *)call->data; 1489 int ret = 0; 1490 1491 if (!user) 1492 return -ENOENT; 1493 1494 switch (type) { 1495 case TRACE_REG_REGISTER: 1496 ret = tracepoint_probe_register(call->tp, 1497 call->class->probe, 1498 data); 1499 if (!ret) 1500 goto inc; 1501 break; 1502 1503 case TRACE_REG_UNREGISTER: 1504 tracepoint_probe_unregister(call->tp, 1505 call->class->probe, 1506 data); 1507 goto dec; 1508 1509 #ifdef CONFIG_PERF_EVENTS 1510 case TRACE_REG_PERF_REGISTER: 1511 ret = tracepoint_probe_register(call->tp, 1512 call->class->perf_probe, 1513 data); 1514 if (!ret) 1515 goto inc; 1516 break; 1517 1518 case TRACE_REG_PERF_UNREGISTER: 1519 tracepoint_probe_unregister(call->tp, 1520 call->class->perf_probe, 1521 data); 1522 goto dec; 1523 1524 case TRACE_REG_PERF_OPEN: 1525 case TRACE_REG_PERF_CLOSE: 1526 case TRACE_REG_PERF_ADD: 1527 case TRACE_REG_PERF_DEL: 1528 break; 1529 #endif 1530 } 1531 1532 return ret; 1533 inc: 1534 refcount_inc(&user->refcnt); 1535 update_enable_bit_for(user); 1536 return 0; 1537 dec: 1538 update_enable_bit_for(user); 1539 refcount_dec(&user->refcnt); 1540 return 0; 1541 } 1542 1543 static int user_event_create(const char *raw_command) 1544 { 1545 struct user_event_group *group; 1546 struct user_event *user; 1547 char *name; 1548 int ret; 1549 1550 if (!str_has_prefix(raw_command, USER_EVENTS_PREFIX)) 1551 return -ECANCELED; 1552 1553 raw_command += USER_EVENTS_PREFIX_LEN; 1554 raw_command = skip_spaces(raw_command); 1555 1556 name = kstrdup(raw_command, GFP_KERNEL_ACCOUNT); 1557 1558 if (!name) 1559 return -ENOMEM; 1560 1561 group = current_user_event_group(); 1562 1563 if (!group) { 1564 kfree(name); 1565 return -ENOENT; 1566 } 1567 1568 mutex_lock(&group->reg_mutex); 1569 1570 ret = user_event_parse_cmd(group, name, &user); 1571 1572 if (!ret) 1573 refcount_dec(&user->refcnt); 1574 1575 mutex_unlock(&group->reg_mutex); 1576 1577 if (ret) 1578 kfree(name); 1579 1580 return ret; 1581 } 1582 1583 static int user_event_show(struct seq_file *m, struct dyn_event *ev) 1584 { 1585 struct user_event *user = container_of(ev, struct user_event, devent); 1586 struct ftrace_event_field *field, *next; 1587 struct list_head *head; 1588 int depth = 0; 1589 1590 seq_printf(m, "%s%s", USER_EVENTS_PREFIX, EVENT_NAME(user)); 1591 1592 head = trace_get_fields(&user->call); 1593 1594 list_for_each_entry_safe_reverse(field, next, head, link) { 1595 if (depth == 0) 1596 seq_puts(m, " "); 1597 else 1598 seq_puts(m, "; "); 1599 1600 seq_printf(m, "%s %s", field->type, field->name); 1601 1602 if (str_has_prefix(field->type, "struct ")) 1603 seq_printf(m, " %d", field->size); 1604 1605 depth++; 1606 } 1607 1608 seq_puts(m, "\n"); 1609 1610 return 0; 1611 } 1612 1613 static bool user_event_is_busy(struct dyn_event *ev) 1614 { 1615 struct user_event *user = container_of(ev, struct user_event, devent); 1616 1617 return !user_event_last_ref(user); 1618 } 1619 1620 static int user_event_free(struct dyn_event *ev) 1621 { 1622 struct user_event *user = container_of(ev, struct user_event, devent); 1623 1624 if (!user_event_last_ref(user)) 1625 return -EBUSY; 1626 1627 return destroy_user_event(user); 1628 } 1629 1630 static bool user_field_match(struct ftrace_event_field *field, int argc, 1631 const char **argv, int *iout) 1632 { 1633 char *field_name = NULL, *dyn_field_name = NULL; 1634 bool colon = false, match = false; 1635 int dyn_len, 
len; 1636 1637 if (*iout >= argc) 1638 return false; 1639 1640 dyn_len = user_dyn_field_set_string(argc, argv, iout, dyn_field_name, 1641 0, &colon); 1642 1643 len = user_field_set_string(field, field_name, 0, colon); 1644 1645 if (dyn_len != len) 1646 return false; 1647 1648 dyn_field_name = kmalloc(dyn_len, GFP_KERNEL); 1649 field_name = kmalloc(len, GFP_KERNEL); 1650 1651 if (!dyn_field_name || !field_name) 1652 goto out; 1653 1654 user_dyn_field_set_string(argc, argv, iout, dyn_field_name, 1655 dyn_len, &colon); 1656 1657 user_field_set_string(field, field_name, len, colon); 1658 1659 match = strcmp(dyn_field_name, field_name) == 0; 1660 out: 1661 kfree(dyn_field_name); 1662 kfree(field_name); 1663 1664 return match; 1665 } 1666 1667 static bool user_fields_match(struct user_event *user, int argc, 1668 const char **argv) 1669 { 1670 struct ftrace_event_field *field, *next; 1671 struct list_head *head = &user->fields; 1672 int i = 0; 1673 1674 list_for_each_entry_safe_reverse(field, next, head, link) 1675 if (!user_field_match(field, argc, argv, &i)) 1676 return false; 1677 1678 if (i != argc) 1679 return false; 1680 1681 return true; 1682 } 1683 1684 static bool user_event_match(const char *system, const char *event, 1685 int argc, const char **argv, struct dyn_event *ev) 1686 { 1687 struct user_event *user = container_of(ev, struct user_event, devent); 1688 bool match; 1689 1690 match = strcmp(EVENT_NAME(user), event) == 0 && 1691 (!system || strcmp(system, USER_EVENTS_SYSTEM) == 0); 1692 1693 if (match && argc > 0) 1694 match = user_fields_match(user, argc, argv); 1695 1696 return match; 1697 } 1698 1699 static struct dyn_event_operations user_event_dops = { 1700 .create = user_event_create, 1701 .show = user_event_show, 1702 .is_busy = user_event_is_busy, 1703 .free = user_event_free, 1704 .match = user_event_match, 1705 }; 1706 1707 static int user_event_trace_register(struct user_event *user) 1708 { 1709 int ret; 1710 1711 ret = register_trace_event(&user->call.event); 1712 1713 if (!ret) 1714 return -ENODEV; 1715 1716 ret = user_event_set_call_visible(user, true); 1717 1718 if (ret) 1719 unregister_trace_event(&user->call.event); 1720 1721 return ret; 1722 } 1723 1724 /* 1725 * Parses the event name, arguments and flags then registers if successful. 1726 * The name buffer lifetime is owned by this method for success cases only. 1727 * Upon success the returned user_event has its ref count increased by 1. 1728 */ 1729 static int user_event_parse(struct user_event_group *group, char *name, 1730 char *args, char *flags, 1731 struct user_event **newuser) 1732 { 1733 int ret; 1734 u32 key; 1735 struct user_event *user; 1736 1737 /* Prevent dyn_event from racing */ 1738 mutex_lock(&event_mutex); 1739 user = find_user_event(group, name, &key); 1740 mutex_unlock(&event_mutex); 1741 1742 if (user) { 1743 *newuser = user; 1744 /* 1745 * Name is allocated by caller, free it since it already exists. 1746 * Caller only worries about failure cases for freeing. 
1747 */ 1748 kfree(name); 1749 return 0; 1750 } 1751 1752 user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT); 1753 1754 if (!user) 1755 return -ENOMEM; 1756 1757 INIT_LIST_HEAD(&user->class.fields); 1758 INIT_LIST_HEAD(&user->fields); 1759 INIT_LIST_HEAD(&user->validators); 1760 1761 user->group = group; 1762 user->tracepoint.name = name; 1763 1764 ret = user_event_parse_fields(user, args); 1765 1766 if (ret) 1767 goto put_user; 1768 1769 ret = user_event_create_print_fmt(user); 1770 1771 if (ret) 1772 goto put_user; 1773 1774 user->call.data = user; 1775 user->call.class = &user->class; 1776 user->call.name = name; 1777 user->call.flags = TRACE_EVENT_FL_TRACEPOINT; 1778 user->call.tp = &user->tracepoint; 1779 user->call.event.funcs = &user_event_funcs; 1780 user->class.system = group->system_name; 1781 1782 user->class.fields_array = user_event_fields_array; 1783 user->class.get_fields = user_event_get_fields; 1784 user->class.reg = user_event_reg; 1785 user->class.probe = user_event_ftrace; 1786 #ifdef CONFIG_PERF_EVENTS 1787 user->class.perf_probe = user_event_perf; 1788 #endif 1789 1790 mutex_lock(&event_mutex); 1791 1792 if (current_user_events >= max_user_events) { 1793 ret = -EMFILE; 1794 goto put_user_lock; 1795 } 1796 1797 ret = user_event_trace_register(user); 1798 1799 if (ret) 1800 goto put_user_lock; 1801 1802 /* Ensure we track self ref and caller ref (2) */ 1803 refcount_set(&user->refcnt, 2); 1804 1805 dyn_event_init(&user->devent, &user_event_dops); 1806 dyn_event_add(&user->devent, &user->call); 1807 hash_add(group->register_table, &user->node, key); 1808 current_user_events++; 1809 1810 mutex_unlock(&event_mutex); 1811 1812 *newuser = user; 1813 return 0; 1814 put_user_lock: 1815 mutex_unlock(&event_mutex); 1816 put_user: 1817 user_event_destroy_fields(user); 1818 user_event_destroy_validators(user); 1819 kfree(user->call.print_fmt); 1820 kfree(user); 1821 return ret; 1822 } 1823 1824 /* 1825 * Deletes a previously created event if it is no longer being used. 1826 */ 1827 static int delete_user_event(struct user_event_group *group, char *name) 1828 { 1829 u32 key; 1830 struct user_event *user = find_user_event(group, name, &key); 1831 1832 if (!user) 1833 return -ENOENT; 1834 1835 refcount_dec(&user->refcnt); 1836 1837 if (!user_event_last_ref(user)) 1838 return -EBUSY; 1839 1840 return destroy_user_event(user); 1841 } 1842 1843 /* 1844 * Validates the user payload and writes via iterator. 1845 */ 1846 static ssize_t user_events_write_core(struct file *file, struct iov_iter *i) 1847 { 1848 struct user_event_file_info *info = file->private_data; 1849 struct user_event_refs *refs; 1850 struct user_event *user = NULL; 1851 struct tracepoint *tp; 1852 ssize_t ret = i->count; 1853 int idx; 1854 1855 if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx))) 1856 return -EFAULT; 1857 1858 if (idx < 0) 1859 return -EINVAL; 1860 1861 rcu_read_lock_sched(); 1862 1863 refs = rcu_dereference_sched(info->refs); 1864 1865 /* 1866 * The refs->events array is protected by RCU, and new items may be 1867 * added. But the user retrieved from indexing into the events array 1868 * shall be immutable while the file is opened. 
 */
	if (likely(refs && idx < refs->count))
		user = refs->events[idx];

	rcu_read_unlock_sched();

	if (unlikely(user == NULL))
		return -ENOENT;

	if (unlikely(i->count < user->min_size))
		return -EINVAL;

	tp = &user->tracepoint;

	/*
	 * It's possible key.enabled disables after this check; however,
	 * we don't mind if a few events are included in this condition.
	 */
	if (likely(atomic_read(&tp->key.enabled) > 0)) {
		struct tracepoint_func *probe_func_ptr;
		user_event_func_t probe_func;
		struct iov_iter copy;
		void *tpdata;
		bool faulted;

		if (unlikely(fault_in_iov_iter_readable(i, i->count)))
			return -EFAULT;

		faulted = false;

		rcu_read_lock_sched();

		probe_func_ptr = rcu_dereference_sched(tp->funcs);

		if (probe_func_ptr) {
			do {
				copy = *i;
				probe_func = probe_func_ptr->func;
				tpdata = probe_func_ptr->data;
				probe_func(user, &copy, tpdata, &faulted);
			} while ((++probe_func_ptr)->func);
		}

		rcu_read_unlock_sched();

		if (unlikely(faulted))
			return -EFAULT;
	}

	return ret;
}

static int user_events_open(struct inode *node, struct file *file)
{
	struct user_event_group *group;
	struct user_event_file_info *info;

	group = current_user_event_group();

	if (!group)
		return -ENOENT;

	info = kzalloc(sizeof(*info), GFP_KERNEL_ACCOUNT);

	if (!info)
		return -ENOMEM;

	info->group = group;

	file->private_data = info;

	return 0;
}

static ssize_t user_events_write(struct file *file, const char __user *ubuf,
				 size_t count, loff_t *ppos)
{
	struct iovec iov;
	struct iov_iter i;

	if (unlikely(*ppos != 0))
		return -EFAULT;

	if (unlikely(import_single_range(ITER_SOURCE, (char __user *)ubuf,
					 count, &iov, &i)))
		return -EFAULT;

	return user_events_write_core(file, &i);
}

static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i)
{
	return user_events_write_core(kp->ki_filp, i);
}

static int user_events_ref_add(struct user_event_file_info *info,
			       struct user_event *user)
{
	struct user_event_group *group = info->group;
	struct user_event_refs *refs, *new_refs;
	int i, size, count = 0;

	refs = rcu_dereference_protected(info->refs,
					 lockdep_is_held(&group->reg_mutex));

	if (refs) {
		count = refs->count;

		for (i = 0; i < count; ++i)
			if (refs->events[i] == user)
				return i;
	}

	size = struct_size(refs, events, count + 1);

	new_refs = kzalloc(size, GFP_KERNEL_ACCOUNT);

	if (!new_refs)
		return -ENOMEM;

	new_refs->count = count + 1;

	for (i = 0; i < count; ++i)
		new_refs->events[i] = refs->events[i];

	new_refs->events[i] = user;

	refcount_inc(&user->refcnt);

	rcu_assign_pointer(info->refs, new_refs);

	if (refs)
		kfree_rcu(refs, rcu);

	return i;
}

static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
{
	u32 size;
	long ret;

	ret = get_user(size, &ureg->size);

	if (ret)
		return ret;

	if (size > PAGE_SIZE)
		return -E2BIG;

	if (size < offsetofend(struct user_reg, write_index))
		return -EINVAL;

	ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);

	if (ret)
		return ret;

	/* Ensure no flags, since we don't support any yet */
	if (kreg->flags != 0)
		return -EINVAL;

	/* Ensure supported size */
	switch (kreg->enable_size) {
	case 4:
		/* 32-bit */
		break;
#if BITS_PER_LONG >= 64
	case 8:
		/* 64-bit */
		break;
#endif
	default:
		return -EINVAL;
	}

	/* Ensure natural alignment */
	if (kreg->enable_addr % kreg->enable_size)
		return -EINVAL;

	/* Ensure bit range for size */
	if (kreg->enable_bit > (kreg->enable_size * BITS_PER_BYTE) - 1)
		return -EINVAL;

	/* Ensure accessible */
	if (!access_ok((const void __user *)(uintptr_t)kreg->enable_addr,
		       kreg->enable_size))
		return -EFAULT;

	kreg->size = size;

	return 0;
}

/*
 * Registers a user_event on behalf of a user process.
 */
static long user_events_ioctl_reg(struct user_event_file_info *info,
				  unsigned long uarg)
{
	struct user_reg __user *ureg = (struct user_reg __user *)uarg;
	struct user_reg reg;
	struct user_event *user;
	struct user_event_enabler *enabler;
	char *name;
	long ret;
	int write_result;

	ret = user_reg_get(ureg, &reg);

	if (ret)
		return ret;

	/*
	 * Prevent users from using the same address and bit multiple times
	 * within the same mm address space. This can cause unexpected behavior
	 * for user processes that is far easier to debug if this is explicitly
	 * an error upon registering.
	 */
	if (current_user_event_enabler_exists((unsigned long)reg.enable_addr,
					      reg.enable_bit))
		return -EADDRINUSE;

	name = strndup_user((const char __user *)(uintptr_t)reg.name_args,
			    MAX_EVENT_DESC);

	if (IS_ERR(name)) {
		ret = PTR_ERR(name);
		return ret;
	}

	ret = user_event_parse_cmd(info->group, name, &user);

	if (ret) {
		kfree(name);
		return ret;
	}

	ret = user_events_ref_add(info, user);

	/* No longer need parse ref, ref_add either worked or not */
	refcount_dec(&user->refcnt);

	/* Positive number is index and valid */
	if (ret < 0)
		return ret;

	/*
	 * user_events_ref_add succeeded:
	 * At this point we have a user_event, its lifetime is bound by the
	 * reference count, not this file. If anything fails, the user_event
	 * still has a reference until the file is released. During release
	 * any remaining references (from user_events_ref_add) are decremented.
	 *
	 * Attempt to create an enabler, which also has its lifetime tied to
	 * the event in the same way. Once the task that caused the enabler to
	 * be created exits or issues exec() then the enablers it has created
	 * will be destroyed and the ref to the event will be decremented.
	 */
	enabler = user_event_enabler_create(&reg, user, &write_result);

	if (!enabler)
		return -ENOMEM;

	/* Write failed/faulted, give error back to caller */
	if (write_result)
		return write_result;

	put_user((u32)ret, &ureg->write_index);

	return 0;
}

/*
 * Deletes a user_event on behalf of a user process.
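 *
 * Illustrative example (not part of this file): user space passes the name
 * of the event it wants removed, e.g. ioctl(data_fd, DIAG_IOCSDEL, "test").
 * The delete only succeeds once nothing else holds a reference to the event.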
 */
static long user_events_ioctl_del(struct user_event_file_info *info,
				  unsigned long uarg)
{
	void __user *ubuf = (void __user *)uarg;
	char *name;
	long ret;

	name = strndup_user(ubuf, MAX_EVENT_DESC);

	if (IS_ERR(name))
		return PTR_ERR(name);

	/* event_mutex prevents dyn_event from racing */
	mutex_lock(&event_mutex);
	ret = delete_user_event(info->group, name);
	mutex_unlock(&event_mutex);

	kfree(name);

	return ret;
}

static long user_unreg_get(struct user_unreg __user *ureg,
			   struct user_unreg *kreg)
{
	u32 size;
	long ret;

	ret = get_user(size, &ureg->size);

	if (ret)
		return ret;

	if (size > PAGE_SIZE)
		return -E2BIG;

	if (size < offsetofend(struct user_unreg, disable_addr))
		return -EINVAL;

	ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);

	/* Ensure no reserved values, since we don't support any yet */
	if (kreg->__reserved || kreg->__reserved2)
		return -EINVAL;

	return ret;
}

static int user_event_mm_clear_bit(struct user_event_mm *user_mm,
				   unsigned long uaddr, unsigned char bit)
{
	struct user_event_enabler enabler;
	int result;

	memset(&enabler, 0, sizeof(enabler));
	enabler.addr = uaddr;
	enabler.values = bit;
retry:
	/* Prevents state changes from racing with new enablers */
	mutex_lock(&event_mutex);

	/* Force the bit to be cleared, since no event is attached */
	mmap_read_lock(user_mm->mm);
	result = user_event_enabler_write(user_mm, &enabler, false);
	mmap_read_unlock(user_mm->mm);

	mutex_unlock(&event_mutex);

	if (result) {
		/* Attempt to fault-in and retry if it worked */
		if (!user_event_mm_fault_in(user_mm, uaddr))
			goto retry;
	}

	return result;
}

/*
 * Unregisters an enablement address/bit within a task/user mm.
 */
static long user_events_ioctl_unreg(unsigned long uarg)
{
	struct user_unreg __user *ureg = (struct user_unreg __user *)uarg;
	struct user_event_mm *mm = current->user_event_mm;
	struct user_event_enabler *enabler, *next;
	struct user_unreg reg;
	long ret;

	ret = user_unreg_get(ureg, &reg);

	if (ret)
		return ret;

	if (!mm)
		return -ENOENT;

	ret = -ENOENT;

	/*
	 * Flags freeing and faulting are used to indicate if the enabler is in
	 * use at all. When faulting is set a page-fault is occurring
	 * asynchronously. During an async fault, if freeing is set, the
	 * enabler will be destroyed. If no async fault is happening, we can
	 * destroy it now since we hold the event_mutex during these checks.
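	 *
	 * Illustrative sketch (not part of this file): a process undoing a
	 * prior registration fills a struct user_unreg with the same address
	 * and bit it registered, e.g.:
	 *
	 *	struct user_unreg unreg = {
	 *		.size = sizeof(unreg),
	 *		.disable_bit = 31,
	 *		.disable_addr = (__u64)(uintptr_t)&enabled,
	 *	};
	 *	ioctl(data_fd, DIAG_IOCSUNREG, &unreg);
	 *
	 * The loop below destroys any matching enablers and
	 * user_event_mm_clear_bit() then clears the bit in the process.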
static long user_unreg_get(struct user_unreg __user *ureg,
			   struct user_unreg *kreg)
{
	u32 size;
	long ret;

	ret = get_user(size, &ureg->size);

	if (ret)
		return ret;

	if (size > PAGE_SIZE)
		return -E2BIG;

	if (size < offsetofend(struct user_unreg, disable_addr))
		return -EINVAL;

	ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);

	/* Ensure no reserved values, since we don't support any yet */
	if (kreg->__reserved || kreg->__reserved2)
		return -EINVAL;

	return ret;
}

static int user_event_mm_clear_bit(struct user_event_mm *user_mm,
				   unsigned long uaddr, unsigned char bit)
{
	struct user_event_enabler enabler;
	int result;

	memset(&enabler, 0, sizeof(enabler));
	enabler.addr = uaddr;
	enabler.values = bit;
retry:
	/* Prevents state changes from racing with new enablers */
	mutex_lock(&event_mutex);

	/* Force the bit to be cleared, since no event is attached */
	mmap_read_lock(user_mm->mm);
	result = user_event_enabler_write(user_mm, &enabler, false);
	mmap_read_unlock(user_mm->mm);

	mutex_unlock(&event_mutex);

	if (result) {
		/* Attempt to fault-in and retry if it worked */
		if (!user_event_mm_fault_in(user_mm, uaddr))
			goto retry;
	}

	return result;
}

/*
 * Unregisters an enablement address/bit within a task/user mm.
 */
static long user_events_ioctl_unreg(unsigned long uarg)
{
	struct user_unreg __user *ureg = (struct user_unreg __user *)uarg;
	struct user_event_mm *mm = current->user_event_mm;
	struct user_event_enabler *enabler, *next;
	struct user_unreg reg;
	long ret;

	ret = user_unreg_get(ureg, &reg);

	if (ret)
		return ret;

	if (!mm)
		return -ENOENT;

	ret = -ENOENT;

	/*
	 * The freeing and faulting flags are used to indicate if the enabler
	 * is in use at all. When faulting is set a page-fault is occurring
	 * asynchronously. During that async fault, if freeing is set, the
	 * enabler will be destroyed. If no async fault is happening, we can
	 * destroy it now since we hold the event_mutex during these checks.
	 */
	mutex_lock(&event_mutex);

	list_for_each_entry_safe(enabler, next, &mm->enablers, link)
		if (enabler->addr == reg.disable_addr &&
		    (enabler->values & ENABLE_VAL_BIT_MASK) == reg.disable_bit) {
			set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));

			if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
				user_event_enabler_destroy(enabler);

			/* Removed at least one */
			ret = 0;
		}

	mutex_unlock(&event_mutex);

	/* Ensure bit is now cleared for user, regardless of event status */
	if (!ret)
		ret = user_event_mm_clear_bit(mm, reg.disable_addr,
					      reg.disable_bit);

	return ret;
}

/*
 * Handles the ioctl from user mode to register or alter operations.
 */
static long user_events_ioctl(struct file *file, unsigned int cmd,
			      unsigned long uarg)
{
	struct user_event_file_info *info = file->private_data;
	struct user_event_group *group = info->group;
	long ret = -ENOTTY;

	switch (cmd) {
	case DIAG_IOCSREG:
		mutex_lock(&group->reg_mutex);
		ret = user_events_ioctl_reg(info, uarg);
		mutex_unlock(&group->reg_mutex);
		break;

	case DIAG_IOCSDEL:
		mutex_lock(&group->reg_mutex);
		ret = user_events_ioctl_del(info, uarg);
		mutex_unlock(&group->reg_mutex);
		break;

	case DIAG_IOCSUNREG:
		mutex_lock(&group->reg_mutex);
		ret = user_events_ioctl_unreg(uarg);
		mutex_unlock(&group->reg_mutex);
		break;
	}

	return ret;
}
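/*
 * Illustrative sketch (assumption, not part of this file): unregistering
 * the enablement address/bit from the registration sketch above would look
 * roughly like the following, reusing the hypothetical fd and enabled
 * variable:
 *
 *	struct user_unreg unreg = {0};
 *
 *	unreg.size = sizeof(unreg);
 *	unreg.disable_bit = 0;			// bit used at registration
 *	unreg.disable_addr = (__u64)(uintptr_t)&enabled;
 *
 *	ioctl(fd, DIAG_IOCSUNREG, &unreg);
 *
 * The reserved fields must remain zero or user_unreg_get() returns -EINVAL.
 */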
/*
 * Handles the final close of the file from user mode.
 */
static int user_events_release(struct inode *node, struct file *file)
{
	struct user_event_file_info *info = file->private_data;
	struct user_event_group *group;
	struct user_event_refs *refs;
	struct user_event *user;
	int i;

	if (!info)
		return -EINVAL;

	group = info->group;

	/*
	 * Ensure refs cannot change under any situation by taking the
	 * register mutex during the final freeing of the references.
	 */
	mutex_lock(&group->reg_mutex);

	refs = info->refs;

	if (!refs)
		goto out;

	/*
	 * The lifetime of refs has reached an end; it is tied to this file.
	 * The underlying user_events are ref counted and cannot be freed here.
	 * After this decrement, the user_events may be freed elsewhere.
	 */
	for (i = 0; i < refs->count; ++i) {
		user = refs->events[i];

		if (user)
			refcount_dec(&user->refcnt);
	}
out:
	file->private_data = NULL;

	mutex_unlock(&group->reg_mutex);

	kfree(refs);
	kfree(info);

	return 0;
}

static const struct file_operations user_data_fops = {
	.open = user_events_open,
	.write = user_events_write,
	.write_iter = user_events_write_iter,
	.unlocked_ioctl = user_events_ioctl,
	.release = user_events_release,
};

static void *user_seq_start(struct seq_file *m, loff_t *pos)
{
	if (*pos)
		return NULL;

	return (void *)1;
}

static void *user_seq_next(struct seq_file *m, void *p, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void user_seq_stop(struct seq_file *m, void *p)
{
}

static int user_seq_show(struct seq_file *m, void *p)
{
	struct user_event_group *group = m->private;
	struct user_event *user;
	char status;
	int i, active = 0, busy = 0;

	if (!group)
		return -EINVAL;

	mutex_lock(&group->reg_mutex);

	hash_for_each(group->register_table, i, user, node) {
		status = user->status;

		seq_printf(m, "%s", EVENT_NAME(user));

		if (status != 0)
			seq_puts(m, " #");

		if (status != 0) {
			seq_puts(m, " Used by");
			if (status & EVENT_STATUS_FTRACE)
				seq_puts(m, " ftrace");
			if (status & EVENT_STATUS_PERF)
				seq_puts(m, " perf");
			if (status & EVENT_STATUS_OTHER)
				seq_puts(m, " other");
			busy++;
		}

		seq_puts(m, "\n");
		active++;
	}

	mutex_unlock(&group->reg_mutex);

	seq_puts(m, "\n");
	seq_printf(m, "Active: %d\n", active);
	seq_printf(m, "Busy: %d\n", busy);

	return 0;
}

static const struct seq_operations user_seq_ops = {
	.start = user_seq_start,
	.next = user_seq_next,
	.stop = user_seq_stop,
	.show = user_seq_show,
};

static int user_status_open(struct inode *node, struct file *file)
{
	struct user_event_group *group;
	int ret;

	group = current_user_event_group();

	if (!group)
		return -ENOENT;

	ret = seq_open(file, &user_seq_ops);

	if (!ret) {
		/* Chain group to seq_file */
		struct seq_file *m = file->private_data;

		m->private = group;
	}

	return ret;
}

static const struct file_operations user_status_fops = {
	.open = user_status_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
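/*
 * Illustrative sketch (assumption, not part of this file): given the
 * seq_printf()/seq_puts() calls in user_seq_show() above, reading the
 * "user_events_status" file with one busy event named "test" might produce
 * output roughly like:
 *
 *	test # Used by ftrace
 *
 *	Active: 1
 *	Busy: 1
 */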
/*
 * Creates a set of tracefs files to allow user mode interactions.
 */
static int create_user_tracefs(void)
{
	struct dentry *edata, *emmap;

	edata = tracefs_create_file("user_events_data", TRACE_MODE_WRITE,
				    NULL, NULL, &user_data_fops);

	if (!edata) {
		pr_warn("Could not create tracefs 'user_events_data' entry\n");
		goto err;
	}

	emmap = tracefs_create_file("user_events_status", TRACE_MODE_READ,
				    NULL, NULL, &user_status_fops);

	if (!emmap) {
		tracefs_remove(edata);
		pr_warn("Could not create tracefs 'user_events_status' entry\n");
		goto err;
	}

	return 0;
err:
	return -ENODEV;
}

static int set_max_user_events_sysctl(struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	mutex_lock(&event_mutex);

	ret = proc_douintvec(table, write, buffer, lenp, ppos);

	mutex_unlock(&event_mutex);

	return ret;
}

static struct ctl_table user_event_sysctls[] = {
	{
		.procname = "user_events_max",
		.data = &max_user_events,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = set_max_user_events_sysctl,
	},
	{}
};

static int __init trace_events_user_init(void)
{
	int ret;

	fault_cache = KMEM_CACHE(user_event_enabler_fault, 0);

	if (!fault_cache)
		return -ENOMEM;

	init_group = user_event_group_create(&init_user_ns);

	if (!init_group) {
		kmem_cache_destroy(fault_cache);
		return -ENOMEM;
	}

	ret = create_user_tracefs();

	if (ret) {
		pr_warn("user_events could not register with tracefs\n");
		user_event_group_destroy(init_group);
		kmem_cache_destroy(fault_cache);
		init_group = NULL;
		return ret;
	}

	if (dyn_event_register(&user_event_dops))
		pr_warn("user_events could not register with dyn_events\n");

	register_sysctl_init("kernel", user_event_sysctls);

	return 0;
}

fs_initcall(trace_events_user_init);
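/*
 * Illustrative sketch (assumption, not part of this file): the
 * "user_events_max" sysctl registered above under "kernel" is typically
 * exposed as /proc/sys/kernel/user_events_max, so a privileged process
 * could raise the limit roughly like this (value chosen arbitrarily):
 *
 *	int fd = open("/proc/sys/kernel/user_events_max", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "65536", 5);
 *		close(fd);
 *	}
 */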