/*
 * Performance events:
 *
 *    Copyright (C) 2008-2009, Thomas Gleixner <[email protected]>
 *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
 *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
 *
 * Data type definitions, declarations, prototypes.
 *
 *    Started by: Thomas Gleixner and Ingo Molnar
 *
 * For licencing details see kernel-base/COPYING
 */
#ifndef _LINUX_PERF_EVENT_H
#define _LINUX_PERF_EVENT_H

#include <uapi/linux/perf_event.h>

/*
 * Kernel-internal data types and definitions:
 */

#ifdef CONFIG_PERF_EVENTS
# include <asm/perf_event.h>
# include <asm/local64.h>
#endif

struct perf_guest_info_callbacks {
	int		(*is_in_guest)(void);
	int		(*is_user_mode)(void);
	unsigned long	(*get_guest_ip)(void);
};

#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <asm/hw_breakpoint.h>
#endif

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/pid_namespace.h>
#include <linux/workqueue.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/irq_work.h>
#include <linux/static_key.h>
#include <linux/jump_label_ratelimit.h>
#include <linux/atomic.h>
#include <linux/sysfs.h>
#include <linux/perf_regs.h>
#include <linux/cgroup.h>
#include <asm/local.h>

struct perf_callchain_entry {
	__u64				nr;
	__u64				ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
};

struct perf_raw_record {
	u32				size;
	void				*data;
};

/*
 * branch stack layout:
 *  nr: number of taken branches stored in entries[]
 *
 * Note that nr can vary from sample to sample.
 * Branches (to, from) are stored from most recent
 * to least recent, i.e., entries[0] contains the most
 * recent branch.
 */
struct perf_branch_stack {
	__u64				nr;
	struct perf_branch_entry	entries[0];
};
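/*
 * Example (illustrative sketch, not part of this header): walking a branch
 * stack sample, most recent branch first.  consume_branch() is a hypothetical
 * helper supplied by the caller.
 *
 *	static void walk_branch_stack(const struct perf_branch_stack *bs)
 *	{
 *		u64 i;
 *
 *		for (i = 0; i < bs->nr; i++) {
 *			const struct perf_branch_entry *br = &bs->entries[i];
 *
 *			consume_branch(br->from, br->to, br->mispred);
 *		}
 *	}
 */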
struct task_struct;

/*
 * extra PMU register associated with an event
 */
struct hw_perf_event_extra {
	u64		config;	/* register value */
	unsigned int	reg;	/* register address or index */
	int		alloc;	/* extra register already allocated */
	int		idx;	/* index in shared_regs->regs[] */
};

/**
 * struct hw_perf_event - performance event hardware details:
 */
struct hw_perf_event {
#ifdef CONFIG_PERF_EVENTS
	union {
		struct { /* hardware */
			u64		config;
			u64		last_tag;
			unsigned long	config_base;
			unsigned long	event_base;
			int		event_base_rdpmc;
			int		idx;
			int		last_cpu;
			int		flags;

			struct hw_perf_event_extra extra_reg;
			struct hw_perf_event_extra branch_reg;
		};
		struct { /* software */
			struct hrtimer	hrtimer;
		};
		struct { /* tracepoint */
			/* for tp_event->class */
			struct list_head	tp_list;
		};
		struct { /* intel_cqm */
			int			cqm_state;
			u32			cqm_rmid;
			int			is_group_event;
			struct list_head	cqm_events_entry;
			struct list_head	cqm_groups_entry;
			struct list_head	cqm_group_entry;
		};
		struct { /* itrace */
			int			itrace_started;
		};
		struct { /* amd_power */
			u64			pwr_acc;
			u64			ptsc;
		};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
		struct { /* breakpoint */
			/*
			 * Crufty hack to avoid the chicken and egg
			 * problem hw_breakpoint has with context
			 * creation and event initialization.
			 */
			struct arch_hw_breakpoint	info;
			struct list_head		bp_list;
		};
#endif
	};
	/*
	 * If the event is a per-task event, this will point to the task in
	 * question. See the comment in perf_event_alloc().
	 */
	struct task_struct		*target;

	/*
	 * PMU would store hardware filter configuration
	 * here.
	 */
	void				*addr_filters;

	/* Last sync'ed generation of filters */
	unsigned long			addr_filters_gen;

/*
 * hw_perf_event::state flags; used to track the PERF_EF_* state.
 */
#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
#define PERF_HES_ARCH		0x04

	int				state;

	/*
	 * The last observed hardware counter value, updated with a
	 * local64_cmpxchg() such that pmu::read() can be called nested.
	 */
	local64_t			prev_count;

	/*
	 * The period to start the next sample with.
	 */
	u64				sample_period;

	/*
	 * The period we started this sample with.
	 */
	u64				last_period;

	/*
	 * How much is left of the current period; note that this is
	 * a full 64bit value and allows for generation of periods longer
	 * than hardware might allow.
	 */
	local64_t			period_left;

	/*
	 * State for throttling the event, see __perf_event_overflow() and
	 * perf_adjust_freq_unthr_context().
	 */
	u64				interrupts_seq;
	u64				interrupts;

	/*
	 * State for freq target events, see __perf_event_overflow() and
	 * perf_adjust_freq_unthr_context().
	 */
	u64				freq_time_stamp;
	u64				freq_count_stamp;
#endif
};
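/*
 * Example (illustrative sketch, modelled on the common driver pattern, not a
 * definition from this header): updating event->count from a free-running
 * hardware counter.  local64_cmpxchg() on prev_count is what allows
 * pmu::read() to nest with the PMI handler.  read_hw_counter() and
 * COUNTER_WIDTH are hypothetical.
 *
 *	static u64 example_event_update(struct perf_event *event)
 *	{
 *		struct hw_perf_event *hwc = &event->hw;
 *		u64 prev_raw, new_raw;
 *		s64 delta;
 *
 *	again:
 *		prev_raw = local64_read(&hwc->prev_count);
 *		new_raw  = read_hw_counter(hwc->idx);
 *
 *		if (local64_cmpxchg(&hwc->prev_count, prev_raw, new_raw) != prev_raw)
 *			goto again;
 *
 *		delta = (new_raw << (64 - COUNTER_WIDTH)) -
 *			(prev_raw << (64 - COUNTER_WIDTH));
 *		delta >>= (64 - COUNTER_WIDTH);
 *
 *		local64_add(delta, &event->count);
 *		local64_sub(delta, &hwc->period_left);
 *
 *		return new_raw;
 *	}
 */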
struct perf_event;

/*
 * Common implementation detail of pmu::{start,commit,cancel}_txn
 */
#define PERF_PMU_TXN_ADD	0x1	/* txn to add/schedule event on PMU */
#define PERF_PMU_TXN_READ	0x2	/* txn to read event group from PMU */

/**
 * pmu::capabilities flags
 */
#define PERF_PMU_CAP_NO_INTERRUPT		0x01
#define PERF_PMU_CAP_NO_NMI			0x02
#define PERF_PMU_CAP_AUX_NO_SG			0x04
#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
#define PERF_PMU_CAP_EXCLUSIVE			0x10
#define PERF_PMU_CAP_ITRACE			0x20
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS		0x40

/**
 * struct pmu - generic performance monitoring unit
 */
struct pmu {
	struct list_head		entry;

	struct module			*module;
	struct device			*dev;
	const struct attribute_group	**attr_groups;
	const char			*name;
	int				type;

	/*
	 * various common per-pmu feature flags
	 */
	int				capabilities;

	int * __percpu			pmu_disable_count;
	struct perf_cpu_context * __percpu pmu_cpu_context;
	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
	int				task_ctx_nr;
	int				hrtimer_interval_ms;

	/* number of address filters this PMU can do */
	unsigned int			nr_addr_filters;

	/*
	 * Fully disable/enable this PMU, can be used to protect from the PMI
	 * as well as for lazy/batch writing of the MSRs.
	 */
	void (*pmu_enable)		(struct pmu *pmu); /* optional */
	void (*pmu_disable)		(struct pmu *pmu); /* optional */

	/*
	 * Try and initialize the event for this PMU.
	 *
	 * Returns:
	 *  -ENOENT	-- @event is not for this PMU
	 *
	 *  -ENODEV	-- @event is for this PMU but PMU not present
	 *  -EBUSY	-- @event is for this PMU but PMU temporarily unavailable
	 *  -EINVAL	-- @event is for this PMU but @event is not valid
	 *  -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
	 *  -EACCES	-- @event is for this PMU, @event is valid, but no privileges
	 *
	 *  0		-- @event is for this PMU and valid
	 *
	 * Other error return values are allowed.
	 */
	int (*event_init)		(struct perf_event *event);

	/*
	 * Notification that the event was mapped or unmapped.  Called
	 * in the context of the mapping task.
	 */
	void (*event_mapped)		(struct perf_event *event); /* optional */
	void (*event_unmapped)		(struct perf_event *event); /* optional */

	/*
	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
	 * matching hw_perf_event::state flags.
	 */
#define PERF_EF_START	0x01		/* start the counter when adding    */
#define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
#define PERF_EF_UPDATE	0x04		/* update the counter when stopping */

	/*
	 * Adds/Removes a counter to/from the PMU, can be done inside a
	 * transaction, see the ->*_txn() methods.
	 *
	 * The add/del callbacks will reserve all hardware resources required
	 * to service the event, this includes any counter constraint
	 * scheduling etc.
	 *
	 * Called with IRQs disabled and the PMU disabled on the CPU the event
	 * is on.
	 *
	 * ->add() called without PERF_EF_START should result in the same state
	 *  as ->add() followed by ->stop().
	 *
	 * ->del() must always PERF_EF_UPDATE stop an event. If it calls
	 *  ->stop() that must deal with already being stopped without
	 *  PERF_EF_UPDATE.
	 */
	int  (*add)			(struct perf_event *event, int flags);
	void (*del)			(struct perf_event *event, int flags);

	/*
	 * Starts/Stops a counter present on the PMU.
	 *
	 * The PMI handler should stop the counter when perf_event_overflow()
	 * returns !0. ->start() will be used to continue.
	 *
	 * Also used to change the sample period.
	 *
	 * Called with IRQs disabled and the PMU disabled on the CPU the event
	 * is on -- will be called from NMI context when the PMU generates
	 * NMIs.
	 *
	 * ->stop() with PERF_EF_UPDATE will read the counter and update
	 *  period/count values like ->read() would.
	 *
	 * ->start() with PERF_EF_RELOAD will reprogram the counter
	 *  value, must be preceded by a ->stop() with PERF_EF_UPDATE.
	 */
	void (*start)			(struct perf_event *event, int flags);
	void (*stop)			(struct perf_event *event, int flags);

	/*
	 * Updates the counter value of the event.
	 *
	 * For sampling capable PMUs this will also update the software period
	 * hw_perf_event::period_left field.
	 */
	void (*read)			(struct perf_event *event);

	/*
	 * Group events scheduling is treated as a transaction: add
	 * group events as a whole and perform one schedulability test.
	 * If the test fails, roll back the whole group.
	 *
	 * Start the transaction, after this ->add() doesn't need to
	 * do schedulability tests.
	 *
	 * Optional.
	 */
	void (*start_txn)		(struct pmu *pmu, unsigned int txn_flags);
	/*
	 * If ->start_txn() disabled the ->add() schedulability test
	 * then ->commit_txn() is required to perform one. On success
	 * the transaction is closed. On error the transaction is kept
	 * open until ->cancel_txn() is called.
	 *
	 * Optional.
	 */
	int  (*commit_txn)		(struct pmu *pmu);
	/*
	 * Will cancel the transaction, assumes ->del() is called
	 * for each successful ->add() during the transaction.
	 *
	 * Optional.
	 */
	void (*cancel_txn)		(struct pmu *pmu);
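
	/*
	 * Example (simplified sketch of the calling convention; the real code
	 * is group_sched_in() in kernel/events/core.c): a group is added under
	 * one PERF_PMU_TXN_ADD transaction so the PMU can do a single
	 * schedulability test in ->commit_txn().  add_whole_group() and
	 * del_partial_group() are hypothetical helpers; the latter calls
	 * ->del() for every event that was successfully ->add()ed, as the
	 * ->cancel_txn() contract above requires.
	 *
	 *	pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
	 *
	 *	if (add_whole_group(leader))
	 *		goto group_error;
	 *
	 *	if (!pmu->commit_txn(pmu))
	 *		return 0;
	 *
	 * group_error:
	 *	del_partial_group(leader);
	 *	pmu->cancel_txn(pmu);
	 *	return -EAGAIN;
	 */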

	/*
	 * Will return the value for perf_event_mmap_page::index for this event,
	 * if no implementation is provided it will default to: event->hw.idx + 1.
	 */
	int (*event_idx)		(struct perf_event *event); /* optional */

	/*
	 * context-switches callback
	 */
	void (*sched_task)		(struct perf_event_context *ctx,
					 bool sched_in);
	/*
	 * PMU specific data size
	 */
	size_t				task_ctx_size;


	/*
	 * Return the count value for a counter.
	 */
	u64 (*count)			(struct perf_event *event); /* optional */

	/*
	 * Set up pmu-private data structures for an AUX area
	 */
	void *(*setup_aux)		(int cpu, void **pages,
					 int nr_pages, bool overwrite);
					/* optional */

	/*
	 * Free pmu-private AUX data structures
	 */
	void (*free_aux)		(void *aux); /* optional */

	/*
	 * Validate address range filters: make sure the HW supports the
	 * requested configuration and number of filters; return 0 if the
	 * supplied filters are valid, -errno otherwise.
	 *
	 * Runs in the context of the ioctl()ing process and is not serialized
	 * with the rest of the PMU callbacks.
	 */
	int (*addr_filters_validate)	(struct list_head *filters);
					/* optional */

	/*
	 * Synchronize address range filter configuration:
	 * translate hw-agnostic filters into hardware configuration in
	 * event::hw::addr_filters.
	 *
	 * Runs as part of the filter sync sequence that is done in ->start()
	 * callback by calling perf_event_addr_filters_sync().
	 *
	 * May (and should) traverse event::addr_filters::list, for which its
	 * caller provides necessary serialization.
	 */
	void (*addr_filters_sync)	(struct perf_event *event);
					/* optional */

	/*
	 * Filter events for PMU-specific reasons.
	 */
	int (*filter_match)		(struct perf_event *event); /* optional */
};
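
/*
 * Example (minimal, illustrative PMU skeleton; the names are made up and the
 * callbacks are stubs): ->event_init(), ->add(), ->del(), ->start(), ->stop()
 * and ->read() are the callbacks every PMU provides, everything else is
 * optional.
 *
 *	static int nop_event_init(struct perf_event *event)
 *	{
 *		if (event->attr.type != event->pmu->type)
 *			return -ENOENT;
 *		return 0;
 *	}
 *
 *	static void nop_start(struct perf_event *event, int flags)
 *	{
 *		event->hw.state = 0;
 *	}
 *
 *	static void nop_stop(struct perf_event *event, int flags)
 *	{
 *		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 *	}
 *
 *	static int nop_add(struct perf_event *event, int flags)
 *	{
 *		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 *		if (flags & PERF_EF_START)
 *			nop_start(event, PERF_EF_RELOAD);
 *		return 0;
 *	}
 *
 *	static void nop_del(struct perf_event *event, int flags)
 *	{
 *		nop_stop(event, PERF_EF_UPDATE);
 *	}
 *
 *	static void nop_read(struct perf_event *event)
 *	{
 *	}
 *
 *	static struct pmu nop_pmu = {
 *		.task_ctx_nr	= perf_invalid_context,
 *		.event_init	= nop_event_init,
 *		.add		= nop_add,
 *		.del		= nop_del,
 *		.start		= nop_start,
 *		.stop		= nop_stop,
 *		.read		= nop_read,
 *	};
 *
 *	ret = perf_pmu_register(&nop_pmu, "nop", -1);
 */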

/**
 * struct perf_addr_filter - address range filter definition
 * @entry:	event's filter list linkage
 * @inode:	object file's inode for file-based filters
 * @offset:	filter range offset
 * @size:	filter range size
 * @range:	1: range, 0: address
 * @filter:	1: filter/start, 0: stop
 *
 * This is a hardware-agnostic filter configuration as specified by the user.
 */
struct perf_addr_filter {
	struct list_head	entry;
	struct inode		*inode;
	unsigned long		offset;
	unsigned long		size;
	unsigned int		range	: 1,
				filter	: 1;
};

/**
 * struct perf_addr_filters_head - container for address range filters
 * @list:	list of filters for this event
 * @lock:	spinlock that serializes accesses to the @list and event's
 *		(and its children's) filter generations.
 *
 * A child event will use parent's @list (and therefore @lock), so they are
 * bundled together; see perf_event_addr_filters().
 */
struct perf_addr_filters_head {
	struct list_head	list;
	raw_spinlock_t		lock;
};

/**
 * enum perf_event_active_state - the states of an event
 */
enum perf_event_active_state {
	PERF_EVENT_STATE_DEAD		= -4,
	PERF_EVENT_STATE_EXIT		= -3,
	PERF_EVENT_STATE_ERROR		= -2,
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
	PERF_EVENT_STATE_ACTIVE		=  1,
};

struct file;
struct perf_sample_data;

typedef void (*perf_overflow_handler_t)(struct perf_event *,
					struct perf_sample_data *,
					struct pt_regs *regs);

enum perf_group_flag {
	PERF_GROUP_SOFTWARE		= 0x1,
};

#define SWEVENT_HLIST_BITS		8
#define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)

struct swevent_hlist {
	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
	struct rcu_head			rcu_head;
};

#define PERF_ATTACH_CONTEXT	0x01
#define PERF_ATTACH_GROUP	0x02
#define PERF_ATTACH_TASK	0x04
#define PERF_ATTACH_TASK_DATA	0x08

struct perf_cgroup;
struct ring_buffer;

/**
 * struct perf_event - performance event kernel representation:
 */
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
	/*
	 * entry onto perf_event_context::event_list;
	 *   modifications require ctx->lock
	 *   RCU safe iterations.
	 */
	struct list_head		event_entry;

	/*
	 * XXX: group_entry and sibling_list should be mutually exclusive;
	 * either you're a sibling in a group, or you're the group leader.
	 * Rework the code to always use the same list element.
	 *
	 * Locked for modification by both ctx->mutex and ctx->lock; holding
	 * either suffices for read.
	 */
	struct list_head		group_entry;
	struct list_head		sibling_list;

	/*
	 * We need storage to track the entries in perf_pmu_migrate_context; we
	 * cannot use the event_entry because of RCU and we want to keep the
	 * group intact which avoids us using the other two entries.
	 */
	struct list_head		migrate_entry;

	struct hlist_node		hlist_entry;
	struct list_head		active_entry;
	int				nr_siblings;
	int				group_flags;
	struct perf_event		*group_leader;
	struct pmu			*pmu;
	void				*pmu_private;

	enum perf_event_active_state	state;
	unsigned int			attach_state;
	local64_t			count;
	atomic64_t			child_count;

	/*
	 * These are the total time in nanoseconds that the event
	 * has been enabled (i.e. eligible to run, and the task has
	 * been scheduled in, if this is a per-task event)
	 * and running (scheduled onto the CPU), respectively.
	 *
	 * They are computed from tstamp_enabled, tstamp_running and
	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
	 */
	u64				total_time_enabled;
	u64				total_time_running;

	/*
	 * These are timestamps used for computing total_time_enabled
	 * and total_time_running when the event is in INACTIVE or
	 * ACTIVE state, measured in nanoseconds from an arbitrary point
	 * in time.
	 * tstamp_enabled: the notional time when the event was enabled
	 * tstamp_running: the notional time when the event was scheduled on
	 * tstamp_stopped: in INACTIVE state, the notional time when the
	 *	event was scheduled off.
	 */
	u64				tstamp_enabled;
	u64				tstamp_running;
	u64				tstamp_stopped;

	/*
	 * timestamp shadows the actual context timing but it can
	 * be safely used in NMI interrupt context. It reflects the
	 * context time as it was when the event was last scheduled in.
	 *
	 * ctx_time already accounts for ctx->timestamp. Therefore to
	 * compute ctx_time for a sample, simply add perf_clock().
	 */
	u64				shadow_ctx_time;

	struct perf_event_attr		attr;
	u16				header_size;
	u16				id_header_size;
	u16				read_size;
	struct hw_perf_event		hw;

	struct perf_event_context	*ctx;
	atomic_long_t			refcount;

	/*
	 * These accumulate total time (in nanoseconds) that children
	 * events have been enabled and running, respectively.
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

	/*
	 * Protect attach/detach and child_list:
	 */
	struct mutex			child_mutex;
	struct list_head		child_list;
	struct perf_event		*parent;

	int				oncpu;
	int				cpu;

	struct list_head		owner_entry;
	struct task_struct		*owner;

	/* mmap bits */
	struct mutex			mmap_mutex;
	atomic_t			mmap_count;

	struct ring_buffer		*rb;
	struct list_head		rb_entry;
	unsigned long			rcu_batches;
	int				rcu_pending;

	/* poll related */
	wait_queue_head_t		waitq;
	struct fasync_struct		*fasync;

	/* delayed work for NMIs and such */
	int				pending_wakeup;
	int				pending_kill;
	int				pending_disable;
	struct irq_work			pending;

	atomic_t			event_limit;

	/* address range filters */
	struct perf_addr_filters_head	addr_filters;
	/* vma address array for file-based filters */
	unsigned long			*addr_filters_offs;
	unsigned long			addr_filters_gen;

	void (*destroy)(struct perf_event *);
	struct rcu_head			rcu_head;

	struct pid_namespace		*ns;
	u64				id;

	u64				(*clock)(void);
	perf_overflow_handler_t		overflow_handler;
	void				*overflow_handler_context;

#ifdef CONFIG_EVENT_TRACING
	struct trace_event_call		*tp_event;
	struct event_filter		*filter;
#ifdef CONFIG_FUNCTION_TRACER
	struct ftrace_ops		ftrace_ops;
#endif
#endif

#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp; /* cgroup event is attached to */
	int				cgrp_defer_enabled;
#endif

#endif /* CONFIG_PERF_EVENTS */
};
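/*
 * Example (illustrative sketch of how the time totals above relate to the
 * timestamps; the authoritative version is update_event_times() in
 * kernel/events/core.c, which this simplifies):
 *
 *	enabled = ctx_time - event->tstamp_enabled;
 *
 *	run_end = (event->state == PERF_EVENT_STATE_INACTIVE) ?
 *			event->tstamp_stopped : ctx_time;
 *	running = run_end - event->tstamp_running;
 *
 *	event->total_time_enabled = enabled;
 *	event->total_time_running = running;
 */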
/**
 * struct perf_event_context - event context structure
 *
 * Used as a container for task events and CPU events as well:
 */
struct perf_event_context {
	struct pmu			*pmu;
	/*
	 * Protect the states of the events in the list,
	 * nr_active, and the list:
	 */
	raw_spinlock_t			lock;
	/*
	 * Protect the list of events.  Locking either mutex or lock
	 * is sufficient to ensure the list doesn't change; to change
	 * the list you need to lock both the mutex and the spinlock.
	 */
	struct mutex			mutex;

	struct list_head		active_ctx_list;
	struct list_head		pinned_groups;
	struct list_head		flexible_groups;
	struct list_head		event_list;
	int				nr_events;
	int				nr_active;
	int				is_active;
	int				nr_stat;
	int				nr_freq;
	int				rotate_disable;
	atomic_t			refcount;
	struct task_struct		*task;

	/*
	 * Context clock, runs when context enabled.
	 */
	u64				time;
	u64				timestamp;

	/*
	 * These fields let us detect when two contexts have both
	 * been cloned (inherited) from a common ancestor.
	 */
	struct perf_event_context	*parent_ctx;
	u64				parent_gen;
	u64				generation;
	int				pin_count;
	int				nr_cgroups;	/* cgroup evts */
	void				*task_ctx_data; /* pmu specific data */
	struct rcu_head			rcu_head;
};

/*
 * Number of contexts where an event can trigger:
 *	task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS	4

/**
 * struct perf_cpu_context - per cpu event context structure
 */
struct perf_cpu_context {
	struct perf_event_context	ctx;
	struct perf_event_context	*task_ctx;
	int				active_oncpu;
	int				exclusive;

	raw_spinlock_t			hrtimer_lock;
	struct hrtimer			hrtimer;
	ktime_t				hrtimer_interval;
	unsigned int			hrtimer_active;

	struct pmu			*unique_pmu;
	struct perf_cgroup		*cgrp;
};

struct perf_output_handle {
	struct perf_event		*event;
	struct ring_buffer		*rb;
	unsigned long			wakeup;
	unsigned long			size;
	union {
		void			*addr;
		unsigned long		head;
	};
	int				page;
};

#ifdef CONFIG_CGROUP_PERF

/*
 * perf_cgroup_info keeps track of time_enabled for a cgroup.
 * This is a per-cpu dynamically allocated data structure.
 */
struct perf_cgroup_info {
	u64				time;
	u64				timestamp;
};

struct perf_cgroup {
	struct cgroup_subsys_state	css;
	struct perf_cgroup_info	__percpu *info;
};

/*
 * Must ensure cgroup is pinned (css_get) before calling
 * this function. In other words, we cannot call this function
 * if there is no cgroup event for the current CPU context.
 */
static inline struct perf_cgroup *
perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
{
	return container_of(task_css_check(task, perf_event_cgrp_id,
					   ctx ? lockdep_is_held(&ctx->lock)
					       : true),
			    struct perf_cgroup, css);
}
#endif /* CONFIG_CGROUP_PERF */

#ifdef CONFIG_PERF_EVENTS

extern void *perf_aux_output_begin(struct perf_output_handle *handle,
				   struct perf_event *event);
extern void perf_aux_output_end(struct perf_output_handle *handle,
				unsigned long size, bool truncated);
extern int perf_aux_output_skip(struct perf_output_handle *handle,
				unsigned long size);
extern void *perf_get_aux(struct perf_output_handle *handle);

extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
extern void perf_pmu_unregister(struct pmu *pmu);

extern int perf_num_counters(void);
extern const char *perf_pmu_name(void);
extern void __perf_event_task_sched_in(struct task_struct *prev,
				       struct task_struct *task);
extern void __perf_event_task_sched_out(struct task_struct *prev,
					struct task_struct *next);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task);
extern struct file *perf_event_get(unsigned int fd);
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
extern void perf_pmu_enable(struct pmu *pmu);
extern void perf_sched_cb_dec(struct pmu *pmu);
extern void perf_sched_cb_inc(struct pmu *pmu);
extern int perf_event_task_disable(void);
extern int perf_event_task_enable(void);
extern int perf_event_refresh(struct perf_event *event, int refresh);
extern void perf_event_update_userpage(struct perf_event *event);
extern int perf_event_release_kernel(struct perf_event *event);
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
				 int cpu,
				 struct task_struct *task,
				 perf_overflow_handler_t callback,
				 void *context);
extern void perf_pmu_migrate_context(struct pmu *pmu,
				     int src_cpu, int dst_cpu);
extern u64 perf_event_read_local(struct perf_event *event);
extern u64 perf_event_read_value(struct perf_event *event,
				 u64 *enabled, u64 *running);
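
/*
 * Example (illustrative sketch): creating an in-kernel counter with a custom
 * overflow handler, here a CPU-cycles event bound to @cpu.  The handler runs
 * with the perf_overflow_handler_t signature declared above; the attr values
 * below are only one plausible choice.
 *
 *	static void my_overflow(struct perf_event *event,
 *				struct perf_sample_data *data,
 *				struct pt_regs *regs)
 *	{
 *	}
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_HARDWARE,
 *		.config		= PERF_COUNT_HW_CPU_CYCLES,
 *		.size		= sizeof(struct perf_event_attr),
 *		.sample_period	= 1000000,
 *		.pinned		= 1,
 *	};
 *	struct perf_event *event;
 *
 *	event = perf_event_create_kernel_counter(&attr, cpu, NULL,
 *						 my_overflow, NULL);
 *	if (IS_ERR(event))
 *		return PTR_ERR(event);
 *	...
 *	perf_event_release_kernel(event);
 */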

struct perf_sample_data {
	/*
	 * Fields set by perf_sample_data_init(), group so as to
	 * minimize the cachelines touched.
	 */
	u64				addr;
	struct perf_raw_record		*raw;
	struct perf_branch_stack	*br_stack;
	u64				period;
	u64				weight;
	u64				txn;
	union perf_mem_data_src		data_src;

	/*
	 * The other fields, optionally {set,used} by
	 * perf_{prepare,output}_sample().
	 */
	u64				type;
	u64				ip;
	struct {
		u32	pid;
		u32	tid;
	}				tid_entry;
	u64				time;
	u64				id;
	u64				stream_id;
	struct {
		u32	cpu;
		u32	reserved;
	}				cpu_entry;
	struct perf_callchain_entry	*callchain;

	/*
	 * regs_user may point to task_pt_regs or to regs_user_copy, depending
	 * on arch details.
	 */
	struct perf_regs		regs_user;
	struct pt_regs			regs_user_copy;

	struct perf_regs		regs_intr;
	u64				stack_user_size;
} ____cacheline_aligned;

/* default value for data source */
#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
		     PERF_MEM_S(LVL, NA)   |\
		     PERF_MEM_S(SNOOP, NA) |\
		     PERF_MEM_S(LOCK, NA)  |\
		     PERF_MEM_S(TLB, NA))

static inline void perf_sample_data_init(struct perf_sample_data *data,
					 u64 addr, u64 period)
{
	/* remaining struct members initialized in perf_prepare_sample() */
	data->addr = addr;
	data->raw  = NULL;
	data->br_stack = NULL;
	data->period = period;
	data->weight = 0;
	data->data_src.val = PERF_MEM_NA;
	data->txn = 0;
}

extern void perf_output_sample(struct perf_output_handle *handle,
			       struct perf_event_header *header,
			       struct perf_sample_data *data,
			       struct perf_event *event);
extern void perf_prepare_sample(struct perf_event_header *header,
				struct perf_sample_data *data,
				struct perf_event *event,
				struct pt_regs *regs);

extern int perf_event_overflow(struct perf_event *event,
			       struct perf_sample_data *data,
			       struct pt_regs *regs);

extern void perf_event_output_forward(struct perf_event *event,
				      struct perf_sample_data *data,
				      struct pt_regs *regs);
extern void perf_event_output_backward(struct perf_event *event,
				       struct perf_sample_data *data,
				       struct pt_regs *regs);
extern void perf_event_output(struct perf_event *event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs);

static inline bool
is_default_overflow_handler(struct perf_event *event)
{
	if (likely(event->overflow_handler == perf_event_output_forward))
		return true;
	if (unlikely(event->overflow_handler == perf_event_output_backward))
		return true;
	return false;
}

extern void
perf_event_header__init_id(struct perf_event_header *header,
			   struct perf_sample_data *data,
			   struct perf_event *event);
extern void
perf_event__output_id_sample(struct perf_event *event,
			     struct perf_output_handle *handle,
			     struct perf_sample_data *sample);

extern void
perf_log_lost_samples(struct perf_event *event, u64 lost);

static inline bool is_sampling_event(struct perf_event *event)
{
	return event->attr.sample_period != 0;
}
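/*
 * Example (illustrative sketch of a driver's interrupt path): initialize a
 * sample and hand it to the generic overflow machinery; a non-zero return
 * from perf_event_overflow() asks the driver to stop the event.
 * my_pmu_stop() is hypothetical.
 *
 *	static void my_pmu_handle_irq(struct perf_event *event,
 *				      struct pt_regs *regs)
 *	{
 *		struct perf_sample_data data;
 *		struct hw_perf_event *hwc = &event->hw;
 *
 *		perf_sample_data_init(&data, 0, hwc->last_period);
 *
 *		if (perf_event_overflow(event, &data, regs))
 *			my_pmu_stop(event, 0);
 *	}
 */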
/*
 * Return 1 for a software event, 0 for a hardware event
 */
static inline int is_software_event(struct perf_event *event)
{
	return event->pmu->task_ctx_nr == perf_sw_context;
}

extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];

extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);

#ifndef perf_arch_fetch_caller_regs
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
#endif

/*
 * Take a snapshot of the regs. Skip ip and frame pointer to
 * the nth caller. We only need a few of the regs:
 * - ip for PERF_SAMPLE_IP
 * - cs for user_mode() tests
 * - bp for callchains
 * - eflags, for future purposes, just in case
 */
static inline void perf_fetch_caller_regs(struct pt_regs *regs)
{
	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
}

static __always_inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
{
	if (static_key_false(&perf_swevent_enabled[event_id]))
		__perf_sw_event(event_id, nr, regs, addr);
}

DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);

/*
 * 'Special' version for the scheduler; it hard-assumes no recursion,
 * which is guaranteed by us not actually scheduling inside other swevents
 * because those disable preemption.
 */
static __always_inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
{
	if (static_key_false(&perf_swevent_enabled[event_id])) {
		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);

		perf_fetch_caller_regs(regs);
		___perf_sw_event(event_id, nr, regs, addr);
	}
}
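
/*
 * Example (illustrative): counting a software event from arbitrary kernel
 * code, the way the fault handlers account page faults.  The static key keeps
 * this a NOP unless somebody is actually counting that event.
 *
 *	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 */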

extern struct static_key_false perf_sched_events;

static __always_inline bool
perf_sw_migrate_enabled(void)
{
	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
		return true;
	return false;
}

static inline void perf_event_task_migrate(struct task_struct *task)
{
	if (perf_sw_migrate_enabled())
		task->sched_migrated = 1;
}

static inline void perf_event_task_sched_in(struct task_struct *prev,
					    struct task_struct *task)
{
	if (static_branch_unlikely(&perf_sched_events))
		__perf_event_task_sched_in(prev, task);

	if (perf_sw_migrate_enabled() && task->sched_migrated) {
		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);

		perf_fetch_caller_regs(regs);
		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
		task->sched_migrated = 0;
	}
}

static inline void perf_event_task_sched_out(struct task_struct *prev,
					     struct task_struct *next)
{
	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);

	if (static_branch_unlikely(&perf_sched_events))
		__perf_event_task_sched_out(prev, next);
}

static inline u64 __perf_event_count(struct perf_event *event)
{
	return local64_read(&event->count) + atomic64_read(&event->child_count);
}

extern void perf_event_mmap(struct vm_area_struct *vma);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);

extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_fork(struct task_struct *tsk);

/* Callchains */
DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);

extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);
extern struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
		   bool crosstask, bool add_mark);
extern int get_callchain_buffers(void);
extern void put_callchain_buffers(void);

extern int sysctl_perf_event_max_stack;

static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < sysctl_perf_event_max_stack) {
		entry->ip[entry->nr++] = ip;
		return 0;
	} else {
		return -1; /* no more room, stop walking the stack */
	}
}
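
/*
 * Example (illustrative sketch of an architecture's kernel callchain walker):
 * every frame found by the unwinder is recorded with perf_callchain_store()
 * until the entry is full.  struct my_unwind_state and the my_unwind_*()
 * helpers are hypothetical stand-ins for the arch unwinder.
 *
 *	void perf_callchain_kernel(struct perf_callchain_entry *entry,
 *				   struct pt_regs *regs)
 *	{
 *		struct my_unwind_state state;
 *
 *		my_unwind_start(&state, regs);
 *		while (my_unwind_valid(&state)) {
 *			if (perf_callchain_store(entry, my_unwind_ip(&state)))
 *				break;
 *			my_unwind_next(&state);
 *		}
 *	}
 */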

extern int sysctl_perf_event_paranoid;
extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;
extern int sysctl_perf_cpu_time_max_percent;

extern void perf_sample_event_took(u64 sample_len_ns);

extern int perf_proc_update_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos);
extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos);

int perf_event_max_stack_handler(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp, loff_t *ppos);

static inline bool perf_paranoid_tracepoint_raw(void)
{
	return sysctl_perf_event_paranoid > -1;
}

static inline bool perf_paranoid_cpu(void)
{
	return sysctl_perf_event_paranoid > 0;
}

static inline bool perf_paranoid_kernel(void)
{
	return sysctl_perf_event_paranoid > 1;
}

extern void perf_event_init(void);
extern void perf_tp_event(u16 event_type, u64 count, void *record,
			  int entry_size, struct pt_regs *regs,
			  struct hlist_head *head, int rctx,
			  struct task_struct *task);
extern void perf_bp_event(struct perf_event *event, void *data);

#ifndef perf_misc_flags
# define perf_misc_flags(regs) \
		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
# define perf_instruction_pointer(regs)	instruction_pointer(regs)
#endif

static inline bool has_branch_stack(struct perf_event *event)
{
	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
}

static inline bool needs_branch_stack(struct perf_event *event)
{
	return event->attr.branch_sample_type != 0;
}

static inline bool has_aux(struct perf_event *event)
{
	return event->pmu->setup_aux;
}

static inline bool is_write_backward(struct perf_event *event)
{
	return !!event->attr.write_backward;
}

static inline bool has_addr_filter(struct perf_event *event)
{
	return event->pmu->nr_addr_filters;
}

/*
 * An inherited event uses parent's filters
 */
static inline struct perf_addr_filters_head *
perf_event_addr_filters(struct perf_event *event)
{
	struct perf_addr_filters_head *ifh = &event->addr_filters;

	if (event->parent)
		ifh = &event->parent->addr_filters;

	return ifh;
}

extern void perf_event_addr_filters_sync(struct perf_event *event);
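
/*
 * Example (illustrative sketch of a driver's ->addr_filters_sync() callback):
 * translate the hardware-agnostic filters into the PMU-private configuration
 * stashed in event->hw.addr_filters.  Serialization against filter updates is
 * provided by the caller, as documented for struct pmu above.
 * struct my_filters and my_filter_program() are hypothetical.
 *
 *	static void my_pmu_addr_filters_sync(struct perf_event *event)
 *	{
 *		struct perf_addr_filters_head *head = perf_event_addr_filters(event);
 *		struct my_filters *cfg = event->hw.addr_filters;
 *		struct perf_addr_filter *filter;
 *		int i = 0;
 *
 *		list_for_each_entry(filter, &head->list, entry)
 *			my_filter_program(cfg, i++, filter->offset,
 *					  filter->size, filter->filter);
 *	}
 */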

extern int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_event *event, unsigned int size);
extern int perf_output_begin_forward(struct perf_output_handle *handle,
				     struct perf_event *event,
				     unsigned int size);
extern int perf_output_begin_backward(struct perf_output_handle *handle,
				      struct perf_event *event,
				      unsigned int size);

extern void perf_output_end(struct perf_output_handle *handle);
extern unsigned int perf_output_copy(struct perf_output_handle *handle,
				     const void *buf, unsigned int len);
extern unsigned int perf_output_skip(struct perf_output_handle *handle,
				     unsigned int len);
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern u64 perf_swevent_set_period(struct perf_event *event);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
extern void perf_event_disable_local(struct perf_event *event);
extern void perf_event_task_tick(void);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
		      struct perf_event *event)				{ return NULL; }
static inline void
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
		    bool truncated)					{ }
static inline int
perf_aux_output_skip(struct perf_output_handle *handle,
		     unsigned long size)				{ return -EINVAL; }
static inline void *
perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
static inline void
perf_event_task_migrate(struct task_struct *task)			{ }
static inline void
perf_event_task_sched_in(struct task_struct *prev,
			 struct task_struct *task)			{ }
static inline void
perf_event_task_sched_out(struct task_struct *prev,
			  struct task_struct *next)			{ }
static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
static inline void perf_event_exit_task(struct task_struct *child)	{ }
static inline void perf_event_free_task(struct task_struct *task)	{ }
static inline void perf_event_delayed_put(struct task_struct *task)	{ }
static inline struct file *perf_event_get(unsigned int fd)		{ return ERR_PTR(-EINVAL); }
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
	return ERR_PTR(-EINVAL);
}
static inline u64 perf_event_read_local(struct perf_event *event)	{ return -EINVAL; }
static inline void perf_event_print_debug(void)				{ }
static inline int perf_event_task_disable(void)				{ return -EINVAL; }
static inline int perf_event_task_enable(void)				{ return -EINVAL; }
static inline int perf_event_refresh(struct perf_event *event, int refresh)
{
	return -EINVAL;
}

static inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
static inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)			{ }
static inline void
perf_bp_event(struct perf_event *event, void *data)			{ }

static inline int perf_register_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks)				{ return 0; }
static inline int perf_unregister_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks)				{ return 0; }

static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
static inline void perf_event_exec(void)				{ }
static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_init(void)				{ }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
static inline void perf_swevent_put_recursion_context(int rctx)		{ }
static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
static inline void perf_event_enable(struct perf_event *event)		{ }
static inline void perf_event_disable(struct perf_event *event)		{ }
static inline int __perf_event_disable(void *info)			{ return -1; }
static inline void perf_event_task_tick(void)				{ }
static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
static inline void perf_restore_debug_store(void)			{ }
#endif

#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))

/*
 * This has to have a higher priority than migration_notifier in sched/core.c.
 */
#define perf_cpu_notifier(fn)						\
do {									\
	static struct notifier_block fn##_nb =				\
		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
	unsigned long cpu = smp_processor_id();				\
	unsigned long flags;						\
									\
	cpu_notifier_register_begin();					\
	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,			\
		(void *)(unsigned long)cpu);				\
	local_irq_save(flags);						\
	fn(&fn##_nb, (unsigned long)CPU_STARTING,			\
		(void *)(unsigned long)cpu);				\
	local_irq_restore(flags);					\
	fn(&fn##_nb, (unsigned long)CPU_ONLINE,				\
		(void *)(unsigned long)cpu);				\
	__register_cpu_notifier(&fn##_nb);				\
	cpu_notifier_register_done();					\
} while (0)
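
/*
 * Example (illustrative): a PMU driver registering a hotplug callback with
 * the macro above; the macro also invokes the callback for CPUs that are
 * already online.  my_pmu_init_cpu() and my_pmu_exit_cpu() are hypothetical.
 *
 *	static int my_pmu_cpu_notify(struct notifier_block *nb,
 *				     unsigned long action, void *hcpu)
 *	{
 *		unsigned int cpu = (unsigned long)hcpu;
 *
 *		switch (action & ~CPU_TASKS_FROZEN) {
 *		case CPU_ONLINE:
 *			my_pmu_init_cpu(cpu);
 *			break;
 *		case CPU_DOWN_PREPARE:
 *			my_pmu_exit_cpu(cpu);
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	perf_cpu_notifier(my_pmu_cpu_notify);
 */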

/*
 * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
 * callback for already online CPUs.
 */
#define __perf_cpu_notifier(fn)						\
do {									\
	static struct notifier_block fn##_nb =				\
		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
									\
	__register_cpu_notifier(&fn##_nb);				\
} while (0)

struct perf_pmu_events_attr {
	struct device_attribute attr;
	u64 id;
	const char *event_str;
};

ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
			      char *page);

#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
static struct perf_pmu_events_attr _var = {				\
	.attr = __ATTR(_name, 0444, _show, NULL),			\
	.id   =  _id,							\
};

#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			\
static struct perf_pmu_events_attr _var = {				\
	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL),	\
	.id		= 0,						\
	.event_str	= _str,						\
};

#define PMU_FORMAT_ATTR(_name, _format)					\
static ssize_t								\
_name##_show(struct device *dev,					\
	     struct device_attribute *attr,				\
	     char *page)						\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
									\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)

#endif /* _LINUX_PERF_EVENT_H */