/*
 * Performance events:
 *
 *    Copyright (C) 2008-2009, Thomas Gleixner <[email protected]>
 *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
 *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
 *
 * Data type definitions, declarations, prototypes.
 *
 *    Started by: Thomas Gleixner and Ingo Molnar
 *
 * For licensing details see kernel-base/COPYING
 */
#ifndef _LINUX_PERF_EVENT_H
#define _LINUX_PERF_EVENT_H

#include <uapi/linux/perf_event.h>

/*
 * Kernel-internal data types and definitions:
 */

#ifdef CONFIG_PERF_EVENTS
# include <asm/perf_event.h>
# include <asm/local64.h>
#endif

struct perf_guest_info_callbacks {
	int				(*is_in_guest)(void);
	int				(*is_user_mode)(void);
	unsigned long			(*get_guest_ip)(void);
};

#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <asm/hw_breakpoint.h>
#endif

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/pid_namespace.h>
#include <linux/workqueue.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/irq_work.h>
#include <linux/static_key.h>
#include <linux/jump_label_ratelimit.h>
#include <linux/atomic.h>
#include <linux/sysfs.h>
#include <linux/perf_regs.h>
#include <linux/cgroup.h>
#include <asm/local.h>

struct perf_callchain_entry {
	__u64				nr;
	__u64				ip[PERF_MAX_STACK_DEPTH];
};

struct perf_raw_record {
	u32				size;
	void				*data;
};

/*
 * branch stack layout:
 *  nr: number of taken branches stored in entries[]
 *
 * Note that nr can vary from sample to sample.
 * Branches (to, from) are stored from most recent
 * to least recent, i.e., entries[0] contains the most
 * recent branch.
 */
struct perf_branch_stack {
	__u64				nr;
	struct perf_branch_entry	entries[0];
};

struct task_struct;

/*
 * extra PMU register associated with an event
 */
struct hw_perf_event_extra {
	u64		config;	/* register value */
	unsigned int	reg;	/* register address or index */
	int		alloc;	/* extra register already allocated */
	int		idx;	/* index in shared_regs->regs[] */
};

struct event_constraint;

/**
 * struct hw_perf_event - performance event hardware details:
 */
struct hw_perf_event {
#ifdef CONFIG_PERF_EVENTS
	union {
		struct { /* hardware */
			u64		config;
			u64		last_tag;
			unsigned long	config_base;
			unsigned long	event_base;
			int		event_base_rdpmc;
			int		idx;
			int		last_cpu;
			int		flags;

			struct hw_perf_event_extra extra_reg;
			struct hw_perf_event_extra branch_reg;

			struct event_constraint *constraint;
		};
		struct { /* software */
			struct hrtimer	hrtimer;
		};
		struct { /* tracepoint */
			/* for tp_event->class */
			struct list_head	tp_list;
		};
		struct { /* intel_cqm */
			int			cqm_state;
			int			cqm_rmid;
			struct list_head	cqm_events_entry;
			struct list_head	cqm_groups_entry;
			struct list_head	cqm_group_entry;
		};
		struct { /* itrace */
			int			itrace_started;
		};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
		struct { /* breakpoint */
			/*
			 * Crufty hack to avoid the chicken and egg
			 * problem hw_breakpoint has with context
			 * creation and event initialization.
			 */
			struct arch_hw_breakpoint	info;
			struct list_head		bp_list;
		};
#endif
	};
	struct task_struct		*target;
	int				state;
	local64_t			prev_count;
	u64				sample_period;
	u64				last_period;
	local64_t			period_left;
	u64				interrupts_seq;
	u64				interrupts;

	u64				freq_time_stamp;
	u64				freq_count_stamp;
#endif
};

/*
 * hw_perf_event::state flags
 */
#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
#define PERF_HES_ARCH		0x04

struct perf_event;

/*
 * Common implementation detail of pmu::{start,commit,cancel}_txn
 */
#define PERF_EVENT_TXN 0x1

/**
 * pmu::capabilities flags
 */
#define PERF_PMU_CAP_NO_INTERRUPT	0x01
#define PERF_PMU_CAP_NO_NMI		0x02
#define PERF_PMU_CAP_AUX_NO_SG		0x04
#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF	0x08
#define PERF_PMU_CAP_EXCLUSIVE		0x10
#define PERF_PMU_CAP_ITRACE		0x20

/**
 * struct pmu - generic performance monitoring unit
 */
struct pmu {
	struct list_head		entry;

	struct module			*module;
	struct device			*dev;
	const struct attribute_group	**attr_groups;
	const char			*name;
	int				type;

	/*
	 * various common per-pmu feature flags
	 */
	int				capabilities;

	int * __percpu			pmu_disable_count;
	struct perf_cpu_context * __percpu pmu_cpu_context;
	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
	int				task_ctx_nr;
	int				hrtimer_interval_ms;

	/*
	 * Fully disable/enable this PMU, can be used to protect from the PMI
	 * as well as for lazy/batch writing of the MSRs.
	 */
	void (*pmu_enable)		(struct pmu *pmu); /* optional */
	void (*pmu_disable)		(struct pmu *pmu); /* optional */

	/*
	 * Try and initialize the event for this PMU.
	 * Should return -ENOENT when the @event doesn't match this PMU.
	 */
	int (*event_init)		(struct perf_event *event);

	/*
	 * Notification that the event was mapped or unmapped.  Called
	 * in the context of the mapping task.
	 */
	void (*event_mapped)		(struct perf_event *event); /* optional */
	void (*event_unmapped)		(struct perf_event *event); /* optional */

#define PERF_EF_START	0x01		/* start the counter when adding    */
#define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
#define PERF_EF_UPDATE	0x04		/* update the counter when stopping */

	/*
	 * Adds/Removes a counter to/from the PMU, can be done inside
	 * a transaction, see the ->*_txn() methods.
	 */
	int  (*add)			(struct perf_event *event, int flags);
	void (*del)			(struct perf_event *event, int flags);

	/*
	 * Starts/Stops a counter present on the PMU.  The PMI handler
	 * should stop the counter when perf_event_overflow() returns
	 * !0.  ->start() will be used to continue.
	 */
	void (*start)			(struct perf_event *event, int flags);
	void (*stop)			(struct perf_event *event, int flags);

	/*
	 * Updates the counter value of the event.
	 */
	void (*read)			(struct perf_event *event);

	/*
	 * Group events scheduling is treated as a transaction: add
	 * group events as a whole and perform one schedulability test.
	 * If the test fails, roll back the whole group.
	 *
	 * Start the transaction; after this ->add() doesn't need to
	 * do schedulability tests.
	 */
	void (*start_txn)		(struct pmu *pmu); /* optional */
	/*
	 * If ->start_txn() disabled the ->add() schedulability test
	 * then ->commit_txn() is required to perform one.  On success
	 * the transaction is closed.  On error the transaction is kept
	 * open until ->cancel_txn() is called.
	 */
	int  (*commit_txn)		(struct pmu *pmu); /* optional */
	/*
	 * Will cancel the transaction; assumes ->del() is called
	 * for each successful ->add() during the transaction.
	 */
	void (*cancel_txn)		(struct pmu *pmu); /* optional */

	/*
	 * Will return the value for perf_event_mmap_page::index for this
	 * event; if no implementation is provided it will default to
	 * event->hw.idx + 1.
	 */
	int (*event_idx)		(struct perf_event *event); /* optional */

	/*
	 * context-switches callback
	 */
	void (*sched_task)		(struct perf_event_context *ctx,
					 bool sched_in);
	/*
	 * PMU specific data size
	 */
	size_t				task_ctx_size;

	/*
	 * Return the count value for a counter.
	 */
	u64 (*count)			(struct perf_event *event); /* optional */

	/*
	 * Set up pmu-private data structures for an AUX area
	 */
	void *(*setup_aux)		(int cpu, void **pages,
					 int nr_pages, bool overwrite);
					/* optional */

	/*
	 * Free pmu-private AUX data structures
	 */
	void (*free_aux)		(void *aux); /* optional */
};
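
/*
 * Example: the minimal set of struct pmu callbacks a driver provides and how
 * they are registered.  This is an illustrative sketch only (the my_pmu_*
 * names are made up, it is not part of this header); error handling and the
 * actual counter programming are left out.
 *
 *	static int my_pmu_event_init(struct perf_event *event)
 *	{
 *		if (event->attr.type != event->pmu->type)
 *			return -ENOENT;	// not ours, let other PMUs try
 *		return 0;
 *	}
 *
 *	static int my_pmu_add(struct perf_event *event, int flags)
 *	{
 *		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 *		if (flags & PERF_EF_START)
 *			my_pmu_start(event, PERF_EF_RELOAD);
 *		return 0;
 *	}
 *
 *	static struct pmu my_pmu = {
 *		.task_ctx_nr	= perf_invalid_context,	// per-CPU only
 *		.event_init	= my_pmu_event_init,
 *		.add		= my_pmu_add,
 *		.del		= my_pmu_del,
 *		.start		= my_pmu_start,
 *		.stop		= my_pmu_stop,
 *		.read		= my_pmu_read,
 *	};
 *
 *	// passing type == -1 asks perf to allocate a dynamic type id:
 *	ret = perf_pmu_register(&my_pmu, "my_pmu", -1);
 */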

/**
 * enum perf_event_active_state - the states of an event
 */
enum perf_event_active_state {
	PERF_EVENT_STATE_EXIT		= -3,
	PERF_EVENT_STATE_ERROR		= -2,
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
	PERF_EVENT_STATE_ACTIVE		=  1,
};

struct file;
struct perf_sample_data;

typedef void (*perf_overflow_handler_t)(struct perf_event *,
					struct perf_sample_data *,
					struct pt_regs *regs);

enum perf_group_flag {
	PERF_GROUP_SOFTWARE		= 0x1,
};

#define SWEVENT_HLIST_BITS		8
#define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)

struct swevent_hlist {
	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
	struct rcu_head			rcu_head;
};

#define PERF_ATTACH_CONTEXT	0x01
#define PERF_ATTACH_GROUP	0x02
#define PERF_ATTACH_TASK	0x04
#define PERF_ATTACH_TASK_DATA	0x08

struct perf_cgroup;
struct ring_buffer;

/**
 * struct perf_event - performance event kernel representation:
 */
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
	/*
	 * entry onto perf_event_context::event_list;
	 *   modifications require ctx->lock
	 *   RCU safe iterations.
	 */
	struct list_head		event_entry;

	/*
	 * XXX: group_entry and sibling_list should be mutually exclusive;
	 * either you're a sibling on a group, or you're the group leader.
	 * Rework the code to always use the same list element.
	 *
	 * Locked for modification by both ctx->mutex and ctx->lock; holding
	 * either suffices for read.
	 */
	struct list_head		group_entry;
	struct list_head		sibling_list;

	/*
	 * We need storage to track the entries in perf_pmu_migrate_context; we
	 * cannot use the event_entry because of RCU and we want to keep the
	 * group intact, which avoids us using the other two entries.
	 */
	struct list_head		migrate_entry;

	struct hlist_node		hlist_entry;
	struct list_head		active_entry;
	int				nr_siblings;
	int				group_flags;
	struct perf_event		*group_leader;
	struct pmu			*pmu;

	enum perf_event_active_state	state;
	unsigned int			attach_state;
	local64_t			count;
	atomic64_t			child_count;

	/*
	 * These are the total time in nanoseconds that the event
	 * has been enabled (i.e. eligible to run, and the task has
	 * been scheduled in, if this is a per-task event)
	 * and running (scheduled onto the CPU), respectively.
	 *
	 * They are computed from tstamp_enabled, tstamp_running and
	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
	 */
	u64				total_time_enabled;
	u64				total_time_running;

	/*
	 * These are timestamps used for computing total_time_enabled
	 * and total_time_running when the event is in INACTIVE or
	 * ACTIVE state, measured in nanoseconds from an arbitrary point
	 * in time.
	 * tstamp_enabled: the notional time when the event was enabled
	 * tstamp_running: the notional time when the event was scheduled on
	 * tstamp_stopped: in INACTIVE state, the notional time when the
	 *	event was scheduled off.
	 */
	u64				tstamp_enabled;
	u64				tstamp_running;
	u64				tstamp_stopped;

	/*
	 * timestamp shadows the actual context timing but it can
	 * be safely used in NMI interrupt context. It reflects the
	 * context time as it was when the event was last scheduled in.
	 *
	 * ctx_time already accounts for ctx->timestamp. Therefore to
	 * compute ctx_time for a sample, simply add perf_clock().
	 */
	u64				shadow_ctx_time;

	struct perf_event_attr		attr;
	u16				header_size;
	u16				id_header_size;
	u16				read_size;
	struct hw_perf_event		hw;

	struct perf_event_context	*ctx;
	atomic_long_t			refcount;

	/*
	 * These accumulate total time (in nanoseconds) that children
	 * events have been enabled and running, respectively.
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

	/*
	 * Protect attach/detach and child_list:
	 */
	struct mutex			child_mutex;
	struct list_head		child_list;
	struct perf_event		*parent;

	int				oncpu;
	int				cpu;

	struct list_head		owner_entry;
	struct task_struct		*owner;

	/* mmap bits */
	struct mutex			mmap_mutex;
	atomic_t			mmap_count;

	struct ring_buffer		*rb;
	struct list_head		rb_entry;
	unsigned long			rcu_batches;
	int				rcu_pending;

	/* poll related */
	wait_queue_head_t		waitq;
	struct fasync_struct		*fasync;

	/* delayed work for NMIs and such */
	int				pending_wakeup;
	int				pending_kill;
	int				pending_disable;
	struct irq_work			pending;

	atomic_t			event_limit;

	void (*destroy)(struct perf_event *);
	struct rcu_head			rcu_head;

	struct pid_namespace		*ns;
	u64				id;

	u64				(*clock)(void);
	perf_overflow_handler_t		overflow_handler;
	void				*overflow_handler_context;

#ifdef CONFIG_EVENT_TRACING
	struct ftrace_event_call	*tp_event;
	struct event_filter		*filter;
#ifdef CONFIG_FUNCTION_TRACER
	struct ftrace_ops		ftrace_ops;
#endif
#endif

#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp; /* cgroup the event is attached to */
	int				cgrp_defer_enabled;
#endif

#endif /* CONFIG_PERF_EVENTS */
};

/**
 * struct perf_event_context - event context structure
 *
 * Used as a container for task events and CPU events as well:
 */
struct perf_event_context {
	struct pmu			*pmu;
	/*
	 * Protect the states of the events in the list,
	 * nr_active, and the list:
	 */
	raw_spinlock_t			lock;
	/*
	 * Protect the list of events.  Locking either mutex or lock
	 * is sufficient to ensure the list doesn't change; to change
	 * the list you need to lock both the mutex and the spinlock.
	 */
	struct mutex			mutex;

	struct list_head		active_ctx_list;
	struct list_head		pinned_groups;
	struct list_head		flexible_groups;
	struct list_head		event_list;
	int				nr_events;
	int				nr_active;
	int				is_active;
	int				nr_stat;
	int				nr_freq;
	int				rotate_disable;
	atomic_t			refcount;
	struct task_struct		*task;

	/*
	 * Context clock, runs when context enabled.
	 */
	u64				time;
	u64				timestamp;

	/*
	 * These fields let us detect when two contexts have both
	 * been cloned (inherited) from a common ancestor.
	 */
	struct perf_event_context	*parent_ctx;
	u64				parent_gen;
	u64				generation;
	int				pin_count;
	int				nr_cgroups;	/* cgroup evts */
	void				*task_ctx_data;	/* pmu specific data */
	struct rcu_head			rcu_head;

	struct delayed_work		orphans_remove;
	bool				orphans_remove_sched;
};

/*
 * Number of contexts where an event can trigger:
 *	task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS	4

/**
 * struct perf_cpu_context - per cpu event context structure
 */
struct perf_cpu_context {
	struct perf_event_context	ctx;
	struct perf_event_context	*task_ctx;
	int				active_oncpu;
	int				exclusive;
	struct hrtimer			hrtimer;
	ktime_t				hrtimer_interval;
	struct pmu			*unique_pmu;
	struct perf_cgroup		*cgrp;
};

struct perf_output_handle {
	struct perf_event		*event;
	struct ring_buffer		*rb;
	unsigned long			wakeup;
	unsigned long			size;
	union {
		void			*addr;
		unsigned long		head;
	};
	int				page;
};

#ifdef CONFIG_CGROUP_PERF

/*
 * perf_cgroup_info keeps track of time_enabled for a cgroup.
 * This is a per-cpu dynamically allocated data structure.
 */
struct perf_cgroup_info {
	u64				time;
	u64				timestamp;
};

struct perf_cgroup {
	struct cgroup_subsys_state	css;
	struct perf_cgroup_info	__percpu *info;
};

/*
 * Must ensure cgroup is pinned (css_get) before calling
 * this function. In other words, we cannot call this function
 * if there is no cgroup event for the current CPU context.
 */
static inline struct perf_cgroup *
perf_cgroup_from_task(struct task_struct *task)
{
	return container_of(task_css(task, perf_event_cgrp_id),
			    struct perf_cgroup, css);
}
#endif /* CONFIG_CGROUP_PERF */

#ifdef CONFIG_PERF_EVENTS

extern void *perf_aux_output_begin(struct perf_output_handle *handle,
				   struct perf_event *event);
extern void perf_aux_output_end(struct perf_output_handle *handle,
				unsigned long size, bool truncated);
extern int perf_aux_output_skip(struct perf_output_handle *handle,
				unsigned long size);
extern void *perf_get_aux(struct perf_output_handle *handle);

extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
extern void perf_pmu_unregister(struct pmu *pmu);

extern int perf_num_counters(void);
extern const char *perf_pmu_name(void);
extern void __perf_event_task_sched_in(struct task_struct *prev,
				       struct task_struct *task);
extern void __perf_event_task_sched_out(struct task_struct *prev,
					struct task_struct *next);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
extern void perf_pmu_enable(struct pmu *pmu);
extern void perf_sched_cb_dec(struct pmu *pmu);
extern void perf_sched_cb_inc(struct pmu *pmu);
extern int perf_event_task_disable(void);
extern int perf_event_task_enable(void);
extern int perf_event_refresh(struct perf_event *event, int refresh);
extern void perf_event_update_userpage(struct perf_event *event);
extern int perf_event_release_kernel(struct perf_event *event);
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
				 int cpu,
				 struct task_struct *task,
				 perf_overflow_handler_t callback,
				 void *context);
extern void perf_pmu_migrate_context(struct pmu *pmu,
				     int src_cpu, int dst_cpu);
extern u64 perf_event_read_value(struct perf_event *event,
				 u64 *enabled, u64 *running);
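
/*
 * Example: in-kernel use of the counter API declared above.  Illustrative
 * sketch only (not part of this header); the attribute values and error
 * handling are the caller's choice.
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_HARDWARE,
 *		.config		= PERF_COUNT_HW_CPU_CYCLES,
 *		.size		= sizeof(attr),
 *		.disabled	= 1,
 *	};
 *	struct perf_event *event;
 *	u64 enabled, running, count;
 *
 *	// count cycles on CPU 0, no overflow callback, no private context
 *	event = perf_event_create_kernel_counter(&attr, 0, NULL, NULL, NULL);
 *	if (IS_ERR(event))
 *		return PTR_ERR(event);
 *
 *	perf_event_enable(event);
 *	// ... workload ...
 *	count = perf_event_read_value(event, &enabled, &running);
 *	perf_event_release_kernel(event);
 */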

struct perf_sample_data {
	/*
	 * Fields set by perf_sample_data_init(), group so as to
	 * minimize the cachelines touched.
	 */
	u64				addr;
	struct perf_raw_record		*raw;
	struct perf_branch_stack	*br_stack;
	u64				period;
	u64				weight;
	u64				txn;
	union perf_mem_data_src		data_src;

	/*
	 * The other fields, optionally {set,used} by
	 * perf_{prepare,output}_sample().
	 */
	u64				type;
	u64				ip;
	struct {
		u32	pid;
		u32	tid;
	}				tid_entry;
	u64				time;
	u64				id;
	u64				stream_id;
	struct {
		u32	cpu;
		u32	reserved;
	}				cpu_entry;
	struct perf_callchain_entry	*callchain;

	/*
	 * regs_user may point to task_pt_regs or to regs_user_copy, depending
	 * on arch details.
	 */
	struct perf_regs		regs_user;
	struct pt_regs			regs_user_copy;

	struct perf_regs		regs_intr;
	u64				stack_user_size;
} ____cacheline_aligned;

/* default value for data source */
#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
		     PERF_MEM_S(LVL, NA)   |\
		     PERF_MEM_S(SNOOP, NA) |\
		     PERF_MEM_S(LOCK, NA)  |\
		     PERF_MEM_S(TLB, NA))

static inline void perf_sample_data_init(struct perf_sample_data *data,
					 u64 addr, u64 period)
{
	/* remaining struct members initialized in perf_prepare_sample() */
	data->addr = addr;
	data->raw  = NULL;
	data->br_stack = NULL;
	data->period = period;
	data->weight = 0;
	data->data_src.val = PERF_MEM_NA;
	data->txn = 0;
}

extern void perf_output_sample(struct perf_output_handle *handle,
			       struct perf_event_header *header,
			       struct perf_sample_data *data,
			       struct perf_event *event);
extern void perf_prepare_sample(struct perf_event_header *header,
				struct perf_sample_data *data,
				struct perf_event *event,
				struct pt_regs *regs);

extern int perf_event_overflow(struct perf_event *event,
			       struct perf_sample_data *data,
			       struct pt_regs *regs);

static inline bool is_sampling_event(struct perf_event *event)
{
	return event->attr.sample_period != 0;
}
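
/*
 * Example: how a PMU interrupt (overflow) handler is expected to use the
 * sampling helpers above.  Illustrative sketch only; the surrounding handler,
 * the 'event' lookup and the last_period handling are PMU specific.
 *
 *	struct perf_sample_data data;
 *	struct hw_perf_event *hwc = &event->hw;
 *
 *	perf_sample_data_init(&data, 0, hwc->last_period);
 *
 *	if (perf_event_overflow(event, &data, regs))
 *		event->pmu->stop(event, 0);	// see ->stop()/->start() above
 */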

/*
 * Return 1 for a software event, 0 for a hardware event
 */
static inline int is_software_event(struct perf_event *event)
{
	return event->pmu->task_ctx_nr == perf_sw_context;
}

extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];

extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);

#ifndef perf_arch_fetch_caller_regs
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
#endif

/*
 * Take a snapshot of the regs. Skip ip and frame pointer to
 * the nth caller. We only need a few of the regs:
 * - ip for PERF_SAMPLE_IP
 * - cs for user_mode() tests
 * - bp for callchains
 * - eflags, for future purposes, just in case
 */
static inline void perf_fetch_caller_regs(struct pt_regs *regs)
{
	memset(regs, 0, sizeof(*regs));

	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
}

static __always_inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
{
	if (static_key_false(&perf_swevent_enabled[event_id]))
		__perf_sw_event(event_id, nr, regs, addr);
}

DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);

/*
 * 'Special' version for the scheduler, it hard assumes no recursion,
 * which is guaranteed by us not actually scheduling inside other swevents
 * because those disable preemption.
 */
static __always_inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
{
	if (static_key_false(&perf_swevent_enabled[event_id])) {
		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);

		perf_fetch_caller_regs(regs);
		___perf_sw_event(event_id, nr, regs, addr);
	}
}

extern struct static_key_deferred perf_sched_events;

static __always_inline bool
perf_sw_migrate_enabled(void)
{
	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
		return true;
	return false;
}
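
/*
 * Example: emitting a software event from kernel code.  Thanks to the static
 * key above, this compiles down to a patched-out branch when no matching
 * software event is active.  Illustrative sketch; the fault-handler context
 * ('regs', 'address') is assumed.
 *
 *	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 */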

static inline void perf_event_task_migrate(struct task_struct *task)
{
	if (perf_sw_migrate_enabled())
		task->sched_migrated = 1;
}

static inline void perf_event_task_sched_in(struct task_struct *prev,
					    struct task_struct *task)
{
	if (static_key_false(&perf_sched_events.key))
		__perf_event_task_sched_in(prev, task);

	if (perf_sw_migrate_enabled() && task->sched_migrated) {
		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);

		perf_fetch_caller_regs(regs);
		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
		task->sched_migrated = 0;
	}
}

static inline void perf_event_task_sched_out(struct task_struct *prev,
					     struct task_struct *next)
{
	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);

	if (static_key_false(&perf_sched_events.key))
		__perf_event_task_sched_out(prev, next);
}

static inline u64 __perf_event_count(struct perf_event *event)
{
	return local64_read(&event->count) + atomic64_read(&event->child_count);
}

extern void perf_event_mmap(struct vm_area_struct *vma);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);

extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_fork(struct task_struct *tsk);

/* Callchains */
DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);

extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);

static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

extern int sysctl_perf_event_paranoid;
extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;
extern int sysctl_perf_cpu_time_max_percent;

extern void perf_sample_event_took(u64 sample_len_ns);

extern int perf_proc_update_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos);
extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos);


static inline bool perf_paranoid_tracepoint_raw(void)
{
	return sysctl_perf_event_paranoid > -1;
}

static inline bool perf_paranoid_cpu(void)
{
	return sysctl_perf_event_paranoid > 0;
}

static inline bool perf_paranoid_kernel(void)
{
	return sysctl_perf_event_paranoid > 1;
}

extern void perf_event_init(void);
extern void perf_tp_event(u64 addr, u64 count, void *record,
			  int entry_size, struct pt_regs *regs,
			  struct hlist_head *head, int rctx,
			  struct task_struct *task);
extern void perf_bp_event(struct perf_event *event, void *data);

#ifndef perf_misc_flags
# define perf_misc_flags(regs) \
		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
# define perf_instruction_pointer(regs)	instruction_pointer(regs)
#endif

static inline bool has_branch_stack(struct perf_event *event)
{
	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
}

static inline bool needs_branch_stack(struct perf_event *event)
{
	return event->attr.branch_sample_type != 0;
}

static inline bool has_aux(struct perf_event *event)
{
	return event->pmu->setup_aux;
}

extern int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_event *event, unsigned int size);
extern void perf_output_end(struct perf_output_handle *handle);
extern unsigned int perf_output_copy(struct perf_output_handle *handle,
				     const void *buf, unsigned int len);
extern unsigned int perf_output_skip(struct perf_output_handle *handle,
				     unsigned int len);
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern u64 perf_swevent_set_period(struct perf_event *event);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
extern int __perf_event_disable(void *info);
extern void perf_event_task_tick(void);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
		      struct perf_event *event)				{ return NULL; }
static inline void
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
		    bool truncated)					{ }
static inline int
perf_aux_output_skip(struct perf_output_handle *handle,
		     unsigned long size)				{ return -EINVAL; }
static inline void *
perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
static inline void
perf_event_task_migrate(struct task_struct *task)			{ }
static inline void
perf_event_task_sched_in(struct task_struct *prev,
			 struct task_struct *task)			{ }
static inline void
perf_event_task_sched_out(struct task_struct *prev,
			  struct task_struct *next)			{ }
static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
static inline void perf_event_exit_task(struct task_struct *child)	{ }
static inline void perf_event_free_task(struct task_struct *task)	{ }
static inline void perf_event_delayed_put(struct task_struct *task)	{ }
static inline void perf_event_print_debug(void)				{ }
static inline int perf_event_task_disable(void)				{ return -EINVAL; }
static inline int perf_event_task_enable(void)				{ return -EINVAL; }
static inline int perf_event_refresh(struct perf_event *event, int refresh)
{
	return -EINVAL;
}

static inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
static inline void
perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)			{ }
static inline void
perf_bp_event(struct perf_event *event, void *data)			{ }

static inline int perf_register_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks)				{ return 0; }
static inline int perf_unregister_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks)				{ return 0; }

static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
static inline void perf_event_exec(void)				{ }
static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_init(void)				{ }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
static inline void perf_swevent_put_recursion_context(int rctx)		{ }
static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
static inline void perf_event_enable(struct perf_event *event)		{ }
static inline void perf_event_disable(struct perf_event *event)		{ }
static inline int __perf_event_disable(void *info)			{ return -1; }
static inline void perf_event_task_tick(void)				{ }
#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
extern bool perf_event_can_stop_tick(void);
#else
static inline bool perf_event_can_stop_tick(void)			{ return true; }
#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
static inline void perf_restore_debug_store(void)			{ }
#endif

#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))

/*
 * This has to have a higher priority than migration_notifier in sched/core.c.
 */
#define perf_cpu_notifier(fn)						\
do {									\
	static struct notifier_block fn##_nb =				\
		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
	unsigned long cpu = smp_processor_id();				\
	unsigned long flags;						\
									\
	cpu_notifier_register_begin();					\
	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,			\
		(void *)(unsigned long)cpu);				\
	local_irq_save(flags);						\
	fn(&fn##_nb, (unsigned long)CPU_STARTING,			\
		(void *)(unsigned long)cpu);				\
	local_irq_restore(flags);					\
	fn(&fn##_nb, (unsigned long)CPU_ONLINE,				\
		(void *)(unsigned long)cpu);				\
	__register_cpu_notifier(&fn##_nb);				\
	cpu_notifier_register_done();					\
} while (0)

/*
 * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
 * callback for already online CPUs.
 */
#define __perf_cpu_notifier(fn)						\
do {									\
	static struct notifier_block fn##_nb =				\
		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
									\
	__register_cpu_notifier(&fn##_nb);				\
} while (0)

struct perf_pmu_events_attr {
	struct device_attribute attr;
	u64 id;
	const char *event_str;
};

ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
			      char *page);

#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
static struct perf_pmu_events_attr _var = {				\
	.attr = __ATTR(_name, 0444, _show, NULL),			\
	.id   = _id,							\
};

#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			\
static struct perf_pmu_events_attr _var = {				\
	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
	.id		= 0,						\
	.event_str	= _str,						\
};

#define PMU_FORMAT_ATTR(_name, _format)					\
static ssize_t								\
_name##_show(struct device *dev,					\
	     struct device_attribute *attr,				\
	     char *page)						\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
									\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
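
/*
 * Example: how a PMU driver typically uses the attribute helpers above to
 * populate its "format" and "events" sysfs groups.  Illustrative sketch;
 * the attribute names, encoding strings and my_pmu_* identifiers are made up.
 *
 *	PMU_FORMAT_ATTR(event, "config:0-7");
 *	PMU_EVENT_ATTR_STRING(cpu-cycles, evattr_cpu_cycles, "event=0x3c");
 *
 *	static struct attribute *my_pmu_format_attrs[] = {
 *		&format_attr_event.attr,
 *		NULL,
 *	};
 *	static const struct attribute_group my_pmu_format_group = {
 *		.name  = "format",
 *		.attrs = my_pmu_format_attrs,
 *	};
 *
 *	static struct attribute *my_pmu_events_attrs[] = {
 *		&evattr_cpu_cycles.attr.attr,
 *		NULL,
 *	};
 *	static const struct attribute_group my_pmu_events_group = {
 *		.name  = "events",
 *		.attrs = my_pmu_events_attrs,
 *	};
 *
 *	// both groups then go into the NULL-terminated my_pmu.attr_groups array
 */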

#endif /* _LINUX_PERF_EVENT_H */