xref: /linux-6.15/include/linux/perf_event.h (revision 4f3db074)
1 /*
2  * Performance events:
3  *
4  *    Copyright (C) 2008-2009, Thomas Gleixner <[email protected]>
5  *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
6  *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
7  *
8  * Data type definitions, declarations, prototypes.
9  *
10  *    Started by: Thomas Gleixner and Ingo Molnar
11  *
12  * For licensing details see kernel-base/COPYING
13  */
14 #ifndef _LINUX_PERF_EVENT_H
15 #define _LINUX_PERF_EVENT_H
16 
17 #include <uapi/linux/perf_event.h>
18 
19 /*
20  * Kernel-internal data types and definitions:
21  */
22 
23 #ifdef CONFIG_PERF_EVENTS
24 # include <asm/perf_event.h>
25 # include <asm/local64.h>
26 #endif
27 
28 struct perf_guest_info_callbacks {
29 	int				(*is_in_guest)(void);
30 	int				(*is_user_mode)(void);
31 	unsigned long			(*get_guest_ip)(void);
32 };
33 
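/*
 * Example (illustrative sketch, not part of the original header): how a
 * hypervisor module might wire up these callbacks so that the PMI handler
 * can attribute samples to guest context.  The my_*() helpers are
 * hypothetical; the register/unregister functions are declared further down
 * in this file.
 *
 *	static int my_is_in_guest(void)
 *	{
 *		return my_vcpu_is_loaded();
 *	}
 *
 *	static int my_is_user_mode(void)
 *	{
 *		return my_guest_cpl() != 0;
 *	}
 *
 *	static unsigned long my_get_guest_ip(void)
 *	{
 *		return my_guest_instruction_pointer();
 *	}
 *
 *	static struct perf_guest_info_callbacks my_guest_cbs = {
 *		.is_in_guest	= my_is_in_guest,
 *		.is_user_mode	= my_is_user_mode,
 *		.get_guest_ip	= my_get_guest_ip,
 *	};
 *
 *	perf_register_guest_info_callbacks(&my_guest_cbs);	// module init
 *	perf_unregister_guest_info_callbacks(&my_guest_cbs);	// module exit
 */
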
34 #ifdef CONFIG_HAVE_HW_BREAKPOINT
35 #include <asm/hw_breakpoint.h>
36 #endif
37 
38 #include <linux/list.h>
39 #include <linux/mutex.h>
40 #include <linux/rculist.h>
41 #include <linux/rcupdate.h>
42 #include <linux/spinlock.h>
43 #include <linux/hrtimer.h>
44 #include <linux/fs.h>
45 #include <linux/pid_namespace.h>
46 #include <linux/workqueue.h>
47 #include <linux/ftrace.h>
48 #include <linux/cpu.h>
49 #include <linux/irq_work.h>
50 #include <linux/static_key.h>
51 #include <linux/jump_label_ratelimit.h>
52 #include <linux/atomic.h>
53 #include <linux/sysfs.h>
54 #include <linux/perf_regs.h>
56 #include <linux/cgroup.h>
57 #include <asm/local.h>
58 
59 struct perf_callchain_entry {
60 	__u64				nr;
61 	__u64				ip[PERF_MAX_STACK_DEPTH];
62 };
63 
64 struct perf_raw_record {
65 	u32				size;
66 	void				*data;
67 };
68 
69 /*
70  * branch stack layout:
71  *  nr: number of taken branches stored in entries[]
72  *
73  * Note that nr can vary from sample to sample.
74  * Branches (to, from) are stored from most recent
75  * to least recent, i.e., entries[0] contains the most
76  * recent branch.
77  */
78 struct perf_branch_stack {
79 	__u64				nr;
80 	struct perf_branch_entry	entries[0];
81 };
82 
83 struct task_struct;
84 
85 /*
86  * extra PMU register associated with an event
87  */
88 struct hw_perf_event_extra {
89 	u64		config;	/* register value */
90 	unsigned int	reg;	/* register address or index */
91 	int		alloc;	/* extra register already allocated */
92 	int		idx;	/* index in shared_regs->regs[] */
93 };
94 
95 struct event_constraint;
96 
97 /**
98  * struct hw_perf_event - performance event hardware details:
99  */
100 struct hw_perf_event {
101 #ifdef CONFIG_PERF_EVENTS
102 	union {
103 		struct { /* hardware */
104 			u64		config;
105 			u64		last_tag;
106 			unsigned long	config_base;
107 			unsigned long	event_base;
108 			int		event_base_rdpmc;
109 			int		idx;
110 			int		last_cpu;
111 			int		flags;
112 
113 			struct hw_perf_event_extra extra_reg;
114 			struct hw_perf_event_extra branch_reg;
115 
116 			struct event_constraint *constraint;
117 		};
118 		struct { /* software */
119 			struct hrtimer	hrtimer;
120 		};
121 		struct { /* tracepoint */
122 			/* for tp_event->class */
123 			struct list_head	tp_list;
124 		};
125 		struct { /* intel_cqm */
126 			int			cqm_state;
127 			int			cqm_rmid;
128 			struct list_head	cqm_events_entry;
129 			struct list_head	cqm_groups_entry;
130 			struct list_head	cqm_group_entry;
131 		};
132 		struct { /* itrace */
133 			int			itrace_started;
134 		};
135 #ifdef CONFIG_HAVE_HW_BREAKPOINT
136 		struct { /* breakpoint */
137 			/*
138 			 * Crufty hack to avoid the chicken and egg
139 			 * problem hw_breakpoint has with context
140 			 * creation and event initialization.
141 			 */
142 			struct arch_hw_breakpoint	info;
143 			struct list_head		bp_list;
144 		};
145 #endif
146 	};
147 	struct task_struct		*target;
148 	int				state;
149 	local64_t			prev_count;
150 	u64				sample_period;
151 	u64				last_period;
152 	local64_t			period_left;
153 	u64                             interrupts_seq;
154 	u64				interrupts;
155 
156 	u64				freq_time_stamp;
157 	u64				freq_count_stamp;
158 #endif
159 };
160 
161 /*
162  * hw_perf_event::state flags
163  */
164 #define PERF_HES_STOPPED	0x01 /* the counter is stopped */
165 #define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
166 #define PERF_HES_ARCH		0x04
167 
168 struct perf_event;
169 
170 /*
171  * Common implementation detail of pmu::{start,commit,cancel}_txn
172  */
173 #define PERF_EVENT_TXN 0x1
174 
175 /**
176  * pmu::capabilities flags
177  */
178 #define PERF_PMU_CAP_NO_INTERRUPT		0x01
179 #define PERF_PMU_CAP_NO_NMI			0x02
180 #define PERF_PMU_CAP_AUX_NO_SG			0x04
181 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
182 #define PERF_PMU_CAP_EXCLUSIVE			0x10
183 #define PERF_PMU_CAP_ITRACE			0x20
184 
185 /**
186  * struct pmu - generic performance monitoring unit
187  */
188 struct pmu {
189 	struct list_head		entry;
190 
191 	struct module			*module;
192 	struct device			*dev;
193 	const struct attribute_group	**attr_groups;
194 	const char			*name;
195 	int				type;
196 
197 	/*
198 	 * various common per-pmu feature flags
199 	 */
200 	int				capabilities;
201 
202 	int * __percpu			pmu_disable_count;
203 	struct perf_cpu_context * __percpu pmu_cpu_context;
204 	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
205 	int				task_ctx_nr;
206 	int				hrtimer_interval_ms;
207 
208 	/*
209 	 * Fully disable/enable this PMU, can be used to protect from the PMI
210 	 * as well as for lazy/batch writing of the MSRs.
211 	 */
212 	void (*pmu_enable)		(struct pmu *pmu); /* optional */
213 	void (*pmu_disable)		(struct pmu *pmu); /* optional */
214 
215 	/*
216 	 * Try to initialize the event for this PMU.
217 	 * Should return -ENOENT when the @event doesn't match this PMU.
218 	 */
219 	int (*event_init)		(struct perf_event *event);
220 
221 	/*
222 	 * Notification that the event was mapped or unmapped.  Called
223 	 * in the context of the mapping task.
224 	 */
225 	void (*event_mapped)		(struct perf_event *event); /*optional*/
226 	void (*event_unmapped)		(struct perf_event *event); /*optional*/
227 
228 #define PERF_EF_START	0x01		/* start the counter when adding    */
229 #define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
230 #define PERF_EF_UPDATE	0x04		/* update the counter when stopping */
231 
232 	/*
233 	 * Adds/removes a counter to/from the PMU; can be done inside
234 	 * a transaction, see the ->*_txn() methods.
235 	 */
236 	int  (*add)			(struct perf_event *event, int flags);
237 	void (*del)			(struct perf_event *event, int flags);
238 
239 	/*
240 	 * Starts/Stops a counter present on the PMU. The PMI handler
241 	 * should stop the counter when perf_event_overflow() returns
242 	 * !0. ->start() will be used to continue.
243 	 */
244 	void (*start)			(struct perf_event *event, int flags);
245 	void (*stop)			(struct perf_event *event, int flags);
246 
247 	/*
248 	 * Updates the counter value of the event.
249 	 */
250 	void (*read)			(struct perf_event *event);
251 
252 	/*
253 	 * Group event scheduling is treated as a transaction: add the
254 	 * group's events as a whole and perform one schedulability test.
255 	 * If the test fails, roll back the whole group.
256 	 *
257 	 * Start the transaction, after this ->add() doesn't need to
258 	 * do schedulability tests.
259 	 */
260 	void (*start_txn)		(struct pmu *pmu); /* optional */
261 	/*
262 	 * If ->start_txn() disabled the ->add() schedulability test
263 	 * then ->commit_txn() is required to perform one. On success
264 	 * the transaction is closed. On error the transaction is kept
265 	 * open until ->cancel_txn() is called.
266 	 */
267 	int  (*commit_txn)		(struct pmu *pmu); /* optional */
268 	/*
269 	 * Will cancel the transaction, assumes ->del() is called
270 	 * for each successful ->add() during the transaction.
271 	 */
272 	void (*cancel_txn)		(struct pmu *pmu); /* optional */
273 
274 	/*
275 	 * Returns the value for perf_event_mmap_page::index for this event;
276 	 * if no implementation is provided, it defaults to event->hw.idx + 1.
277 	 */
278 	int (*event_idx)		(struct perf_event *event); /*optional */
279 
280 	/*
281 	 * context-switches callback
282 	 */
283 	void (*sched_task)		(struct perf_event_context *ctx,
284 					bool sched_in);
285 	/*
286 	 * PMU specific data size
287 	 */
288 	size_t				task_ctx_size;
289 
290 
291 	/*
292 	 * Return the count value for a counter.
293 	 */
294 	u64 (*count)			(struct perf_event *event); /*optional*/
295 
296 	/*
297 	 * Set up pmu-private data structures for an AUX area
298 	 */
299 	void *(*setup_aux)		(int cpu, void **pages,
300 					 int nr_pages, bool overwrite);
301 					/* optional */
302 
303 	/*
304 	 * Free pmu-private AUX data structures
305 	 */
306 	void (*free_aux)		(void *aux); /* optional */
307 };
308 
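/*
 * Example (illustrative sketch, not part of the original header): the minimal
 * set of callbacks a counting-only driver provides before calling
 * perf_pmu_register().  my_read_hw_counter() is a hypothetical stand-in for
 * real device access; error handling is abbreviated.
 *
 *	static void my_pmu_read(struct perf_event *event)
 *	{
 *		u64 prev, now = my_read_hw_counter(event->hw.idx);
 *
 *		prev = local64_xchg(&event->hw.prev_count, now);
 *		local64_add(now - prev, &event->count);
 *	}
 *
 *	static void my_pmu_start(struct perf_event *event, int flags)
 *	{
 *		local64_set(&event->hw.prev_count,
 *			    my_read_hw_counter(event->hw.idx));
 *		event->hw.state = 0;
 *	}
 *
 *	static void my_pmu_stop(struct perf_event *event, int flags)
 *	{
 *		my_pmu_read(event);
 *		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 *	}
 *
 *	static int my_pmu_add(struct perf_event *event, int flags)
 *	{
 *		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 *		if (flags & PERF_EF_START)
 *			my_pmu_start(event, PERF_EF_RELOAD);
 *		return 0;
 *	}
 *
 *	static void my_pmu_del(struct perf_event *event, int flags)
 *	{
 *		my_pmu_stop(event, PERF_EF_UPDATE);
 *	}
 *
 *	static int my_pmu_event_init(struct perf_event *event)
 *	{
 *		if (event->attr.type != event->pmu->type)
 *			return -ENOENT;		// not one of our events
 *		if (is_sampling_event(event))
 *			return -EINVAL;		// we cannot interrupt
 *		return 0;
 *	}
 *
 *	static struct pmu my_pmu = {
 *		.task_ctx_nr	= perf_invalid_context,	// system-wide only
 *		.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
 *		.event_init	= my_pmu_event_init,
 *		.add		= my_pmu_add,
 *		.del		= my_pmu_del,
 *		.start		= my_pmu_start,
 *		.stop		= my_pmu_stop,
 *		.read		= my_pmu_read,
 *	};
 *
 *	ret = perf_pmu_register(&my_pmu, "my_pmu", -1);
 */
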
309 /**
310  * enum perf_event_active_state - the states of an event
311  */
312 enum perf_event_active_state {
313 	PERF_EVENT_STATE_EXIT		= -3,
314 	PERF_EVENT_STATE_ERROR		= -2,
315 	PERF_EVENT_STATE_OFF		= -1,
316 	PERF_EVENT_STATE_INACTIVE	=  0,
317 	PERF_EVENT_STATE_ACTIVE		=  1,
318 };
319 
320 struct file;
321 struct perf_sample_data;
322 
323 typedef void (*perf_overflow_handler_t)(struct perf_event *,
324 					struct perf_sample_data *,
325 					struct pt_regs *regs);
326 
327 enum perf_group_flag {
328 	PERF_GROUP_SOFTWARE		= 0x1,
329 };
330 
331 #define SWEVENT_HLIST_BITS		8
332 #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
333 
334 struct swevent_hlist {
335 	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
336 	struct rcu_head			rcu_head;
337 };
338 
339 #define PERF_ATTACH_CONTEXT	0x01
340 #define PERF_ATTACH_GROUP	0x02
341 #define PERF_ATTACH_TASK	0x04
342 #define PERF_ATTACH_TASK_DATA	0x08
343 
344 struct perf_cgroup;
345 struct ring_buffer;
346 
347 /**
348  * struct perf_event - performance event kernel representation:
349  */
350 struct perf_event {
351 #ifdef CONFIG_PERF_EVENTS
352 	/*
353 	 * entry onto perf_event_context::event_list;
354 	 *   modifications require ctx->lock
355 	 *   RCU safe iterations.
356 	 */
357 	struct list_head		event_entry;
358 
359 	/*
360 	 * XXX: group_entry and sibling_list should be mutually exclusive;
361 	 * either you're a sibling in a group, or you're the group leader.
362 	 * Rework the code to always use the same list element.
363 	 *
364 	 * Locked for modification by both ctx->mutex and ctx->lock; holding
365 	 * either suffices for read.
366 	 */
367 	struct list_head		group_entry;
368 	struct list_head		sibling_list;
369 
370 	/*
371 	 * We need storage to track the entries in perf_pmu_migrate_context; we
372 	 * cannot use the event_entry because of RCU and we want to keep the
373 	 * group intact, which avoids using the other two entries.
374 	 */
375 	struct list_head		migrate_entry;
376 
377 	struct hlist_node		hlist_entry;
378 	struct list_head		active_entry;
379 	int				nr_siblings;
380 	int				group_flags;
381 	struct perf_event		*group_leader;
382 	struct pmu			*pmu;
383 
384 	enum perf_event_active_state	state;
385 	unsigned int			attach_state;
386 	local64_t			count;
387 	atomic64_t			child_count;
388 
389 	/*
390 	 * These are the total time in nanoseconds that the event
391 	 * has been enabled (i.e. eligible to run, and the task has
392 	 * been scheduled in, if this is a per-task event)
393 	 * and running (scheduled onto the CPU), respectively.
394 	 *
395 	 * They are computed from tstamp_enabled, tstamp_running and
396 	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
397 	 */
398 	u64				total_time_enabled;
399 	u64				total_time_running;
400 
401 	/*
402 	 * These are timestamps used for computing total_time_enabled
403 	 * and total_time_running when the event is in INACTIVE or
404 	 * ACTIVE state, measured in nanoseconds from an arbitrary point
405 	 * in time.
406 	 * tstamp_enabled: the notional time when the event was enabled
407 	 * tstamp_running: the notional time when the event was scheduled on
408 	 * tstamp_stopped: in INACTIVE state, the notional time when the
409 	 *	event was scheduled off.
410 	 */
411 	u64				tstamp_enabled;
412 	u64				tstamp_running;
413 	u64				tstamp_stopped;
414 
415 	/*
416 	 * timestamp shadows the actual context timing but it can
417 	 * be safely used in NMI interrupt context. It reflects the
418 	 * context time as it was when the event was last scheduled in.
419 	 *
420 	 * shadow_ctx_time already accounts for ctx->timestamp. Therefore, to
421 	 * compute the context time for a sample, simply add perf_clock().
422 	 */
423 	u64				shadow_ctx_time;
424 
425 	struct perf_event_attr		attr;
426 	u16				header_size;
427 	u16				id_header_size;
428 	u16				read_size;
429 	struct hw_perf_event		hw;
430 
431 	struct perf_event_context	*ctx;
432 	atomic_long_t			refcount;
433 
434 	/*
435 	 * These accumulate total time (in nanoseconds) that children
436 	 * events have been enabled and running, respectively.
437 	 */
438 	atomic64_t			child_total_time_enabled;
439 	atomic64_t			child_total_time_running;
440 
441 	/*
442 	 * Protect attach/detach and child_list:
443 	 */
444 	struct mutex			child_mutex;
445 	struct list_head		child_list;
446 	struct perf_event		*parent;
447 
448 	int				oncpu;
449 	int				cpu;
450 
451 	struct list_head		owner_entry;
452 	struct task_struct		*owner;
453 
454 	/* mmap bits */
455 	struct mutex			mmap_mutex;
456 	atomic_t			mmap_count;
457 
458 	struct ring_buffer		*rb;
459 	struct list_head		rb_entry;
460 	unsigned long			rcu_batches;
461 	int				rcu_pending;
462 
463 	/* poll related */
464 	wait_queue_head_t		waitq;
465 	struct fasync_struct		*fasync;
466 
467 	/* delayed work for NMIs and such */
468 	int				pending_wakeup;
469 	int				pending_kill;
470 	int				pending_disable;
471 	struct irq_work			pending;
472 
473 	atomic_t			event_limit;
474 
475 	void (*destroy)(struct perf_event *);
476 	struct rcu_head			rcu_head;
477 
478 	struct pid_namespace		*ns;
479 	u64				id;
480 
481 	u64				(*clock)(void);
482 	perf_overflow_handler_t		overflow_handler;
483 	void				*overflow_handler_context;
484 
485 #ifdef CONFIG_EVENT_TRACING
486 	struct ftrace_event_call	*tp_event;
487 	struct event_filter		*filter;
488 #ifdef CONFIG_FUNCTION_TRACER
489 	struct ftrace_ops               ftrace_ops;
490 #endif
491 #endif
492 
493 #ifdef CONFIG_CGROUP_PERF
494 	struct perf_cgroup		*cgrp; /* cgroup this event is attached to */
495 	int				cgrp_defer_enabled;
496 #endif
497 
498 #endif /* CONFIG_PERF_EVENTS */
499 };
500 
501 /**
502  * struct perf_event_context - event context structure
503  *
504  * Used as a container for task events and CPU events as well:
505  */
506 struct perf_event_context {
507 	struct pmu			*pmu;
508 	/*
509 	 * Protect the states of the events in the list,
510 	 * nr_active, and the list:
511 	 */
512 	raw_spinlock_t			lock;
513 	/*
514 	 * Protect the list of events.  Locking either mutex or lock
515 	 * is sufficient to ensure the list doesn't change; to change
516 	 * the list you need to lock both the mutex and the spinlock.
517 	 */
518 	struct mutex			mutex;
519 
520 	struct list_head		active_ctx_list;
521 	struct list_head		pinned_groups;
522 	struct list_head		flexible_groups;
523 	struct list_head		event_list;
524 	int				nr_events;
525 	int				nr_active;
526 	int				is_active;
527 	int				nr_stat;
528 	int				nr_freq;
529 	int				rotate_disable;
530 	atomic_t			refcount;
531 	struct task_struct		*task;
532 
533 	/*
534 	 * Context clock, runs when context enabled.
535 	 */
536 	u64				time;
537 	u64				timestamp;
538 
539 	/*
540 	 * These fields let us detect when two contexts have both
541 	 * been cloned (inherited) from a common ancestor.
542 	 */
543 	struct perf_event_context	*parent_ctx;
544 	u64				parent_gen;
545 	u64				generation;
546 	int				pin_count;
547 	int				nr_cgroups;	 /* cgroup evts */
548 	void				*task_ctx_data; /* pmu specific data */
549 	struct rcu_head			rcu_head;
550 
551 	struct delayed_work		orphans_remove;
552 	bool				orphans_remove_sched;
553 };
554 
555 /*
556  * Number of contexts where an event can trigger:
557  *	task, softirq, hardirq, nmi.
558  */
559 #define PERF_NR_CONTEXTS	4
560 
561 /**
562  * struct perf_cpu_context - per-CPU event context structure
563  */
564 struct perf_cpu_context {
565 	struct perf_event_context	ctx;
566 	struct perf_event_context	*task_ctx;
567 	int				active_oncpu;
568 	int				exclusive;
569 	struct hrtimer			hrtimer;
570 	ktime_t				hrtimer_interval;
571 	struct pmu			*unique_pmu;
572 	struct perf_cgroup		*cgrp;
573 };
574 
575 struct perf_output_handle {
576 	struct perf_event		*event;
577 	struct ring_buffer		*rb;
578 	unsigned long			wakeup;
579 	unsigned long			size;
580 	union {
581 		void			*addr;
582 		unsigned long		head;
583 	};
584 	int				page;
585 };
586 
587 #ifdef CONFIG_CGROUP_PERF
588 
589 /*
590  * perf_cgroup_info keeps track of time_enabled for a cgroup.
591  * This is a per-cpu dynamically allocated data structure.
592  */
593 struct perf_cgroup_info {
594 	u64				time;
595 	u64				timestamp;
596 };
597 
598 struct perf_cgroup {
599 	struct cgroup_subsys_state	css;
600 	struct perf_cgroup_info	__percpu *info;
601 };
602 
603 /*
604  * Must ensure cgroup is pinned (css_get) before calling
605  * this function. In other words, we cannot call this function
606  * if there is no cgroup event for the current CPU context.
607  */
608 static inline struct perf_cgroup *
609 perf_cgroup_from_task(struct task_struct *task)
610 {
611 	return container_of(task_css(task, perf_event_cgrp_id),
612 			    struct perf_cgroup, css);
613 }
614 #endif /* CONFIG_CGROUP_PERF */
615 
616 #ifdef CONFIG_PERF_EVENTS
617 
618 extern void *perf_aux_output_begin(struct perf_output_handle *handle,
619 				   struct perf_event *event);
620 extern void perf_aux_output_end(struct perf_output_handle *handle,
621 				unsigned long size, bool truncated);
622 extern int perf_aux_output_skip(struct perf_output_handle *handle,
623 				unsigned long size);
624 extern void *perf_get_aux(struct perf_output_handle *handle);
625 
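/*
 * Example (illustrative sketch, not part of the original header): the usual
 * shape of the AUX path in an instruction-trace style driver.  The my_*()
 * helpers and the driver-private 'handle' are hypothetical.
 * perf_aux_output_begin() returns whatever ->setup_aux() built for the
 * event's AUX buffer, or NULL if no AUX buffer is mapped.
 *
 *	// ->start(): grab the AUX buffer and point the hardware at it
 *	priv = perf_aux_output_begin(handle, event);
 *	if (!priv)
 *		return;
 *	my_hw_enable_trace(priv);
 *
 *	// trace interrupt: the hardware produced 'bytes' of data
 *	my_hw_disable_trace();
 *	perf_aux_output_end(handle, bytes, false);	// false: not truncated
 *	priv = perf_aux_output_begin(handle, event);	// re-arm if possible
 *	if (priv)
 *		my_hw_enable_trace(priv);
 */
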
626 extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
627 extern void perf_pmu_unregister(struct pmu *pmu);
628 
629 extern int perf_num_counters(void);
630 extern const char *perf_pmu_name(void);
631 extern void __perf_event_task_sched_in(struct task_struct *prev,
632 				       struct task_struct *task);
633 extern void __perf_event_task_sched_out(struct task_struct *prev,
634 					struct task_struct *next);
635 extern int perf_event_init_task(struct task_struct *child);
636 extern void perf_event_exit_task(struct task_struct *child);
637 extern void perf_event_free_task(struct task_struct *task);
638 extern void perf_event_delayed_put(struct task_struct *task);
639 extern void perf_event_print_debug(void);
640 extern void perf_pmu_disable(struct pmu *pmu);
641 extern void perf_pmu_enable(struct pmu *pmu);
642 extern void perf_sched_cb_dec(struct pmu *pmu);
643 extern void perf_sched_cb_inc(struct pmu *pmu);
644 extern int perf_event_task_disable(void);
645 extern int perf_event_task_enable(void);
646 extern int perf_event_refresh(struct perf_event *event, int refresh);
647 extern void perf_event_update_userpage(struct perf_event *event);
648 extern int perf_event_release_kernel(struct perf_event *event);
649 extern struct perf_event *
650 perf_event_create_kernel_counter(struct perf_event_attr *attr,
651 				int cpu,
652 				struct task_struct *task,
653 				perf_overflow_handler_t callback,
654 				void *context);
655 extern void perf_pmu_migrate_context(struct pmu *pmu,
656 				int src_cpu, int dst_cpu);
657 extern u64 perf_event_read_value(struct perf_event *event,
658 				 u64 *enabled, u64 *running);
659 
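/*
 * Example (illustrative sketch, not part of the original header): creating
 * and reading an in-kernel counter.  Error handling is abbreviated.
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_HARDWARE,
 *		.config	= PERF_COUNT_HW_CPU_CYCLES,
 *		.size	= sizeof(attr),
 *		.pinned	= 1,
 *	};
 *	struct perf_event *event;
 *	u64 count, enabled, running;
 *
 *	// count cycles on CPU 0, not bound to a task, no overflow handler
 *	event = perf_event_create_kernel_counter(&attr, 0, NULL, NULL, NULL);
 *	if (IS_ERR(event))
 *		return PTR_ERR(event);
 *
 *	count = perf_event_read_value(event, &enabled, &running);
 *
 *	perf_event_release_kernel(event);
 */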
660 
661 struct perf_sample_data {
662 	/*
663 	 * Fields set by perf_sample_data_init(), grouped so as to
664 	 * minimize the cachelines touched.
665 	 */
666 	u64				addr;
667 	struct perf_raw_record		*raw;
668 	struct perf_branch_stack	*br_stack;
669 	u64				period;
670 	u64				weight;
671 	u64				txn;
672 	union  perf_mem_data_src	data_src;
673 
674 	/*
675 	 * The other fields, optionally {set,used} by
676 	 * perf_{prepare,output}_sample().
677 	 */
678 	u64				type;
679 	u64				ip;
680 	struct {
681 		u32	pid;
682 		u32	tid;
683 	}				tid_entry;
684 	u64				time;
685 	u64				id;
686 	u64				stream_id;
687 	struct {
688 		u32	cpu;
689 		u32	reserved;
690 	}				cpu_entry;
691 	struct perf_callchain_entry	*callchain;
692 
693 	/*
694 	 * regs_user may point to task_pt_regs or to regs_user_copy, depending
695 	 * on arch details.
696 	 */
697 	struct perf_regs		regs_user;
698 	struct pt_regs			regs_user_copy;
699 
700 	struct perf_regs		regs_intr;
701 	u64				stack_user_size;
702 } ____cacheline_aligned;
703 
704 /* default value for data source */
705 #define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
706 		    PERF_MEM_S(LVL, NA)   |\
707 		    PERF_MEM_S(SNOOP, NA) |\
708 		    PERF_MEM_S(LOCK, NA)  |\
709 		    PERF_MEM_S(TLB, NA))
710 
711 static inline void perf_sample_data_init(struct perf_sample_data *data,
712 					 u64 addr, u64 period)
713 {
714 	/* remaining struct members initialized in perf_prepare_sample() */
715 	data->addr = addr;
716 	data->raw  = NULL;
717 	data->br_stack = NULL;
718 	data->period = period;
719 	data->weight = 0;
720 	data->data_src.val = PERF_MEM_NA;
721 	data->txn = 0;
722 }
723 
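/*
 * Example (illustrative sketch, not part of the original header): the usual
 * overflow sequence in a PMU interrupt handler.  Passing hw.last_period lets
 * the core keep adjusting the sampling period, and a non-zero return from
 * perf_event_overflow() asks the driver to stop the event.  my_pmu_stop() is
 * a hypothetical ->stop() helper.
 *
 *	struct perf_sample_data data;
 *	struct hw_perf_event *hwc = &event->hw;
 *
 *	perf_sample_data_init(&data, 0, hwc->last_period);
 *
 *	if (perf_event_overflow(event, &data, regs))
 *		my_pmu_stop(event, 0);
 */
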
724 extern void perf_output_sample(struct perf_output_handle *handle,
725 			       struct perf_event_header *header,
726 			       struct perf_sample_data *data,
727 			       struct perf_event *event);
728 extern void perf_prepare_sample(struct perf_event_header *header,
729 				struct perf_sample_data *data,
730 				struct perf_event *event,
731 				struct pt_regs *regs);
732 
733 extern int perf_event_overflow(struct perf_event *event,
734 				 struct perf_sample_data *data,
735 				 struct pt_regs *regs);
736 
737 static inline bool is_sampling_event(struct perf_event *event)
738 {
739 	return event->attr.sample_period != 0;
740 }
741 
742 /*
743  * Return 1 for a software event, 0 for a hardware event
744  */
745 static inline int is_software_event(struct perf_event *event)
746 {
747 	return event->pmu->task_ctx_nr == perf_sw_context;
748 }
749 
750 extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
751 
752 extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
753 extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
754 
755 #ifndef perf_arch_fetch_caller_regs
756 static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
757 #endif
758 
759 /*
760  * Take a snapshot of the regs. Skip ip and frame pointer to
761  * the nth caller. We only need a few of the regs:
762  * - ip for PERF_SAMPLE_IP
763  * - cs for user_mode() tests
764  * - bp for callchains
765  * - eflags, for future purposes, just in case
766  */
767 static inline void perf_fetch_caller_regs(struct pt_regs *regs)
768 {
769 	memset(regs, 0, sizeof(*regs));
770 
771 	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
772 }
773 
774 static __always_inline void
775 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
776 {
777 	if (static_key_false(&perf_swevent_enabled[event_id]))
778 		__perf_sw_event(event_id, nr, regs, addr);
779 }
780 
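/*
 * Example (illustrative sketch, not part of the original header): emitting a
 * software event from generic code, much like the fault handlers do for the
 * page-fault counters.  The static key above keeps this a patched-out branch
 * when no such event exists.
 *
 *	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 */
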
781 DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
782 
783 /*
784  * 'Special' version for the scheduler: it assumes there is no recursion,
785  * which is guaranteed because we never actually schedule inside other
786  * swevents (those disable preemption).
787  */
788 static __always_inline void
789 perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
790 {
791 	if (static_key_false(&perf_swevent_enabled[event_id])) {
792 		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
793 
794 		perf_fetch_caller_regs(regs);
795 		___perf_sw_event(event_id, nr, regs, addr);
796 	}
797 }
798 
799 extern struct static_key_deferred perf_sched_events;
800 
801 static __always_inline bool
802 perf_sw_migrate_enabled(void)
803 {
804 	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
805 		return true;
806 	return false;
807 }
808 
809 static inline void perf_event_task_migrate(struct task_struct *task)
810 {
811 	if (perf_sw_migrate_enabled())
812 		task->sched_migrated = 1;
813 }
814 
815 static inline void perf_event_task_sched_in(struct task_struct *prev,
816 					    struct task_struct *task)
817 {
818 	if (static_key_false(&perf_sched_events.key))
819 		__perf_event_task_sched_in(prev, task);
820 
821 	if (perf_sw_migrate_enabled() && task->sched_migrated) {
822 		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
823 
824 		perf_fetch_caller_regs(regs);
825 		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
826 		task->sched_migrated = 0;
827 	}
828 }
829 
830 static inline void perf_event_task_sched_out(struct task_struct *prev,
831 					     struct task_struct *next)
832 {
833 	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
834 
835 	if (static_key_false(&perf_sched_events.key))
836 		__perf_event_task_sched_out(prev, next);
837 }
838 
839 static inline u64 __perf_event_count(struct perf_event *event)
840 {
841 	return local64_read(&event->count) + atomic64_read(&event->child_count);
842 }
843 
844 extern void perf_event_mmap(struct vm_area_struct *vma);
845 extern struct perf_guest_info_callbacks *perf_guest_cbs;
846 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
847 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
848 
849 extern void perf_event_exec(void);
850 extern void perf_event_comm(struct task_struct *tsk, bool exec);
851 extern void perf_event_fork(struct task_struct *tsk);
852 
853 /* Callchains */
854 DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
855 
856 extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
857 extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);
858 
859 static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
860 {
861 	if (entry->nr < PERF_MAX_STACK_DEPTH)
862 		entry->ip[entry->nr++] = ip;
863 }
864 
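/*
 * Example (illustrative sketch, not part of the original header): the rough
 * shape of an architecture's perf_callchain_kernel().  my_walk_frames() is a
 * hypothetical stand-in for the arch-specific unwinder; it is expected to
 * call perf_callchain_store() for every return address it finds.
 *
 *	void perf_callchain_kernel(struct perf_callchain_entry *entry,
 *				   struct pt_regs *regs)
 *	{
 *		perf_callchain_store(entry, instruction_pointer(regs));
 *		my_walk_frames(regs, entry);
 *	}
 */
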
865 extern int sysctl_perf_event_paranoid;
866 extern int sysctl_perf_event_mlock;
867 extern int sysctl_perf_event_sample_rate;
868 extern int sysctl_perf_cpu_time_max_percent;
869 
870 extern void perf_sample_event_took(u64 sample_len_ns);
871 
872 extern int perf_proc_update_handler(struct ctl_table *table, int write,
873 		void __user *buffer, size_t *lenp,
874 		loff_t *ppos);
875 extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
876 		void __user *buffer, size_t *lenp,
877 		loff_t *ppos);
878 
879 
880 static inline bool perf_paranoid_tracepoint_raw(void)
881 {
882 	return sysctl_perf_event_paranoid > -1;
883 }
884 
885 static inline bool perf_paranoid_cpu(void)
886 {
887 	return sysctl_perf_event_paranoid > 0;
888 }
889 
890 static inline bool perf_paranoid_kernel(void)
891 {
892 	return sysctl_perf_event_paranoid > 1;
893 }
894 
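/*
 * Example (illustrative sketch, not part of the original header): how the
 * paranoia helpers typically gate privileged functionality.
 *
 *	if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
 *		return -EACCES;		// unprivileged kernel profiling denied
 */
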
895 extern void perf_event_init(void);
896 extern void perf_tp_event(u64 addr, u64 count, void *record,
897 			  int entry_size, struct pt_regs *regs,
898 			  struct hlist_head *head, int rctx,
899 			  struct task_struct *task);
900 extern void perf_bp_event(struct perf_event *event, void *data);
901 
902 #ifndef perf_misc_flags
903 # define perf_misc_flags(regs) \
904 		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
905 # define perf_instruction_pointer(regs)	instruction_pointer(regs)
906 #endif
907 
908 static inline bool has_branch_stack(struct perf_event *event)
909 {
910 	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
911 }
912 
913 static inline bool needs_branch_stack(struct perf_event *event)
914 {
915 	return event->attr.branch_sample_type != 0;
916 }
917 
918 static inline bool has_aux(struct perf_event *event)
919 {
920 	return event->pmu->setup_aux;
921 }
922 
923 extern int perf_output_begin(struct perf_output_handle *handle,
924 			     struct perf_event *event, unsigned int size);
925 extern void perf_output_end(struct perf_output_handle *handle);
926 extern unsigned int perf_output_copy(struct perf_output_handle *handle,
927 			     const void *buf, unsigned int len);
928 extern unsigned int perf_output_skip(struct perf_output_handle *handle,
929 				     unsigned int len);
930 extern int perf_swevent_get_recursion_context(void);
931 extern void perf_swevent_put_recursion_context(int rctx);
932 extern u64 perf_swevent_set_period(struct perf_event *event);
933 extern void perf_event_enable(struct perf_event *event);
934 extern void perf_event_disable(struct perf_event *event);
935 extern int __perf_event_disable(void *info);
936 extern void perf_event_task_tick(void);
937 #else /* !CONFIG_PERF_EVENTS: */
938 static inline void *
939 perf_aux_output_begin(struct perf_output_handle *handle,
940 		      struct perf_event *event)				{ return NULL; }
941 static inline void
942 perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
943 		    bool truncated)					{ }
944 static inline int
945 perf_aux_output_skip(struct perf_output_handle *handle,
946 		     unsigned long size)				{ return -EINVAL; }
947 static inline void *
948 perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
949 static inline void
950 perf_event_task_migrate(struct task_struct *task)			{ }
951 static inline void
952 perf_event_task_sched_in(struct task_struct *prev,
953 			 struct task_struct *task)			{ }
954 static inline void
955 perf_event_task_sched_out(struct task_struct *prev,
956 			  struct task_struct *next)			{ }
957 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
958 static inline void perf_event_exit_task(struct task_struct *child)	{ }
959 static inline void perf_event_free_task(struct task_struct *task)	{ }
960 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
961 static inline void perf_event_print_debug(void)				{ }
962 static inline int perf_event_task_disable(void)				{ return -EINVAL; }
963 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
964 static inline int perf_event_refresh(struct perf_event *event, int refresh)
965 {
966 	return -EINVAL;
967 }
968 
969 static inline void
970 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
971 static inline void
972 perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)			{ }
973 static inline void
974 perf_bp_event(struct perf_event *event, void *data)			{ }
975 
976 static inline int perf_register_guest_info_callbacks
977 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
978 static inline int perf_unregister_guest_info_callbacks
979 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
980 
981 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
982 static inline void perf_event_exec(void)				{ }
983 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
984 static inline void perf_event_fork(struct task_struct *tsk)		{ }
985 static inline void perf_event_init(void)				{ }
986 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
987 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
988 static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
989 static inline void perf_event_enable(struct perf_event *event)		{ }
990 static inline void perf_event_disable(struct perf_event *event)		{ }
991 static inline int __perf_event_disable(void *info)			{ return -1; }
992 static inline void perf_event_task_tick(void)				{ }
993 #endif
994 
995 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
996 extern bool perf_event_can_stop_tick(void);
997 #else
998 static inline bool perf_event_can_stop_tick(void)			{ return true; }
999 #endif
1000 
1001 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
1002 extern void perf_restore_debug_store(void);
1003 #else
1004 static inline void perf_restore_debug_store(void)			{ }
1005 #endif
1006 
1007 #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
1008 
1009 /*
1010  * This has to have a higher priority than migration_notifier in sched/core.c.
1011  */
1012 #define perf_cpu_notifier(fn)						\
1013 do {									\
1014 	static struct notifier_block fn##_nb =				\
1015 		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
1016 	unsigned long cpu = smp_processor_id();				\
1017 	unsigned long flags;						\
1018 									\
1019 	cpu_notifier_register_begin();					\
1020 	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,			\
1021 		(void *)(unsigned long)cpu);				\
1022 	local_irq_save(flags);						\
1023 	fn(&fn##_nb, (unsigned long)CPU_STARTING,			\
1024 		(void *)(unsigned long)cpu);				\
1025 	local_irq_restore(flags);					\
1026 	fn(&fn##_nb, (unsigned long)CPU_ONLINE,				\
1027 		(void *)(unsigned long)cpu);				\
1028 	__register_cpu_notifier(&fn##_nb);				\
1029 	cpu_notifier_register_done();					\
1030 } while (0)
1031 
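/*
 * Example (illustrative sketch, not part of the original header): a PMU
 * driver hooking CPU hotplug.  The my_pmu_*() helpers are hypothetical.
 * Note that the macro above also invokes the callback for the CPU it runs
 * on before registering the notifier.
 *
 *	static int
 *	my_pmu_cpu_notifier(struct notifier_block *nb, unsigned long action,
 *			    void *hcpu)
 *	{
 *		unsigned int cpu = (unsigned long)hcpu;
 *
 *		switch (action & ~CPU_TASKS_FROZEN) {
 *		case CPU_UP_PREPARE:
 *			my_pmu_prepare_cpu(cpu);
 *			break;
 *		case CPU_STARTING:
 *			my_pmu_start_cpu(cpu);
 *			break;
 *		case CPU_DYING:
 *			my_pmu_stop_cpu(cpu);
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	perf_cpu_notifier(my_pmu_cpu_notifier);	// in the driver's init path
 */
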
1032 /*
1033  * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
1034  * callback for already online CPUs.
1035  */
1036 #define __perf_cpu_notifier(fn)						\
1037 do {									\
1038 	static struct notifier_block fn##_nb =				\
1039 		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
1040 									\
1041 	__register_cpu_notifier(&fn##_nb);				\
1042 } while (0)
1043 
1044 struct perf_pmu_events_attr {
1045 	struct device_attribute attr;
1046 	u64 id;
1047 	const char *event_str;
1048 };
1049 
1050 ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
1051 			      char *page);
1052 
1053 #define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
1054 static struct perf_pmu_events_attr _var = {				\
1055 	.attr = __ATTR(_name, 0444, _show, NULL),			\
1056 	.id   =  _id,							\
1057 };
1058 
1059 #define PMU_EVENT_ATTR_STRING(_name, _var, _str)			    \
1060 static struct perf_pmu_events_attr _var = {				    \
1061 	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
1062 	.id		= 0,						    \
1063 	.event_str	= _str,						    \
1064 };
1065 
1066 #define PMU_FORMAT_ATTR(_name, _format)					\
1067 static ssize_t								\
1068 _name##_show(struct device *dev,					\
1069 			       struct device_attribute *attr,		\
1070 			       char *page)				\
1071 {									\
1072 	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
1073 	return sprintf(page, _format "\n");				\
1074 }									\
1075 									\
1076 static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
1077 
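/*
 * Example (illustrative sketch, not part of the original header): describing
 * a PMU's events and config layout through sysfs and hooking them into
 * pmu::attr_groups.  Everything named my_* is hypothetical.
 *
 *	PMU_EVENT_ATTR_STRING(cycles, my_pmu_attr_cycles, "event=0x01");
 *	PMU_FORMAT_ATTR(event, "config:0-7");
 *
 *	static struct attribute *my_pmu_events[] = {
 *		&my_pmu_attr_cycles.attr.attr,
 *		NULL,
 *	};
 *	static struct attribute_group my_pmu_events_group = {
 *		.name	= "events",
 *		.attrs	= my_pmu_events,
 *	};
 *
 *	static struct attribute *my_pmu_formats[] = {
 *		&format_attr_event.attr,
 *		NULL,
 *	};
 *	static struct attribute_group my_pmu_format_group = {
 *		.name	= "format",
 *		.attrs	= my_pmu_formats,
 *	};
 *
 *	static const struct attribute_group *my_pmu_attr_groups[] = {
 *		&my_pmu_events_group,
 *		&my_pmu_format_group,
 *		NULL,
 *	};
 *
 *	// assign before registering: my_pmu.attr_groups = my_pmu_attr_groups;
 */
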
1078 #endif /* _LINUX_PERF_EVENT_H */
1079