xref: /linux-6.15/include/linux/psi_types.h (revision aff03707)
12fb75e1bSLiu Xinpeng /* SPDX-License-Identifier: GPL-2.0 */
2eb414681SJohannes Weiner #ifndef _LINUX_PSI_TYPES_H
3eb414681SJohannes Weiner #define _LINUX_PSI_TYPES_H
4eb414681SJohannes Weiner 
50e94682bSSuren Baghdasaryan #include <linux/kthread.h>
6eb414681SJohannes Weiner #include <linux/seqlock.h>
7eb414681SJohannes Weiner #include <linux/types.h>
80e94682bSSuren Baghdasaryan #include <linux/kref.h>
90e94682bSSuren Baghdasaryan #include <linux/wait.h>
10eb414681SJohannes Weiner 
11eb414681SJohannes Weiner #ifdef CONFIG_PSI
12eb414681SJohannes Weiner 
13eb414681SJohannes Weiner /*
 * Tracked task states.
 *
 * Each enumerator indexes psi_group_cpu.tasks[] and is also the bit
 * position of the matching TSK_* mask.
 */
14eb414681SJohannes Weiner enum psi_task_count {
15eb414681SJohannes Weiner 	NR_IOWAIT,
16eb414681SJohannes Weiner 	NR_MEMSTALL,
17eb414681SJohannes Weiner 	NR_RUNNING,
18b05e75d6SJohannes Weiner 	/*
19cb0e52b7SBrian Chen 	 * For IO and CPU stalls the presence of running/oncpu tasks
20cb0e52b7SBrian Chen 	 * in the domain means a partial rather than a full stall.
21cb0e52b7SBrian Chen 	 * For memory it's not so simple because of page reclaimers:
22cb0e52b7SBrian Chen 	 * they are running/oncpu while representing a stall. To tell
23cb0e52b7SBrian Chen 	 * whether a domain has productivity left or not, we need to
24cb0e52b7SBrian Chen 	 * distinguish between regular running (i.e. productive)
25cb0e52b7SBrian Chen 	 * threads and memstall ones.
26cb0e52b7SBrian Chen 	 */
27cb0e52b7SBrian Chen 	NR_MEMSTALL_RUNNING,
	/*
	 * Explicit count of the states above; update it when adding a
	 * state. The same value is the bit position of TSK_ONCPU.
	 */
2871dbdde7SJohannes Weiner 	NR_PSI_TASK_COUNTS = 4,
29eb414681SJohannes Weiner };
30eb414681SJohannes Weiner 
31eb414681SJohannes Weiner /* Task state bitmasks: one bit per enum psi_task_count counter */
32eb414681SJohannes Weiner #define TSK_IOWAIT	(1 << NR_IOWAIT)
33eb414681SJohannes Weiner #define TSK_MEMSTALL	(1 << NR_MEMSTALL)
34eb414681SJohannes Weiner #define TSK_RUNNING	(1 << NR_RUNNING)
35cb0e52b7SBrian Chen #define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)
36eb414681SJohannes Weiner 
3771dbdde7SJohannes Weiner /*
 * Only one task can be scheduled, no corresponding task count.
 * The flag therefore takes the first bit above the counted states.
 */
3871dbdde7SJohannes Weiner #define TSK_ONCPU	(1 << NR_PSI_TASK_COUNTS)
3971dbdde7SJohannes Weiner 
40eb414681SJohannes Weiner /*
 * Resources that workloads could be stalled on.
 *
 * PSI_IRQ only exists when CONFIG_IRQ_TIME_ACCOUNTING is set, so the
 * value of NR_PSI_RESOURCES depends on the kernel configuration.
 */
41eb414681SJohannes Weiner enum psi_res {
42eb414681SJohannes Weiner 	PSI_IO,
43eb414681SJohannes Weiner 	PSI_MEM,
44eb414681SJohannes Weiner 	PSI_CPU,
4552b1364bSChengming Zhou #ifdef CONFIG_IRQ_TIME_ACCOUNTING
4652b1364bSChengming Zhou 	PSI_IRQ,
4752b1364bSChengming Zhou #endif
4852b1364bSChengming Zhou 	NR_PSI_RESOURCES,
49eb414681SJohannes Weiner };
50eb414681SJohannes Weiner 
51eb414681SJohannes Weiner /*
52eb414681SJohannes Weiner  * Pressure states for each resource:
53eb414681SJohannes Weiner  *
54eb414681SJohannes Weiner  * SOME: Stalled tasks & working tasks
55eb414681SJohannes Weiner  * FULL: Stalled tasks & no working tasks
 *
 * The IRQ resource only has a FULL state, and only when
 * CONFIG_IRQ_TIME_ACCOUNTING is set, so NR_PSI_STATES depends on the
 * kernel configuration.
56eb414681SJohannes Weiner  */
57eb414681SJohannes Weiner enum psi_states {
58eb414681SJohannes Weiner 	PSI_IO_SOME,
59eb414681SJohannes Weiner 	PSI_IO_FULL,
60eb414681SJohannes Weiner 	PSI_MEM_SOME,
61eb414681SJohannes Weiner 	PSI_MEM_FULL,
62eb414681SJohannes Weiner 	PSI_CPU_SOME,
63e7fcd762SChengming Zhou 	PSI_CPU_FULL,
6452b1364bSChengming Zhou #ifdef CONFIG_IRQ_TIME_ACCOUNTING
6552b1364bSChengming Zhou 	PSI_IRQ_FULL,
6652b1364bSChengming Zhou #endif
67eb414681SJohannes Weiner 	/*
	 * Only per-CPU, to weigh the CPU in the global average. As the
	 * last state it has no slot in the NR_PSI_STATES - 1 sized arrays
	 * of struct psi_group.
	 */
68eb414681SJohannes Weiner 	PSI_NONIDLE,
6952b1364bSChengming Zhou 	NR_PSI_STATES,
70eb414681SJohannes Weiner };
71eb414681SJohannes Weiner 
7271dbdde7SJohannes Weiner /*
 * Use one bit in the state mask to track TSK_ONCPU.
 * It is the first bit above the enum psi_states bits.
 */
7371dbdde7SJohannes Weiner #define PSI_ONCPU	(1 << NR_PSI_STATES)
7471dbdde7SJohannes Weiner 
752fcd7bbaSChengming Zhou /*
 * Flag whether to re-arm avgs_work, see details in get_recent_times().
 * Uses the bit directly above PSI_ONCPU.
 */
762fcd7bbaSChengming Zhou #define PSI_STATE_RESCHEDULE	(1 << (NR_PSI_STATES + 1))
772fcd7bbaSChengming Zhou 
/*
 * Aggregator kinds.
 *
 * Used as the first index of psi_group_cpu.times_prev[][] and
 * psi_group.total[][], and as the trigger type in
 * psi_trigger.aggregator (PSI_AVGS for unprivileged triggers,
 * PSI_POLL for RT polling triggers).
 */
780e94682bSSuren Baghdasaryan enum psi_aggregators {
790e94682bSSuren Baghdasaryan 	PSI_AVGS = 0,
800e94682bSSuren Baghdasaryan 	PSI_POLL,
810e94682bSSuren Baghdasaryan 	NR_PSI_AGGREGATORS,
820e94682bSSuren Baghdasaryan };
830e94682bSSuren Baghdasaryan 
/*
 * Per-CPU part of a psi_group, reached through psi_group.pcpu.
 *
 * The fields are split over two cacheline-aligned regions: the first
 * is written by the scheduler, the second (times_prev) by the
 * aggregator. Keep the field order intact when editing.
 */
84eb414681SJohannes Weiner struct psi_group_cpu {
85eb414681SJohannes Weiner 	/* 1st cacheline updated by the scheduler */
86eb414681SJohannes Weiner 
87eb414681SJohannes Weiner 	/* Aggregator needs to know of concurrent changes */
88eb414681SJohannes Weiner 	seqcount_t seq ____cacheline_aligned_in_smp;
89eb414681SJohannes Weiner 
90eb414681SJohannes Weiner 	/* States of the tasks belonging to this group */
91eb414681SJohannes Weiner 	unsigned int tasks[NR_PSI_TASK_COUNTS];
92eb414681SJohannes Weiner 
9333b2d630SSuren Baghdasaryan 	/* Aggregate pressure state derived from the tasks */
9433b2d630SSuren Baghdasaryan 	u32 state_mask;
9533b2d630SSuren Baghdasaryan 
96eb414681SJohannes Weiner 	/* Period time sampling buckets for each state of interest (ns) */
97eb414681SJohannes Weiner 	u32 times[NR_PSI_STATES];
98eb414681SJohannes Weiner 
99eb414681SJohannes Weiner 	/* Time of last task change in this group (rq_clock) */
100eb414681SJohannes Weiner 	u64 state_start;
101eb414681SJohannes Weiner 
102eb414681SJohannes Weiner 	/* 2nd cacheline updated by the aggregator */
103eb414681SJohannes Weiner 
104eb414681SJohannes Weiner 	/* Delta detection against the sampling buckets, one row per aggregator */
1050e94682bSSuren Baghdasaryan 	u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
1060e94682bSSuren Baghdasaryan 			____cacheline_aligned_in_smp;
1070e94682bSSuren Baghdasaryan };
1080e94682bSSuren Baghdasaryan 
1090e94682bSSuren Baghdasaryan /* PSI growth tracking window; embedded in struct psi_trigger as ->win */
1100e94682bSSuren Baghdasaryan struct psi_window {
1110e94682bSSuren Baghdasaryan 	/* Window size in ns */
1120e94682bSSuren Baghdasaryan 	u64 size;
1130e94682bSSuren Baghdasaryan 
1140e94682bSSuren Baghdasaryan 	/* Start time of the current window in ns */
1150e94682bSSuren Baghdasaryan 	u64 start_time;
1160e94682bSSuren Baghdasaryan 
1170e94682bSSuren Baghdasaryan 	/* Value at the start of the window */
1180e94682bSSuren Baghdasaryan 	u64 start_value;
1190e94682bSSuren Baghdasaryan 
1200e94682bSSuren Baghdasaryan 	/* Value growth in the previous window */
1210e94682bSSuren Baghdasaryan 	u64 prev_growth;
1220e94682bSSuren Baghdasaryan };
1230e94682bSSuren Baghdasaryan 
/*
 * One pressure trigger: watches a single enum psi_states value against
 * a threshold, tracked over the window in ->win.
 */
1240e94682bSSuren Baghdasaryan struct psi_trigger {
1250e94682bSSuren Baghdasaryan 	/* PSI state being monitored by the trigger */
1260e94682bSSuren Baghdasaryan 	enum psi_states state;
1270e94682bSSuren Baghdasaryan 
1280e94682bSSuren Baghdasaryan 	/* User-specified threshold in ns */
1290e94682bSSuren Baghdasaryan 	u64 threshold;
1300e94682bSSuren Baghdasaryan 
1310e94682bSSuren Baghdasaryan 	/* List node inside triggers list */
1320e94682bSSuren Baghdasaryan 	struct list_head node;
1330e94682bSSuren Baghdasaryan 
1340e94682bSSuren Baghdasaryan 	/* Backpointer needed during trigger destruction */
1350e94682bSSuren Baghdasaryan 	struct psi_group *group;
1360e94682bSSuren Baghdasaryan 
1370e94682bSSuren Baghdasaryan 	/* Wait queue for polling */
1380e94682bSSuren Baghdasaryan 	wait_queue_head_t event_wait;
1390e94682bSSuren Baghdasaryan 
140*aff03707SSuren Baghdasaryan 	/* Kernfs file for cgroup triggers */
141*aff03707SSuren Baghdasaryan 	struct kernfs_open_file *of;
142*aff03707SSuren Baghdasaryan 
1430e94682bSSuren Baghdasaryan 	/* Pending event flag */
1440e94682bSSuren Baghdasaryan 	int event;
1450e94682bSSuren Baghdasaryan 
1460e94682bSSuren Baghdasaryan 	/* Tracking window */
1470e94682bSSuren Baghdasaryan 	struct psi_window win;
1480e94682bSSuren Baghdasaryan 
1490e94682bSSuren Baghdasaryan 	/*
1500e94682bSSuren Baghdasaryan 	 * Time last event was generated. Used for rate-limiting
1510e94682bSSuren Baghdasaryan 	 * events to one per window
1520e94682bSSuren Baghdasaryan 	 */
1530e94682bSSuren Baghdasaryan 	u64 last_event_time;
1540e94682bSSuren Baghdasaryan 
155e6df4eadSZhaoyang Huang 	/* Deferred event(s) from previous ratelimit window */
156e6df4eadSZhaoyang Huang 	bool pending_event;
157d82caa27SDomenico Cerasuolo 
158d82caa27SDomenico Cerasuolo 	/* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */
159d82caa27SDomenico Cerasuolo 	enum psi_aggregators aggregator;
160eb414681SJohannes Weiner };
161eb414681SJohannes Weiner 
/*
 * Pressure accounting state for one group. Groups are linked to their
 * parent through ->parent.
 *
 * Arrays sized NR_PSI_STATES - 1 cover every enum psi_states value
 * except the last one, PSI_NONIDLE.
 */
162eb414681SJohannes Weiner struct psi_group {
163dc86aba7SChengming Zhou 	struct psi_group *parent;
	/* NOTE(review): presumably gates accounting for this group - confirm */
16434f26a15SChengming Zhou 	bool enabled;
165dc86aba7SChengming Zhou 
166bcc78db6SSuren Baghdasaryan 	/* Protects data used by the aggregator */
167bcc78db6SSuren Baghdasaryan 	struct mutex avgs_lock;
168eb414681SJohannes Weiner 
169eb414681SJohannes Weiner 	/* Per-cpu task state & time tracking */
170eb414681SJohannes Weiner 	struct psi_group_cpu __percpu *pcpu;
171eb414681SJohannes Weiner 
172bcc78db6SSuren Baghdasaryan 	/* Running pressure averages */
173bcc78db6SSuren Baghdasaryan 	u64 avg_total[NR_PSI_STATES - 1];
174bcc78db6SSuren Baghdasaryan 	u64 avg_last_update;
175bcc78db6SSuren Baghdasaryan 	u64 avg_next_update;
1760e94682bSSuren Baghdasaryan 
1770e94682bSSuren Baghdasaryan 	/* Aggregator work control */
178bcc78db6SSuren Baghdasaryan 	struct delayed_work avgs_work;
179eb414681SJohannes Weiner 
180d82caa27SDomenico Cerasuolo 	/* Unprivileged triggers against N*PSI_FREQ windows */
181d82caa27SDomenico Cerasuolo 	struct list_head avg_triggers;
182d82caa27SDomenico Cerasuolo 	u32 avg_nr_triggers[NR_PSI_STATES - 1];
183d82caa27SDomenico Cerasuolo 
184eb414681SJohannes Weiner 	/*
	 * Total stall times (one row per aggregator) and sampled pressure
	 * averages (three values per state).
	 * NOTE(review): the three avg slots are presumably the 10s/60s/300s
	 * windows - confirm against kernel/sched/psi.c.
	 */
1850e94682bSSuren Baghdasaryan 	u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
186eb414681SJohannes Weiner 	unsigned long avg[NR_PSI_STATES - 1][3];
1870e94682bSSuren Baghdasaryan 
18865457b74SDomenico Cerasuolo 	/* Monitor RT polling work control */
18965457b74SDomenico Cerasuolo 	struct task_struct __rcu *rtpoll_task;
19065457b74SDomenico Cerasuolo 	struct timer_list rtpoll_timer;
19165457b74SDomenico Cerasuolo 	wait_queue_head_t rtpoll_wait;
19265457b74SDomenico Cerasuolo 	atomic_t rtpoll_wakeup;
19365457b74SDomenico Cerasuolo 	atomic_t rtpoll_scheduled;
1940e94682bSSuren Baghdasaryan 
1950e94682bSSuren Baghdasaryan 	/* Protects data used by the monitor */
19665457b74SDomenico Cerasuolo 	struct mutex rtpoll_trigger_lock;
1970e94682bSSuren Baghdasaryan 
19865457b74SDomenico Cerasuolo 	/* Configured RT polling triggers */
19965457b74SDomenico Cerasuolo 	struct list_head rtpoll_triggers;
20065457b74SDomenico Cerasuolo 	u32 rtpoll_nr_triggers[NR_PSI_STATES - 1];
20165457b74SDomenico Cerasuolo 	u32 rtpoll_states;
20265457b74SDomenico Cerasuolo 	u64 rtpoll_min_period;
2030e94682bSSuren Baghdasaryan 
20465457b74SDomenico Cerasuolo 	/* Total stall times at the start of RT polling monitor activation */
20565457b74SDomenico Cerasuolo 	u64 rtpoll_total[NR_PSI_STATES - 1];
20665457b74SDomenico Cerasuolo 	u64 rtpoll_next_update;
20765457b74SDomenico Cerasuolo 	u64 rtpoll_until;
208eb414681SJohannes Weiner };
209eb414681SJohannes Weiner 
210eb414681SJohannes Weiner #else /* CONFIG_PSI */
211eb414681SJohannes Weiner 
/*
 * !CONFIG_PSI stubs: there are no PSI resources, and struct psi_group
 * is empty so the type name stays defined.
 */
21234f26a15SChengming Zhou #define NR_PSI_RESOURCES	0
21334f26a15SChengming Zhou 
214eb414681SJohannes Weiner struct psi_group { };
215eb414681SJohannes Weiner 
216eb414681SJohannes Weiner #endif /* CONFIG_PSI */
217eb414681SJohannes Weiner 
218eb414681SJohannes Weiner #endif /* _LINUX_PSI_TYPES_H */
219