xref: /linux-6.15/include/linux/psi_types.h (revision 66cd9d4e)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _LINUX_PSI_TYPES_H
3 #define _LINUX_PSI_TYPES_H
4 
5 #include <linux/kthread.h>
6 #include <linux/seqlock.h>
7 #include <linux/types.h>
8 #include <linux/kref.h>
9 #include <linux/wait.h>
10 
11 #ifdef CONFIG_PSI
12 
/*
 * Task states tracked per group.
 *
 * The enumerator values are also used as the bit positions of the
 * TSK_* masks, and NR_PSI_TASK_COUNTS sizes psi_group_cpu.tasks[].
 */
enum psi_task_count {
	NR_IOWAIT = 0,
	NR_MEMSTALL,
	NR_RUNNING,
	/*
	 * Only ever 0 or 1, so a bit flag would be enough. There is
	 * still room in the first cacheline of psi_group_cpu though,
	 * and a regular counter means the state tracking needs no
	 * special case for it.
	 */
	NR_ONCPU,
	/*
	 * Running/oncpu tasks in a domain turn an IO or CPU stall
	 * into a partial rather than a full one. Memory is not that
	 * simple: page reclaimers are running/oncpu while they
	 * represent a stall. Running memstall threads are therefore
	 * counted apart from regular (i.e. productive) running
	 * threads, so that it can be told whether a domain has any
	 * productivity left.
	 */
	NR_MEMSTALL_RUNNING,
	/* Number of tracked states; must remain the last entry */
	NR_PSI_TASK_COUNTS,
};
37 
/*
 * Task state bitmasks: one flag per enum psi_task_count state, with
 * the bit position taken from that state's enumerator value.
 */

/* Stall-related states */
#define TSK_IOWAIT		(1 << NR_IOWAIT)
#define TSK_MEMSTALL		(1 << NR_MEMSTALL)
#define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)

/* Running / on-CPU states */
#define TSK_RUNNING		(1 << NR_RUNNING)
#define TSK_ONCPU		(1 << NR_ONCPU)
44 
/* The resources a workload can be stalled on */
enum psi_res {
	PSI_IO = 0,
	PSI_MEM,
	PSI_CPU,
	/* Number of resources; must remain the last entry */
	NR_PSI_RESOURCES,
};
52 
/*
 * Pressure states, one SOME/FULL pair per resource:
 *
 * SOME: there are stalled tasks as well as working tasks
 * FULL: there are stalled tasks and no working tasks
 */
enum psi_states {
	PSI_IO_SOME = 0,
	PSI_IO_FULL,
	PSI_MEM_SOME,
	PSI_MEM_FULL,
	PSI_CPU_SOME,
	PSI_CPU_FULL,
	/* Per-CPU only: used to weigh the CPU in the global average */
	PSI_NONIDLE,
	/* Number of states, PSI_NONIDLE included; must remain last */
	NR_PSI_STATES,
};
70 
/*
 * Aggregator indices. NR_PSI_AGGREGATORS sizes the first dimension
 * of psi_group_cpu.times_prev[][] and psi_group.total[][].
 */
enum psi_aggregators {
	PSI_AVGS,
	PSI_POLL,
	/* Number of aggregators; must remain the last entry */
	NR_PSI_AGGREGATORS,
};
76 
77 struct psi_group_cpu {
78 	/* 1st cacheline updated by the scheduler */
79 
80 	/* Aggregator needs to know of concurrent changes */
81 	seqcount_t seq ____cacheline_aligned_in_smp;
82 
83 	/* States of the tasks belonging to this group */
84 	unsigned int tasks[NR_PSI_TASK_COUNTS];
85 
86 	/* Aggregate pressure state derived from the tasks */
87 	u32 state_mask;
88 
89 	/* Period time sampling buckets for each state of interest (ns) */
90 	u32 times[NR_PSI_STATES];
91 
92 	/* Time of last task change in this group (rq_clock) */
93 	u64 state_start;
94 
95 	/* 2nd cacheline updated by the aggregator */
96 
97 	/* Delta detection against the sampling buckets */
98 	u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
99 			____cacheline_aligned_in_smp;
100 };
101 
102 /* PSI growth tracking window */
103 struct psi_window {
104 	/* Window size in ns */
105 	u64 size;
106 
107 	/* Start time of the current window in ns */
108 	u64 start_time;
109 
110 	/* Value at the start of the window */
111 	u64 start_value;
112 
113 	/* Value growth in the previous window */
114 	u64 prev_growth;
115 };
116 
117 struct psi_trigger {
118 	/* PSI state being monitored by the trigger */
119 	enum psi_states state;
120 
121 	/* User-spacified threshold in ns */
122 	u64 threshold;
123 
124 	/* List node inside triggers list */
125 	struct list_head node;
126 
127 	/* Backpointer needed during trigger destruction */
128 	struct psi_group *group;
129 
130 	/* Wait queue for polling */
131 	wait_queue_head_t event_wait;
132 
133 	/* Pending event flag */
134 	int event;
135 
136 	/* Tracking window */
137 	struct psi_window win;
138 
139 	/*
140 	 * Time last event was generated. Used for rate-limiting
141 	 * events to one per window
142 	 */
143 	u64 last_event_time;
144 
145 	/* Deferred event(s) from previous ratelimit window */
146 	bool pending_event;
147 };
148 
149 struct psi_group {
150 	/* Protects data used by the aggregator */
151 	struct mutex avgs_lock;
152 
153 	/* Per-cpu task state & time tracking */
154 	struct psi_group_cpu __percpu *pcpu;
155 
156 	/* Running pressure averages */
157 	u64 avg_total[NR_PSI_STATES - 1];
158 	u64 avg_last_update;
159 	u64 avg_next_update;
160 
161 	/* Aggregator work control */
162 	struct delayed_work avgs_work;
163 
164 	/* Total stall times and sampled pressure averages */
165 	u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
166 	unsigned long avg[NR_PSI_STATES - 1][3];
167 
168 	/* Monitor work control */
169 	struct task_struct __rcu *poll_task;
170 	struct timer_list poll_timer;
171 	wait_queue_head_t poll_wait;
172 	atomic_t poll_wakeup;
173 
174 	/* Protects data used by the monitor */
175 	struct mutex trigger_lock;
176 
177 	/* Configured polling triggers */
178 	struct list_head triggers;
179 	u32 nr_triggers[NR_PSI_STATES - 1];
180 	u32 poll_states;
181 	u64 poll_min_period;
182 
183 	/* Total stall times at the start of monitor activation */
184 	u64 polling_total[NR_PSI_STATES - 1];
185 	u64 polling_next_update;
186 	u64 polling_until;
187 };
188 
189 #else /* CONFIG_PSI */
190 
/* !CONFIG_PSI: the type stays defined, but has no members */
struct psi_group {
};
192 
193 #endif /* CONFIG_PSI */
194 
195 #endif /* _LINUX_PSI_TYPES_H */
196