xref: /linux-6.15/include/linux/psi_types.h (revision 2fb75e1b)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _LINUX_PSI_TYPES_H
3 #define _LINUX_PSI_TYPES_H
4 
5 #include <linux/kthread.h>
6 #include <linux/seqlock.h>
7 #include <linux/types.h>
8 #include <linux/kref.h>
9 #include <linux/wait.h>
10 
11 #ifdef CONFIG_PSI
12 
13 /* Tracked task states */
14 enum psi_task_count {
15 	NR_IOWAIT,
16 	NR_MEMSTALL,
17 	NR_RUNNING,
18 	/*
19 	 * This can't have values other than 0 or 1 and could be
20 	 * implemented as a bit flag. But for now we still have room
21 	 * in the first cacheline of psi_group_cpu, and this way we
22 	 * don't have to special case any state tracking for it.
23 	 */
24 	NR_ONCPU,
25 	NR_PSI_TASK_COUNTS = 4,
26 };
27 
28 /* Task state bitmasks */
29 #define TSK_IOWAIT	(1 << NR_IOWAIT)
30 #define TSK_MEMSTALL	(1 << NR_MEMSTALL)
31 #define TSK_RUNNING	(1 << NR_RUNNING)
32 #define TSK_ONCPU	(1 << NR_ONCPU)
33 
34 /* Resources that workloads could be stalled on */
35 enum psi_res {
36 	PSI_IO,
37 	PSI_MEM,
38 	PSI_CPU,
39 	NR_PSI_RESOURCES = 3,
40 };
41 
42 /*
43  * Pressure states for each resource:
44  *
45  * SOME: Stalled tasks & working tasks
46  * FULL: Stalled tasks & no working tasks
47  */
48 enum psi_states {
49 	PSI_IO_SOME,
50 	PSI_IO_FULL,
51 	PSI_MEM_SOME,
52 	PSI_MEM_FULL,
53 	PSI_CPU_SOME,
54 	PSI_CPU_FULL,
55 	/* Only per-CPU, to weigh the CPU in the global average: */
56 	PSI_NONIDLE,
57 	NR_PSI_STATES = 7,
58 };
59 
60 enum psi_aggregators {
61 	PSI_AVGS = 0,
62 	PSI_POLL,
63 	NR_PSI_AGGREGATORS,
64 };
65 
66 struct psi_group_cpu {
67 	/* 1st cacheline updated by the scheduler */
68 
69 	/* Aggregator needs to know of concurrent changes */
70 	seqcount_t seq ____cacheline_aligned_in_smp;
71 
72 	/* States of the tasks belonging to this group */
73 	unsigned int tasks[NR_PSI_TASK_COUNTS];
74 
75 	/* Aggregate pressure state derived from the tasks */
76 	u32 state_mask;
77 
78 	/* Period time sampling buckets for each state of interest (ns) */
79 	u32 times[NR_PSI_STATES];
80 
81 	/* Time of last task change in this group (rq_clock) */
82 	u64 state_start;
83 
84 	/* 2nd cacheline updated by the aggregator */
85 
86 	/* Delta detection against the sampling buckets */
87 	u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
88 			____cacheline_aligned_in_smp;
89 };
90 
91 /* PSI growth tracking window */
92 struct psi_window {
93 	/* Window size in ns */
94 	u64 size;
95 
96 	/* Start time of the current window in ns */
97 	u64 start_time;
98 
99 	/* Value at the start of the window */
100 	u64 start_value;
101 
102 	/* Value growth in the previous window */
103 	u64 prev_growth;
104 };
105 
106 struct psi_trigger {
107 	/* PSI state being monitored by the trigger */
108 	enum psi_states state;
109 
110 	/* User-spacified threshold in ns */
111 	u64 threshold;
112 
113 	/* List node inside triggers list */
114 	struct list_head node;
115 
116 	/* Backpointer needed during trigger destruction */
117 	struct psi_group *group;
118 
119 	/* Wait queue for polling */
120 	wait_queue_head_t event_wait;
121 
122 	/* Pending event flag */
123 	int event;
124 
125 	/* Tracking window */
126 	struct psi_window win;
127 
128 	/*
129 	 * Time last event was generated. Used for rate-limiting
130 	 * events to one per window
131 	 */
132 	u64 last_event_time;
133 
134 	/* Refcounting to prevent premature destruction */
135 	struct kref refcount;
136 };
137 
138 struct psi_group {
139 	/* Protects data used by the aggregator */
140 	struct mutex avgs_lock;
141 
142 	/* Per-cpu task state & time tracking */
143 	struct psi_group_cpu __percpu *pcpu;
144 
145 	/* Running pressure averages */
146 	u64 avg_total[NR_PSI_STATES - 1];
147 	u64 avg_last_update;
148 	u64 avg_next_update;
149 
150 	/* Aggregator work control */
151 	struct delayed_work avgs_work;
152 
153 	/* Total stall times and sampled pressure averages */
154 	u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
155 	unsigned long avg[NR_PSI_STATES - 1][3];
156 
157 	/* Monitor work control */
158 	struct task_struct __rcu *poll_task;
159 	struct timer_list poll_timer;
160 	wait_queue_head_t poll_wait;
161 	atomic_t poll_wakeup;
162 
163 	/* Protects data used by the monitor */
164 	struct mutex trigger_lock;
165 
166 	/* Configured polling triggers */
167 	struct list_head triggers;
168 	u32 nr_triggers[NR_PSI_STATES - 1];
169 	u32 poll_states;
170 	u64 poll_min_period;
171 
172 	/* Total stall times at the start of monitor activation */
173 	u64 polling_total[NR_PSI_STATES - 1];
174 	u64 polling_next_update;
175 	u64 polling_until;
176 };
177 
178 #else /* CONFIG_PSI */
179 
180 struct psi_group { };
181 
182 #endif /* CONFIG_PSI */
183 
184 #endif /* _LINUX_PSI_TYPES_H */
185