12fb75e1bSLiu Xinpeng /* SPDX-License-Identifier: GPL-2.0 */ 2eb414681SJohannes Weiner #ifndef _LINUX_PSI_TYPES_H 3eb414681SJohannes Weiner #define _LINUX_PSI_TYPES_H 4eb414681SJohannes Weiner 50e94682bSSuren Baghdasaryan #include <linux/kthread.h> 6eb414681SJohannes Weiner #include <linux/seqlock.h> 7eb414681SJohannes Weiner #include <linux/types.h> 80e94682bSSuren Baghdasaryan #include <linux/kref.h> 90e94682bSSuren Baghdasaryan #include <linux/wait.h> 10eb414681SJohannes Weiner 11eb414681SJohannes Weiner #ifdef CONFIG_PSI 12eb414681SJohannes Weiner 13eb414681SJohannes Weiner /* Tracked task states */ 14eb414681SJohannes Weiner enum psi_task_count { 15eb414681SJohannes Weiner NR_IOWAIT, 16eb414681SJohannes Weiner NR_MEMSTALL, 17eb414681SJohannes Weiner NR_RUNNING, 18b05e75d6SJohannes Weiner /* 19cb0e52b7SBrian Chen * For IO and CPU stalls the presence of running/oncpu tasks 20cb0e52b7SBrian Chen * in the domain means a partial rather than a full stall. 21cb0e52b7SBrian Chen * For memory it's not so simple because of page reclaimers: 22cb0e52b7SBrian Chen * they are running/oncpu while representing a stall. To tell 23cb0e52b7SBrian Chen * whether a domain has productivity left or not, we need to 24cb0e52b7SBrian Chen * distinguish between regular running (i.e. productive) 25cb0e52b7SBrian Chen * threads and memstall ones. 26cb0e52b7SBrian Chen */ 27cb0e52b7SBrian Chen NR_MEMSTALL_RUNNING, 2871dbdde7SJohannes Weiner NR_PSI_TASK_COUNTS = 4, 29eb414681SJohannes Weiner }; 30eb414681SJohannes Weiner 31eb414681SJohannes Weiner /* Task state bitmasks */ 32eb414681SJohannes Weiner #define TSK_IOWAIT (1 << NR_IOWAIT) 33eb414681SJohannes Weiner #define TSK_MEMSTALL (1 << NR_MEMSTALL) 34eb414681SJohannes Weiner #define TSK_RUNNING (1 << NR_RUNNING) 35cb0e52b7SBrian Chen #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING) 36eb414681SJohannes Weiner 3771dbdde7SJohannes Weiner /* Only one task can be scheduled, no corresponding task count */ 3871dbdde7SJohannes Weiner #define TSK_ONCPU (1 << NR_PSI_TASK_COUNTS) 3971dbdde7SJohannes Weiner 40eb414681SJohannes Weiner /* Resources that workloads could be stalled on */ 41eb414681SJohannes Weiner enum psi_res { 42eb414681SJohannes Weiner PSI_IO, 43eb414681SJohannes Weiner PSI_MEM, 44eb414681SJohannes Weiner PSI_CPU, 4552b1364bSChengming Zhou #ifdef CONFIG_IRQ_TIME_ACCOUNTING 4652b1364bSChengming Zhou PSI_IRQ, 4752b1364bSChengming Zhou #endif 4852b1364bSChengming Zhou NR_PSI_RESOURCES, 49eb414681SJohannes Weiner }; 50eb414681SJohannes Weiner 51eb414681SJohannes Weiner /* 52eb414681SJohannes Weiner * Pressure states for each resource: 53eb414681SJohannes Weiner * 54eb414681SJohannes Weiner * SOME: Stalled tasks & working tasks 55eb414681SJohannes Weiner * FULL: Stalled tasks & no working tasks 56eb414681SJohannes Weiner */ 57eb414681SJohannes Weiner enum psi_states { 58eb414681SJohannes Weiner PSI_IO_SOME, 59eb414681SJohannes Weiner PSI_IO_FULL, 60eb414681SJohannes Weiner PSI_MEM_SOME, 61eb414681SJohannes Weiner PSI_MEM_FULL, 62eb414681SJohannes Weiner PSI_CPU_SOME, 63e7fcd762SChengming Zhou PSI_CPU_FULL, 6452b1364bSChengming Zhou #ifdef CONFIG_IRQ_TIME_ACCOUNTING 6552b1364bSChengming Zhou PSI_IRQ_FULL, 6652b1364bSChengming Zhou #endif 67eb414681SJohannes Weiner /* Only per-CPU, to weigh the CPU in the global average: */ 68eb414681SJohannes Weiner PSI_NONIDLE, 6952b1364bSChengming Zhou NR_PSI_STATES, 70eb414681SJohannes Weiner }; 71eb414681SJohannes Weiner 7271dbdde7SJohannes Weiner /* Use one bit in the state mask to track TSK_ONCPU */ 7371dbdde7SJohannes Weiner #define PSI_ONCPU (1 << NR_PSI_STATES) 7471dbdde7SJohannes Weiner 752fcd7bbaSChengming Zhou /* Flag whether to re-arm avgs_work, see details in get_recent_times() */ 762fcd7bbaSChengming Zhou #define PSI_STATE_RESCHEDULE (1 << (NR_PSI_STATES + 1)) 772fcd7bbaSChengming Zhou 780e94682bSSuren Baghdasaryan enum psi_aggregators { 790e94682bSSuren Baghdasaryan PSI_AVGS = 0, 800e94682bSSuren Baghdasaryan PSI_POLL, 810e94682bSSuren Baghdasaryan NR_PSI_AGGREGATORS, 820e94682bSSuren Baghdasaryan }; 830e94682bSSuren Baghdasaryan 84eb414681SJohannes Weiner struct psi_group_cpu { 85eb414681SJohannes Weiner /* 1st cacheline updated by the scheduler */ 86eb414681SJohannes Weiner 87eb414681SJohannes Weiner /* Aggregator needs to know of concurrent changes */ 88eb414681SJohannes Weiner seqcount_t seq ____cacheline_aligned_in_smp; 89eb414681SJohannes Weiner 90eb414681SJohannes Weiner /* States of the tasks belonging to this group */ 91eb414681SJohannes Weiner unsigned int tasks[NR_PSI_TASK_COUNTS]; 92eb414681SJohannes Weiner 9333b2d630SSuren Baghdasaryan /* Aggregate pressure state derived from the tasks */ 9433b2d630SSuren Baghdasaryan u32 state_mask; 9533b2d630SSuren Baghdasaryan 96eb414681SJohannes Weiner /* Period time sampling buckets for each state of interest (ns) */ 97eb414681SJohannes Weiner u32 times[NR_PSI_STATES]; 98eb414681SJohannes Weiner 99eb414681SJohannes Weiner /* Time of last task change in this group (rq_clock) */ 100eb414681SJohannes Weiner u64 state_start; 101eb414681SJohannes Weiner 102eb414681SJohannes Weiner /* 2nd cacheline updated by the aggregator */ 103eb414681SJohannes Weiner 104eb414681SJohannes Weiner /* Delta detection against the sampling buckets */ 1050e94682bSSuren Baghdasaryan u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES] 1060e94682bSSuren Baghdasaryan ____cacheline_aligned_in_smp; 1070e94682bSSuren Baghdasaryan }; 1080e94682bSSuren Baghdasaryan 1090e94682bSSuren Baghdasaryan /* PSI growth tracking window */ 1100e94682bSSuren Baghdasaryan struct psi_window { 1110e94682bSSuren Baghdasaryan /* Window size in ns */ 1120e94682bSSuren Baghdasaryan u64 size; 1130e94682bSSuren Baghdasaryan 1140e94682bSSuren Baghdasaryan /* Start time of the current window in ns */ 1150e94682bSSuren Baghdasaryan u64 start_time; 1160e94682bSSuren Baghdasaryan 1170e94682bSSuren Baghdasaryan /* Value at the start of the window */ 1180e94682bSSuren Baghdasaryan u64 start_value; 1190e94682bSSuren Baghdasaryan 1200e94682bSSuren Baghdasaryan /* Value growth in the previous window */ 1210e94682bSSuren Baghdasaryan u64 prev_growth; 1220e94682bSSuren Baghdasaryan }; 1230e94682bSSuren Baghdasaryan 1240e94682bSSuren Baghdasaryan struct psi_trigger { 1250e94682bSSuren Baghdasaryan /* PSI state being monitored by the trigger */ 1260e94682bSSuren Baghdasaryan enum psi_states state; 1270e94682bSSuren Baghdasaryan 1280e94682bSSuren Baghdasaryan /* User-spacified threshold in ns */ 1290e94682bSSuren Baghdasaryan u64 threshold; 1300e94682bSSuren Baghdasaryan 1310e94682bSSuren Baghdasaryan /* List node inside triggers list */ 1320e94682bSSuren Baghdasaryan struct list_head node; 1330e94682bSSuren Baghdasaryan 1340e94682bSSuren Baghdasaryan /* Backpointer needed during trigger destruction */ 1350e94682bSSuren Baghdasaryan struct psi_group *group; 1360e94682bSSuren Baghdasaryan 1370e94682bSSuren Baghdasaryan /* Wait queue for polling */ 1380e94682bSSuren Baghdasaryan wait_queue_head_t event_wait; 1390e94682bSSuren Baghdasaryan 140*aff03707SSuren Baghdasaryan /* Kernfs file for cgroup triggers */ 141*aff03707SSuren Baghdasaryan struct kernfs_open_file *of; 142*aff03707SSuren Baghdasaryan 1430e94682bSSuren Baghdasaryan /* Pending event flag */ 1440e94682bSSuren Baghdasaryan int event; 1450e94682bSSuren Baghdasaryan 1460e94682bSSuren Baghdasaryan /* Tracking window */ 1470e94682bSSuren Baghdasaryan struct psi_window win; 1480e94682bSSuren Baghdasaryan 1490e94682bSSuren Baghdasaryan /* 1500e94682bSSuren Baghdasaryan * Time last event was generated. Used for rate-limiting 1510e94682bSSuren Baghdasaryan * events to one per window 1520e94682bSSuren Baghdasaryan */ 1530e94682bSSuren Baghdasaryan u64 last_event_time; 1540e94682bSSuren Baghdasaryan 155e6df4eadSZhaoyang Huang /* Deferred event(s) from previous ratelimit window */ 156e6df4eadSZhaoyang Huang bool pending_event; 157d82caa27SDomenico Cerasuolo 158d82caa27SDomenico Cerasuolo /* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */ 159d82caa27SDomenico Cerasuolo enum psi_aggregators aggregator; 160eb414681SJohannes Weiner }; 161eb414681SJohannes Weiner 162eb414681SJohannes Weiner struct psi_group { 163dc86aba7SChengming Zhou struct psi_group *parent; 16434f26a15SChengming Zhou bool enabled; 165dc86aba7SChengming Zhou 166bcc78db6SSuren Baghdasaryan /* Protects data used by the aggregator */ 167bcc78db6SSuren Baghdasaryan struct mutex avgs_lock; 168eb414681SJohannes Weiner 169eb414681SJohannes Weiner /* Per-cpu task state & time tracking */ 170eb414681SJohannes Weiner struct psi_group_cpu __percpu *pcpu; 171eb414681SJohannes Weiner 172bcc78db6SSuren Baghdasaryan /* Running pressure averages */ 173bcc78db6SSuren Baghdasaryan u64 avg_total[NR_PSI_STATES - 1]; 174bcc78db6SSuren Baghdasaryan u64 avg_last_update; 175bcc78db6SSuren Baghdasaryan u64 avg_next_update; 1760e94682bSSuren Baghdasaryan 1770e94682bSSuren Baghdasaryan /* Aggregator work control */ 178bcc78db6SSuren Baghdasaryan struct delayed_work avgs_work; 179eb414681SJohannes Weiner 180d82caa27SDomenico Cerasuolo /* Unprivileged triggers against N*PSI_FREQ windows */ 181d82caa27SDomenico Cerasuolo struct list_head avg_triggers; 182d82caa27SDomenico Cerasuolo u32 avg_nr_triggers[NR_PSI_STATES - 1]; 183d82caa27SDomenico Cerasuolo 184eb414681SJohannes Weiner /* Total stall times and sampled pressure averages */ 1850e94682bSSuren Baghdasaryan u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1]; 186eb414681SJohannes Weiner unsigned long avg[NR_PSI_STATES - 1][3]; 1870e94682bSSuren Baghdasaryan 18865457b74SDomenico Cerasuolo /* Monitor RT polling work control */ 18965457b74SDomenico Cerasuolo struct task_struct __rcu *rtpoll_task; 19065457b74SDomenico Cerasuolo struct timer_list rtpoll_timer; 19165457b74SDomenico Cerasuolo wait_queue_head_t rtpoll_wait; 19265457b74SDomenico Cerasuolo atomic_t rtpoll_wakeup; 19365457b74SDomenico Cerasuolo atomic_t rtpoll_scheduled; 1940e94682bSSuren Baghdasaryan 1950e94682bSSuren Baghdasaryan /* Protects data used by the monitor */ 19665457b74SDomenico Cerasuolo struct mutex rtpoll_trigger_lock; 1970e94682bSSuren Baghdasaryan 19865457b74SDomenico Cerasuolo /* Configured RT polling triggers */ 19965457b74SDomenico Cerasuolo struct list_head rtpoll_triggers; 20065457b74SDomenico Cerasuolo u32 rtpoll_nr_triggers[NR_PSI_STATES - 1]; 20165457b74SDomenico Cerasuolo u32 rtpoll_states; 20265457b74SDomenico Cerasuolo u64 rtpoll_min_period; 2030e94682bSSuren Baghdasaryan 20465457b74SDomenico Cerasuolo /* Total stall times at the start of RT polling monitor activation */ 20565457b74SDomenico Cerasuolo u64 rtpoll_total[NR_PSI_STATES - 1]; 20665457b74SDomenico Cerasuolo u64 rtpoll_next_update; 20765457b74SDomenico Cerasuolo u64 rtpoll_until; 208eb414681SJohannes Weiner }; 209eb414681SJohannes Weiner 210eb414681SJohannes Weiner #else /* CONFIG_PSI */ 211eb414681SJohannes Weiner 21234f26a15SChengming Zhou #define NR_PSI_RESOURCES 0 21334f26a15SChengming Zhou 214eb414681SJohannes Weiner struct psi_group { }; 215eb414681SJohannes Weiner 216eb414681SJohannes Weiner #endif /* CONFIG_PSI */ 217eb414681SJohannes Weiner 218eb414681SJohannes Weiner #endif /* _LINUX_PSI_TYPES_H */ 219