1 #ifndef _LINUX_PSI_TYPES_H
2 #define _LINUX_PSI_TYPES_H
3 
4 #include <linux/kthread.h>
5 #include <linux/seqlock.h>
6 #include <linux/types.h>
7 #include <linux/kref.h>
8 #include <linux/wait.h>
9 
10 #ifdef CONFIG_PSI
11 
/*
 * Tracked task states.
 *
 * Each enumerator doubles as an index into psi_group_cpu.tasks[] and as
 * the bit position of the corresponding TSK_* mask.
 */
enum psi_task_count {
	NR_IOWAIT,
	NR_MEMSTALL,
	NR_RUNNING,
	/*
	 * This can't have values other than 0 or 1 and could be
	 * implemented as a bit flag. But for now we still have room
	 * in the first cacheline of psi_group_cpu, and this way we
	 * don't have to special case any state tracking for it.
	 */
	NR_ONCPU,
	/* Number of states above; keep in sync when adding one */
	NR_PSI_TASK_COUNTS = 4,
};
26 
/*
 * Task state bitmasks.
 *
 * One bit per psi_task_count state, at the bit position given by the
 * matching NR_* enumerator.
 */
#define TSK_IOWAIT	(1 << NR_IOWAIT)
#define TSK_MEMSTALL	(1 << NR_MEMSTALL)
#define TSK_RUNNING	(1 << NR_RUNNING)
#define TSK_ONCPU	(1 << NR_ONCPU)
32 
/* Resources that workloads could be stalled on */
enum psi_res {
	PSI_IO,
	PSI_MEM,
	PSI_CPU,
	/* Number of resources above; keep in sync when adding one */
	NR_PSI_RESOURCES = 3,
};
40 
/*
 * Pressure states for each resource:
 *
 * SOME: Stalled tasks & working tasks
 * FULL: Stalled tasks & no working tasks
 *
 * PSI_NONIDLE must remain the last real state: the per-group arrays are
 * sized NR_PSI_STATES - 1 and therefore leave it out.
 */
enum psi_states {
	PSI_IO_SOME,
	PSI_IO_FULL,
	PSI_MEM_SOME,
	PSI_MEM_FULL,
	PSI_CPU_SOME,
	PSI_CPU_FULL,
	/* Only per-CPU, to weigh the CPU in the global average: */
	PSI_NONIDLE,
	/* Number of states above, PSI_NONIDLE included */
	NR_PSI_STATES = 7,
};
58 
/*
 * Consumers of the per-CPU time buckets. Each one gets its own row in
 * psi_group_cpu.times_prev[] and psi_group.total[].
 */
enum psi_aggregators {
	PSI_AVGS = 0,
	PSI_POLL,
	NR_PSI_AGGREGATORS,
};
64 
65 struct psi_group_cpu {
66 	/* 1st cacheline updated by the scheduler */
67 
68 	/* Aggregator needs to know of concurrent changes */
69 	seqcount_t seq ____cacheline_aligned_in_smp;
70 
71 	/* States of the tasks belonging to this group */
72 	unsigned int tasks[NR_PSI_TASK_COUNTS];
73 
74 	/* Aggregate pressure state derived from the tasks */
75 	u32 state_mask;
76 
77 	/* Period time sampling buckets for each state of interest (ns) */
78 	u32 times[NR_PSI_STATES];
79 
80 	/* Time of last task change in this group (rq_clock) */
81 	u64 state_start;
82 
83 	/* 2nd cacheline updated by the aggregator */
84 
85 	/* Delta detection against the sampling buckets */
86 	u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
87 			____cacheline_aligned_in_smp;
88 };
89 
90 /* PSI growth tracking window */
91 struct psi_window {
92 	/* Window size in ns */
93 	u64 size;
94 
95 	/* Start time of the current window in ns */
96 	u64 start_time;
97 
98 	/* Value at the start of the window */
99 	u64 start_value;
100 
101 	/* Value growth in the previous window */
102 	u64 prev_growth;
103 };
104 
105 struct psi_trigger {
106 	/* PSI state being monitored by the trigger */
107 	enum psi_states state;
108 
109 	/* User-spacified threshold in ns */
110 	u64 threshold;
111 
112 	/* List node inside triggers list */
113 	struct list_head node;
114 
115 	/* Backpointer needed during trigger destruction */
116 	struct psi_group *group;
117 
118 	/* Wait queue for polling */
119 	wait_queue_head_t event_wait;
120 
121 	/* Pending event flag */
122 	int event;
123 
124 	/* Tracking window */
125 	struct psi_window win;
126 
127 	/*
128 	 * Time last event was generated. Used for rate-limiting
129 	 * events to one per window
130 	 */
131 	u64 last_event_time;
132 
133 	/* Refcounting to prevent premature destruction */
134 	struct kref refcount;
135 };
136 
137 struct psi_group {
138 	/* Protects data used by the aggregator */
139 	struct mutex avgs_lock;
140 
141 	/* Per-cpu task state & time tracking */
142 	struct psi_group_cpu __percpu *pcpu;
143 
144 	/* Running pressure averages */
145 	u64 avg_total[NR_PSI_STATES - 1];
146 	u64 avg_last_update;
147 	u64 avg_next_update;
148 
149 	/* Aggregator work control */
150 	struct delayed_work avgs_work;
151 
152 	/* Total stall times and sampled pressure averages */
153 	u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
154 	unsigned long avg[NR_PSI_STATES - 1][3];
155 
156 	/* Monitor work control */
157 	struct task_struct __rcu *poll_task;
158 	struct timer_list poll_timer;
159 	wait_queue_head_t poll_wait;
160 	atomic_t poll_wakeup;
161 
162 	/* Protects data used by the monitor */
163 	struct mutex trigger_lock;
164 
165 	/* Configured polling triggers */
166 	struct list_head triggers;
167 	u32 nr_triggers[NR_PSI_STATES - 1];
168 	u32 poll_states;
169 	u64 poll_min_period;
170 
171 	/* Total stall times at the start of monitor activation */
172 	u64 polling_total[NR_PSI_STATES - 1];
173 	u64 polling_next_update;
174 	u64 polling_until;
175 };
176 
177 #else /* CONFIG_PSI */
178 
/* Empty definition of struct psi_group used when CONFIG_PSI is not set */
struct psi_group { };
180 
181 #endif /* CONFIG_PSI */
182 
183 #endif /* _LINUX_PSI_TYPES_H */
184