stats.h source code [Linux/kernel/sched/stats.h]

/* SPDX-License-Identifier: GPL-2.0 */
2	#ifndef _KERNEL_STATS_H
3	#define _KERNEL_STATS_H
4
5	#ifdef CONFIG_SCHEDSTATS
6
7	extern struct static_key_false sched_schedstats;
8
9	/*
10	* Expects runqueue lock to be held for atomicity of update
11	*/
12	static inline void
13	rq_sched_info_arrive(struct rq rq, unsigned* long long delta)
14	{
15	if (rq) {
16	rq->rq_sched_info.run_delay += delta;
17	rq->rq_sched_info.pcount++;
18	}
19	}
20
21	/*
22	* Expects runqueue lock to be held for atomicity of update
23	*/
24	static inline void
25	rq_sched_info_depart(struct rq rq, unsigned* long long delta)
26	{
27	if (rq)
28	rq->rq_cpu_time += delta;
29	}
30
31	static inline void
32	rq_sched_info_dequeue(struct rq rq, unsigned* long long delta)
33	{
34	if (rq)
35	rq->rq_sched_info.run_delay += delta;
36	}
/*
 * Statistics update helpers (CONFIG_SCHEDSTATS=y).
 *
 * The double-underscore variants update @var unconditionally; the plain
 * variants only do so when schedstat_enabled() is true.
 * schedstat_val_or_zero() yields 0 when schedstats are disabled.
 *
 * @var is parenthesized so that an argument such as *ptr is updated as a
 * whole (otherwise "*ptr++" would advance the pointer instead).
 */
#define   schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
#define __schedstat_inc(var)		do { (var)++; } while (0)
#define   schedstat_inc(var)		do { if (schedstat_enabled()) { (var)++; } } while (0)
#define __schedstat_add(var, amt)	do { (var) += (amt); } while (0)
#define   schedstat_add(var, amt)	do { if (schedstat_enabled()) { (var) += (amt); } } while (0)
#define __schedstat_set(var, val)	do { (var) = (val); } while (0)
#define   schedstat_set(var, val)	do { if (schedstat_enabled()) { (var) = (val); } } while (0)
#define   schedstat_val(var)		(var)
#define   schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
46
/*
 * Out-of-line statistics updaters; only declared here. Each takes the
 * runqueue, the task, and the sched_statistics record to update.
 */
void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
			       struct sched_statistics *stats);

void __update_stats_wait_end(struct rq *rq, struct task_struct *p,
			     struct sched_statistics *stats);
void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
				    struct sched_statistics *stats);
54
/*
 * check_schedstat_required - warn when tracepoints need schedstats
 *
 * Does nothing when schedstats are already enabled. Otherwise, if any of the
 * sched_stat_{wait,sleep,iowait,blocked,runtime} tracepoints is enabled,
 * prints a one-time deferred message telling the user how to turn
 * schedstats on. It does not enable schedstats itself.
 */
static inline void
check_schedstat_required(void)
{
	if (schedstat_enabled())
		return;

	/* Warn once if a dependent tracepoint is active while schedstats are off */
	if (trace_sched_stat_wait_enabled()    ||
	    trace_sched_stat_sleep_enabled()   ||
	    trace_sched_stat_iowait_enabled()  ||
	    trace_sched_stat_blocked_enabled() ||
	    trace_sched_stat_runtime_enabled())
		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n");
}
69
70	#else /* !CONFIG_SCHEDSTATS: */
71
/* No-op replacements for the runqueue accounting helpers when schedstats are compiled out. */
static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { }
static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { }
static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { }
/*
 * Compiled-out statistics helpers: updates expand to empty statements and
 * reads expand to 0, so arguments are never evaluated.
 */
# define   schedstat_enabled()		0
# define __schedstat_inc(var)		do { } while (0)
# define   schedstat_inc(var)		do { } while (0)
# define __schedstat_add(var, amt)	do { } while (0)
# define   schedstat_add(var, amt)	do { } while (0)
# define __schedstat_set(var, val)	do { } while (0)
# define   schedstat_set(var, val)	do { } while (0)
# define   schedstat_val(var)		0
# define   schedstat_val_or_zero(var)	0

# define __update_stats_wait_start(rq, p, stats)	do { } while (0)
# define __update_stats_wait_end(rq, p, stats)		do { } while (0)
# define __update_stats_enqueue_sleeper(rq, p, stats)	do { } while (0)
# define check_schedstat_required()			do { } while (0)
89
90	#endif /* CONFIG_SCHEDSTATS */
91
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
 * A sched_entity bundled with its statistics record.
 * __schedstats_from_se() uses container_of() on the @se member to reach
 * @stats, so the layout is kept fixed with __no_randomize_layout.
 */
struct sched_entity_stats {
	struct sched_entity     se;
	struct sched_statistics stats;
} __no_randomize_layout;
#endif
98
99	static inline struct sched_statistics *
100	__schedstats_from_se(struct sched_entity *se)
101	{
102	#ifdef CONFIG_FAIR_GROUP_SCHED
103	if (!entity_is_task(se))
104	return &container_of(se, struct sched_entity_stats, se)->stats;
105	#endif
106	return &task_of(se)->stats;
107	}
108
109	#ifdef CONFIG_PSI
/* PSI entry points implemented out of line; used by the inline wrappers below. */
void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
		     bool sleep);
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev);
#else /* !CONFIG_IRQ_TIME_ACCOUNTING: */
/* Without IRQ time accounting there is nothing to account; provide a no-op. */
static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
				       struct task_struct *prev) {}
#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
119	/*
120	* PSI tracks state that persists across sleeps, such as iowaits and
121	* memory stalls. As a result, it has to distinguish between sleeps,
122	* where a task's runnable state changes, and migrations, where a task
123	* and its runnable state are being moved between CPUs and runqueues.
124	*
125	* A notable case is a task whose dequeue is delayed. PSI considers
126	* those sleeping, but because they are still on the runqueue they can
127	* go through migration requeues. In this case, sleeping states need
128	* to be transferred.
129	*/
130	static inline void psi_enqueue(struct task_struct p, int* flags)
131	{
132	int clear = `0`, set = `0`;
133
134	if (static_branch_likely(&psi_disabled))
135	return;
136
137	/ Same runqueue, nothing changed for psi /
138	if (flags & ENQUEUE_RESTORE)
139	return;
140
141	/ psi_sched_switch() will handle the flags /
142	if (task_on_cpu(task_rq(p), p))
143	return;
144
145	if (p->se.sched_delayed) {
146	/ CPU migration of "sleeping" task /
147	WARN_ON_ONCE(!(flags & ENQUEUE_MIGRATED));
148	if (p->in_memstall)
149	set \|= TSK_MEMSTALL;
150	if (p->in_iowait)
151	set \|= TSK_IOWAIT;
152	} else if (flags & ENQUEUE_MIGRATED) {
153	/ CPU migration of runnable task /
154	set = TSK_RUNNING;
155	if (p->in_memstall)
156	set \|= TSK_MEMSTALL \| TSK_MEMSTALL_RUNNING;
157	} else {
158	/ Wakeup of new or sleeping task /
159	if (p->in_iowait)
160	clear \|= TSK_IOWAIT;
161	set = TSK_RUNNING;
162	if (p->in_memstall)
163	set \|= TSK_MEMSTALL_RUNNING;
164	}
165
166	psi_task_change(p, clear, set);
167	}
168
169	static inline void psi_dequeue(struct task_struct p, int* flags)
170	{
171	if (static_branch_likely(&psi_disabled))
172	return;
173
174	/ Same runqueue, nothing changed for psi /
175	if (flags & DEQUEUE_SAVE)
176	return;
177
178	/*
179	* A voluntary sleep is a dequeue followed by a task switch. To
180	* avoid walking all ancestors twice, psi_task_switch() handles
181	* TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
182	* Do nothing here.
183	*/
184	if (flags & DEQUEUE_SLEEP)
185	return;
186
187	/*
188	* When migrating a task to another CPU, clear all psi
189	* state. The enqueue callback above will work it out.
190	*/
191	psi_task_change(p, p->psi_flags, `0`);
192	}
193
194	static inline void psi_ttwu_dequeue(struct task_struct *p)
195	{
196	if (static_branch_likely(&psi_disabled))
197	return;
198	/*
199	* Is the task being migrated during a wakeup? Make sure to
200	* deregister its sleep-persistent psi states from the old
201	* queue, and let psi_enqueue() know it has to requeue.
202	*/
203	if (unlikely(p->psi_flags)) {
204	struct rq_flags rf;
205	struct rq *rq;
206
207	rq = __task_rq_lock(p, &rf);
208	psi_task_change(p, p->psi_flags, `0`);
209	__task_rq_unlock(rq, &rf);
210	}
211	}
212
213	static inline void psi_sched_switch(struct task_struct *prev,
214	struct task_struct *next,
215	bool sleep)
216	{
217	if (static_branch_likely(&psi_disabled))
218	return;
219
220	psi_task_switch(prev, next, sleep);
221	}
222
223	#else /* !CONFIG_PSI: */
/*
 * No-op PSI hooks for !CONFIG_PSI builds. The parameter lists mirror the
 * CONFIG_PSI versions (psi_enqueue()/psi_dequeue() take the int flags word)
 * so both configurations present the same interface to callers.
 */
static inline void psi_enqueue(struct task_struct *p, int flags) {}
static inline void psi_dequeue(struct task_struct *p, int flags) {}
static inline void psi_ttwu_dequeue(struct task_struct *p) {}
static inline void psi_sched_switch(struct task_struct *prev,
				    struct task_struct *next,
				    bool sleep) {}
static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
				       struct task_struct *prev) {}
232	#endif /* !CONFIG_PSI */
233
234	#ifdef CONFIG_SCHED_INFO
235	/*
236	* We are interested in knowing how long it was from the first time a
237	* task was queued to the time that it finally hit a CPU, we call this routine
238	* from dequeue_task() to account for possible rq->clock skew across CPUs. The
239	* delta taken on each CPU would annul the skew.
240	*/
241	static inline void sched_info_dequeue(struct rq rq, struct* task_struct *t)
242	{
243	unsigned long long delta = `0`;
244
245	if (!t->sched_info.last_queued)
246	return;
247
248	delta = rq_clock(rq) - t->sched_info.last_queued;
249	t->sched_info.last_queued = `0`;
250	t->sched_info.run_delay += delta;
251	if (delta > t->sched_info.max_run_delay)
252	t->sched_info.max_run_delay = delta;
253	if (delta && (!t->sched_info.min_run_delay \|\| delta < t->sched_info.min_run_delay))
254	t->sched_info.min_run_delay = delta;
255	rq_sched_info_dequeue(rq, delta);
256	}
257
258	/*
259	* Called when a task finally hits the CPU. We can now calculate how
260	* long it was waiting to run. We also note when it began so that we
261	* can keep stats on how long its time-slice is.
262	*/
263	static void sched_info_arrive(struct rq rq, struct* task_struct *t)
264	{
265	unsigned long long now, delta = `0`;
266
267	if (!t->sched_info.last_queued)
268	return;
269
270	now = rq_clock(rq);
271	delta = now - t->sched_info.last_queued;
272	t->sched_info.last_queued = `0`;
273	t->sched_info.run_delay += delta;
274	t->sched_info.last_arrival = now;
275	t->sched_info.pcount++;
276	if (delta > t->sched_info.max_run_delay)
277	t->sched_info.max_run_delay = delta;
278	if (delta && (!t->sched_info.min_run_delay \|\| delta < t->sched_info.min_run_delay))
279	t->sched_info.min_run_delay = delta;
280
281	rq_sched_info_arrive(rq, delta);
282	}
283
284	/*
285	* This function is only called from enqueue_task(), but also only updates
286	* the timestamp if it is already not set. It's assumed that
287	* sched_info_dequeue() will clear that stamp when appropriate.
288	*/
289	static inline void sched_info_enqueue(struct rq rq, struct* task_struct *t)
290	{
291	if (!t->sched_info.last_queued)
292	t->sched_info.last_queued = rq_clock(rq);
293	}
294
295	/*
296	* Called when a process ceases being the active-running process involuntarily
297	* due, typically, to expiring its time slice (this may also be called when
298	* switching to the idle task). Now we can calculate how long we ran.
299	* Also, if the process is still in the TASK_RUNNING state, call
300	* sched_info_enqueue() to mark that it has now again started waiting on
301	* the runqueue.
302	*/
303	static inline void sched_info_depart(struct rq rq, struct* task_struct *t)
304	{
305	unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival;
306
307	rq_sched_info_depart(rq, delta);
308
309	if (task_is_running(t))
310	sched_info_enqueue(rq, t);
311	}
312
313	/*
314	* Called when tasks are switched involuntarily due, typically, to expiring
315	* their time slice. (This may also be called when switching to or from
316	* the idle task.) We are only called when prev != next.
317	*/
318	static inline void
319	sched_info_switch(struct rq rq, struct* task_struct prev, struct* task_struct *next)
320	{
321	/*
322	* prev now departs the CPU. It's not interesting to record
323	* stats about how efficient we were at scheduling the idle
324	* process, however.
325	*/
326	if (prev != rq->idle)
327	sched_info_depart(rq, t: prev);
328
329	if (next != rq->idle)
330	sched_info_arrive(rq, t: next);
331	}
332
333	#else /* !CONFIG_SCHED_INFO: */
/* Compiled-out sched_info hooks: expand to empty statements, arguments are not evaluated. */
# define sched_info_enqueue(rq, t)		do { } while (0)
# define sched_info_dequeue(rq, t)		do { } while (0)
# define sched_info_switch(rq, prev, next)	do { } while (0)
337	#endif /* !CONFIG_SCHED_INFO */
338
339	#endif /* _KERNEL_STATS_H */
340

Browse the source code of Linux/kernel/sched/stats.h