sched.h source code [Linux/kernel/sched/sched.h]

/* SPDX-License-Identifier: GPL-2.0 */
2	/*
3	* Scheduler internal types and methods:
4	*/
5	#ifndef _KERNEL_SCHED_SCHED_H
6	#define _KERNEL_SCHED_SCHED_H
7
8	#include <linux/sched/affinity.h>
9	#include <linux/sched/autogroup.h>
10	#include <linux/sched/cpufreq.h>
11	#include <linux/sched/deadline.h>
12	#include <linux/sched.h>
13	#include <linux/sched/loadavg.h>
14	#include <linux/sched/mm.h>
15	#include <linux/sched/rseq_api.h>
16	#include <linux/sched/signal.h>
17	#include <linux/sched/smt.h>
18	#include <linux/sched/stat.h>
19	#include <linux/sched/sysctl.h>
20	#include <linux/sched/task_flags.h>
21	#include <linux/sched/task.h>
22	#include <linux/sched/topology.h>
23
24	#include <linux/atomic.h>
25	#include <linux/bitmap.h>
26	#include <linux/bug.h>
27	#include <linux/capability.h>
28	#include <linux/cgroup_api.h>
29	#include <linux/cgroup.h>
30	#include <linux/context_tracking.h>
31	#include <linux/cpufreq.h>
32	#include <linux/cpumask_api.h>
33	#include <linux/ctype.h>
34	#include <linux/file.h>
35	#include <linux/fs_api.h>
36	#include <linux/hrtimer_api.h>
37	#include <linux/interrupt.h>
38	#include <linux/irq_work.h>
39	#include <linux/jiffies.h>
40	#include <linux/kref_api.h>
41	#include <linux/kthread.h>
42	#include <linux/ktime_api.h>
43	#include <linux/lockdep_api.h>
44	#include <linux/lockdep.h>
45	#include <linux/minmax.h>
46	#include <linux/mm.h>
47	#include <linux/module.h>
48	#include <linux/mutex_api.h>
49	#include <linux/plist.h>
50	#include <linux/poll.h>
51	#include <linux/proc_fs.h>
52	#include <linux/profile.h>
53	#include <linux/psi.h>
54	#include <linux/rcupdate.h>
55	#include <linux/seq_file.h>
56	#include <linux/seqlock.h>
57	#include <linux/softirq.h>
58	#include <linux/spinlock_api.h>
59	#include <linux/static_key.h>
60	#include <linux/stop_machine.h>
61	#include <linux/syscalls_api.h>
62	#include <linux/syscalls.h>
63	#include <linux/tick.h>
64	#include <linux/topology.h>
65	#include <linux/types.h>
66	#include <linux/u64_stats_sync_api.h>
67	#include <linux/uaccess.h>
68	#include <linux/wait_api.h>
69	#include <linux/wait_bit.h>
70	#include <linux/workqueue_api.h>
71	#include <linux/delayacct.h>
72	#include <linux/mmu_context.h>
73
74	#include <trace/events/power.h>
75	#include <trace/events/sched.h>
76
77	#include "../workqueue_internal.h"
78
79	struct rq;
80	struct cfs_rq;
81	struct rt_rq;
82	struct sched_group;
83	struct cpuidle_state;
84
85	#ifdef CONFIG_PARAVIRT
86	# include <asm/paravirt.h>
87	# include <asm/paravirt_api_clock.h>
88	#endif
89
90	#include <asm/barrier.h>
91
92	#include "cpupri.h"
93	#include "cpudeadline.h"
94
/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED	1
#define TASK_ON_RQ_MIGRATING	2
98
99	extern __read_mostly int scheduler_running;
100
101	extern unsigned long calc_load_update;
102	extern atomic_long_t calc_load_tasks;
103
104	extern void calc_global_load_tick(struct rq *this_rq);
105	extern long calc_load_fold_active(struct rq this_rq, long* adjust);
106
107	extern void call_trace_sched_update_nr_running(struct rq rq, int* count);
108
109	extern int sysctl_sched_rt_period;
110	extern int sysctl_sched_rt_runtime;
111	extern int sched_rr_timeslice;
112
113	/*
114	* Asymmetric CPU capacity bits
115	*/
116	struct asym_cap_data {
117	struct list_head link;
118	struct rcu_head rcu;
119	unsigned long capacity;
120	unsigned long cpus[];
121	};
122
123	extern struct list_head asym_cap_list;
124
125	#define cpu_capacity_span(asym_data) to_cpumask((asym_data)->cpus)
126
/*
 * Convert a nanosecond quantity to jiffies: @time is cast to unsigned long
 * and divided by the number of nanoseconds per jiffy (NSEC_PER_SEC / HZ).
 */
#define NS_TO_JIFFIES(time)	((unsigned long)(time) / (NSEC_PER_SEC / HZ))
131
/*
 * Increase resolution of nice-level calculations for 64-bit architectures.
 * The extra resolution improves shares distribution and load balancing of
 * low-weight task groups (eg. nice +19 on an autogroup), deeper task-group
 * hierarchies, especially on larger systems. This is not a user-visible change
 * and does not change the user-interface for setting shares/weights.
 *
 * We increase resolution only if we have enough bits to allow this increased
 * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
 * are pretty high and the returns do not justify the increased costs.
 *
 * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
 * increase coverage and consistency always enable it on 64-bit platforms.
 *
 * scale_load_down() maps 0 to 0 and never returns less than 2 for a
 * non-zero input.
 */
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
# define scale_load_down(w)						\
({									\
	unsigned long __load = (w);					\
									\
	__load ? max(2UL, __load >> SCHED_FIXEDPOINT_SHIFT) : __load;	\
})
#else
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		(w)
# define scale_load_down(w)	(w)
#endif

/*
 * Task weight (visible to users) and its load (invisible to users) have
 * independent resolution, but they should be well calibrated. We use
 * scale_load() and scale_load_down(w) to convert between them. The
 * following must be true:
 *
 *  scale_load(sched_prio_to_weight[NICE_TO_PRIO(0)-MAX_RT_PRIO]) == NICE_0_LOAD
 *
 */
#define NICE_0_LOAD		(1L << NICE_0_LOAD_SHIFT)
173
/*
 * Single value that decides SCHED_DEADLINE internal math precision.
 * 10 -> just above 1us
 * 9  -> just above 0.5us
 */
#define DL_SCALE		10

/*
 * Single value that denotes runtime == period, ie unlimited time.
 * All bits of a u64 are set.
 */
#define RUNTIME_INF		((u64)~0ULL)
185
/* Return 1 when @policy is SCHED_IDLE, 0 otherwise. */
static inline int idle_policy(int policy)
{
	return (SCHED_IDLE == policy) ? 1 : 0;
}
190
/*
 * Return 1 when @policy is SCHED_NORMAL or, with CONFIG_SCHED_CLASS_EXT,
 * SCHED_EXT; 0 otherwise.
 */
static inline int normal_policy(int policy)
{
	int is_normal = (policy == SCHED_NORMAL);

#ifdef CONFIG_SCHED_CLASS_EXT
	is_normal = is_normal || (policy == SCHED_EXT);
#endif
	return is_normal;
}
199
200	static inline int fair_policy(int policy)
201	{
202	return normal_policy(policy) \|\| policy == SCHED_BATCH;
203	}
204
/* Return 1 when @policy is SCHED_FIFO or SCHED_RR, 0 otherwise. */
static inline int rt_policy(int policy)
{
	return policy == SCHED_FIFO || policy == SCHED_RR;
}
209
/* Return 1 when @policy is SCHED_DEADLINE, 0 otherwise. */
static inline int dl_policy(int policy)
{
	if (policy != SCHED_DEADLINE)
		return 0;

	return 1;
}
214
/*
 * Return true when @policy is accepted by any of the idle, fair, rt or
 * deadline policy predicates.
 */
static inline bool valid_policy(int policy)
{
	return idle_policy(policy) || fair_policy(policy) ||
		rt_policy(policy) || dl_policy(policy);
}
220
221	static inline int task_has_idle_policy(struct task_struct *p)
222	{
223	return idle_policy(policy: p->policy);
224	}
225
226	static inline int task_has_rt_policy(struct task_struct *p)
227	{
228	return rt_policy(policy: p->policy);
229	}
230
231	static inline int task_has_dl_policy(struct task_struct *p)
232	{
233	return dl_policy(policy: p->policy);
234	}
235
/* Multiply @v by @s, then shift the product right by SCHED_CAPACITY_SHIFT. */
#define cap_scale(v, s)		(((v) * (s)) >> SCHED_CAPACITY_SHIFT)
237
238	static inline void update_avg(u64 *avg, u64 sample)
239	{
240	s64 diff = sample - *avg;
241
242	*avg += diff / `8`;
243	}
244
/*
 * Shifting a value by an exponent greater or equal to the size of said value
 * is UB; cap at size-1.
 *
 * Both arguments are parenthesized in the expansion so that expressions
 * built from lower-precedence operators (e.g. "a | b") are shifted as a
 * whole.
 */
#define shr_bound(val, shift)							\
	((val) >> min_t(typeof(shift), (shift), BITS_PER_TYPE(typeof(val)) - 1))
251
252	/*
253	* cgroup weight knobs should use the common MIN, DFL and MAX values which are
254	* 1, 100 and 10000 respectively. While it loses a bit of range on both ends, it
255	* maps pretty well onto the shares value used by scheduler and the round-trip
256	* conversions preserve the original value over the entire range.
257	*/
258	static inline unsigned long sched_weight_from_cgroup(unsigned long cgrp_weight)
259	{
260	return DIV_ROUND_CLOSEST_ULL(cgrp_weight * `1024`, CGROUP_WEIGHT_DFL);
261	}
262
263	static inline unsigned long sched_weight_to_cgroup(unsigned long weight)
264	{
265	return clamp_t(unsigned long,
266	DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, `1024`),
267	CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
268	}
269
/*
 * !! For sched_setattr_nocheck() (kernel) only !!
 *
 * This is actually gross. :(
 *
 * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
 * tasks, but still be able to sleep. We need this on platforms that cannot
 * atomically change clock frequency. Remove once fast switching will be
 * available on such platforms.
 *
 * SUGOV stands for SchedUtil GOVernor.
 */
#define SCHED_FLAG_SUGOV	0x10000000

#define SCHED_DL_FLAGS		(SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV)
285
/*
 * Return true when @dl_se carries SCHED_FLAG_SUGOV. Without
 * CONFIG_CPU_FREQ_GOV_SCHEDUTIL the answer is always false and @dl_se is
 * not dereferenced.
 */
static inline bool dl_entity_is_special(const struct sched_dl_entity *dl_se)
{
	bool special = false;

#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
	special = unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
#endif
	return special;
}
294
295	/*
296	* Tells if entity @a should preempt entity @b.
297	*/
298	static inline bool dl_entity_preempt(const struct sched_dl_entity *a,
299	const struct sched_dl_entity *b)
300	{
301	return dl_entity_is_special(dl_se: a) \|\|
302	dl_time_before(a: a->deadline, b: b->deadline);
303	}
304
305	/*
306	* This is the priority-queue data structure of the RT scheduling class:
307	*/
308	struct rt_prio_array {
309	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+`1`); / include 1 bit for delimiter /
310	struct list_head queue[MAX_RT_PRIO];
311	};
312
313	struct rt_bandwidth {
314	/ nests inside the rq lock: /
315	raw_spinlock_t rt_runtime_lock;
316	ktime_t rt_period;
317	u64 rt_runtime;
318	struct hrtimer rt_period_timer;
319	unsigned int rt_period_active;
320	};
321
322	static inline int dl_bandwidth_enabled(void)
323	{
324	return sysctl_sched_rt_runtime >= `0`;
325	}
326
327	/*
328	* To keep the bandwidth of -deadline tasks under control
329	* we need some place where:
330	* - store the maximum -deadline bandwidth of each cpu;
331	* - cache the fraction of bandwidth that is currently allocated in
332	* each root domain;
333	*
334	* This is all done in the data structure below. It is similar to the
335	* one used for RT-throttling (rt_bandwidth), with the main difference
336	* that, since here we are only interested in admission control, we
337	* do not decrease any runtime while the group "executes", neither we
338	* need a timer to replenish it.
339	*
340	* With respect to SMP, bandwidth is given on a per root domain basis,
341	* meaning that:
342	* - bw (< 100%) is the deadline bandwidth of each CPU;
343	* - total_bw is the currently allocated bandwidth in each root domain;
344	*/
345	struct dl_bw {
346	raw_spinlock_t lock;
347	u64 bw;
348	u64 total_bw;
349	};
350
351	extern void init_dl_bw(struct dl_bw *dl_b);
352	extern int sched_dl_global_validate(void);
353	extern void sched_dl_do_global(void);
354	extern int sched_dl_overflow(struct task_struct p, int* policy, const struct sched_attr *attr);
355	extern void __setparam_dl(struct task_struct p, const* struct sched_attr *attr);
356	extern void __getparam_dl(struct task_struct p, struct* sched_attr *attr);
357	extern bool __checkparam_dl(const struct sched_attr *attr);
358	extern bool dl_param_changed(struct task_struct p, const* struct sched_attr *attr);
359	extern int dl_cpuset_cpumask_can_shrink(const struct cpumask cur, const* struct cpumask *trial);
360	extern int dl_bw_deactivate(int cpu);
361	extern s64 dl_scaled_delta_exec(struct rq rq, struct* sched_dl_entity *dl_se, s64 delta_exec);
362	/*
363	* SCHED_DEADLINE supports servers (nested scheduling) with the following
364	* interface:
365	*
366	* dl_se::rq -- runqueue we belong to.
367	*
368	* dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
369	* returns NULL.
370	*
371	* dl_server_update() -- called from update_curr_common(), propagates runtime
372	* to the server.
373	*
374	* dl_server_start() -- start the server when it has tasks; it will stop
375	* automatically when there are no more tasks, per
376	* dl_se::server_pick() returning NULL.
377	*
378	* dl_server_stop() -- (force) stop the server; use when updating
379	* parameters.
380	*
381	* dl_server_init() -- initializes the server.
382	*
383	* When started the dl_server will (per dl_defer) schedule a timer for its
384	* zero-laxity point -- that is, unlike regular EDF tasks which run ASAP, a
385	* server will run at the very end of its period.
386	*
387	* This is done such that any runtime from the target class can be accounted
388	* against the server -- through dl_server_update() above -- such that when it
389	* becomes time to run, it might already be out of runtime and get deferred
390	* until the next period. In this case dl_server_timer() will alternate
391	* between defer and replenish but never actually enqueue the server.
392	*
393	* Only when the target class does not manage to exhaust the server's runtime
394	* (there's actualy starvation in the given period), will the dl_server get on
395	* the runqueue. Once queued it will pick tasks from the target class and run
396	* them until either its runtime is exhaused, at which point its back to
397	* dl_server_timer, or until there are no more tasks to run, at which point
398	* the dl_server stops itself.
399	*
400	* By stopping at this point the dl_server retains bandwidth, which, if a new
401	* task wakes up imminently (starting the server again), can be used --
402	* subject to CBS wakeup rules -- without having to wait for the next period.
403	*
404	* Additionally, because of the dl_defer behaviour the start/stop behaviour is
405	* naturally thottled to once per period, avoiding high context switch
406	* workloads from spamming the hrtimer program/cancel paths.
407	*/
408	extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
409	extern void dl_server_start(struct sched_dl_entity *dl_se);
410	extern void dl_server_stop(struct sched_dl_entity *dl_se);
411	extern void dl_server_init(struct sched_dl_entity dl_se, struct* rq *rq,
412	dl_server_pick_f pick_task);
413	extern void sched_init_dl_servers(void);
414
415	extern void dl_server_update_idle_time(struct rq *rq,
416	struct task_struct *p);
417	extern void fair_server_init(struct rq *rq);
418	extern void __dl_server_attach_root(struct sched_dl_entity dl_se, struct* rq *rq);
419	extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
420	u64 runtime, u64 period, bool init);
421
422	static inline bool dl_server_active(struct sched_dl_entity *dl_se)
423	{
424	return dl_se->dl_server_active;
425	}
426
427	#ifdef CONFIG_CGROUP_SCHED
428
extern struct list_head task_groups;

#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
extern const u64 max_bw_quota_period_us;

/*
 * default period for group bandwidth.
 * default: 0.1s, units: microseconds
 */
static inline u64 default_bw_period_us(void)
{
	return 100000ULL;
}
#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */
443
/*
 * CFS bandwidth control state. The structure is empty unless
 * CONFIG_CFS_BANDWIDTH is set.
 */
struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
	raw_spinlock_t		lock;
	ktime_t			period;
	u64			quota;
	u64			runtime;
	u64			burst;
	u64			runtime_snap;
	s64			hierarchical_quota;

	u8			idle;
	u8			period_active;
	u8			slack_started;
	struct hrtimer		period_timer;
	struct hrtimer		slack_timer;
	struct list_head	throttled_cfs_rq;

	/* Statistics: */
	int			nr_periods;
	int			nr_throttled;
	int			nr_burst;
	u64			throttled_time;
	u64			burst_time;
#endif /* CONFIG_CFS_BANDWIDTH */
};
469
470	/ Task group related information /
471	struct task_group {
472	struct cgroup_subsys_state css;
473
474	#ifdef CONFIG_GROUP_SCHED_WEIGHT
475	/ A positive value indicates that this is a SCHED_IDLE group. /
476	int idle;
477	#endif
478
479	#ifdef CONFIG_FAIR_GROUP_SCHED
480	/ schedulable entities of this group on each CPU /
481	struct sched_entity **se;
482	/ runqueue "owned" by this group on each CPU /
483	struct cfs_rq **cfs_rq;
484	unsigned long shares;
485	/*
486	* load_avg can be heavily contended at clock tick time, so put
487	* it in its own cache-line separated from the fields above which
488	* will also be accessed at each tick.
489	*/
490	atomic_long_t load_avg ____cacheline_aligned;
491	#endif /* CONFIG_FAIR_GROUP_SCHED */
492
493	#ifdef CONFIG_RT_GROUP_SCHED
494	struct sched_rt_entity **rt_se;
495	struct rt_rq **rt_rq;
496
497	struct rt_bandwidth rt_bandwidth;
498	#endif
499
500	struct scx_task_group scx;
501
502	struct rcu_head rcu;
503	struct list_head list;
504
505	struct task_group *parent;
506	struct list_head siblings;
507	struct list_head children;
508
509	#ifdef CONFIG_SCHED_AUTOGROUP
510	struct autogroup *autogroup;
511	#endif
512
513	struct cfs_bandwidth cfs_bandwidth;
514
515	#ifdef CONFIG_UCLAMP_TASK_GROUP
516	/ The two decimal precision [%] value requested from user-space /
517	unsigned int uclamp_pct[UCLAMP_CNT];
518	/ Clamp values requested for a task group /
519	struct uclamp_se uclamp_req[UCLAMP_CNT];
520	/ Effective clamp values used for a task group /
521	struct uclamp_se uclamp[UCLAMP_CNT];
522	#endif
523
524	};
525
#ifdef CONFIG_GROUP_SCHED_WEIGHT
#define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD

/*
 * A weight of 0 or 1 can cause arithmetic problems.
 * The weight of a cfs_rq is the sum of the weights of the entities
 * queued on it, so the weight of an entity should not be too large,
 * and neither should the shares value of a task group.
 * (The default weight is 1024 - so there's no practical
 *  limitation from this.)
 */
#define MIN_SHARES		(1UL <<  1)
#define MAX_SHARES		(1UL << 18)
#endif
540
/* Callback type used by the task-group tree walkers. */
typedef int (*tg_visitor)(struct task_group *, void *);

extern int walk_tg_tree_from(struct task_group *from,
			     tg_visitor down, tg_visitor up, void *data);
545
546	/*
547	* Iterate the full tree, calling @down when first entering a node and @up when
548	* leaving it for the final time.
549	*
550	* Caller must hold rcu_lock or sufficient equivalent.
551	*/
552	static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
553	{
554	return walk_tg_tree_from(from: &root_task_group, down, up, data);
555	}
556
557	static inline struct task_group css_tg(struct* cgroup_subsys_state *css)
558	{
559	return css ? container_of(css, struct task_group, css) : NULL;
560	}
561
/* Matches the tg_visitor callback signature. */
extern int tg_nop(struct task_group *tg, void *data);
563
/*
 * Fair group lifetime hooks. Without CONFIG_FAIR_GROUP_SCHED they are
 * no-ops and alloc_fair_sched_group() returns 1.
 */
#ifdef CONFIG_FAIR_GROUP_SCHED
extern void free_fair_sched_group(struct task_group *tg);
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
extern void online_fair_sched_group(struct task_group *tg);
extern void unregister_fair_sched_group(struct task_group *tg);
#else /* !CONFIG_FAIR_GROUP_SCHED: */
static inline void free_fair_sched_group(struct task_group *tg) { }
static inline int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
	return 1;
}
static inline void online_fair_sched_group(struct task_group *tg) { }
static inline void unregister_fair_sched_group(struct task_group *tg) { }
#endif /* !CONFIG_FAIR_GROUP_SCHED */
578
579	extern void init_tg_cfs_entry(struct task_group tg, struct* cfs_rq *cfs_rq,
580	struct sched_entity se, int* cpu,
581	struct sched_entity *parent);
582	extern void init_cfs_bandwidth(struct cfs_bandwidth cfs_b, struct* cfs_bandwidth *parent);
583
584	extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
585	extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
586	extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
587	extern bool cfs_task_bw_constrained(struct task_struct *p);
588
589	extern void init_tg_rt_entry(struct task_group tg, struct* rt_rq *rt_rq,
590	struct sched_rt_entity rt_se, int* cpu,
591	struct sched_rt_entity *parent);
592	extern int sched_group_set_rt_runtime(struct task_group tg, long* rt_runtime_us);
593	extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
594	extern long sched_group_rt_runtime(struct task_group *tg);
595	extern long sched_group_rt_period(struct task_group *tg);
596	extern int sched_rt_can_attach(struct task_group tg, struct* task_struct *tsk);
597
598	extern struct task_group sched_create_group(struct* task_group *parent);
599	extern void sched_online_group(struct task_group *tg,
600	struct task_group *parent);
601	extern void sched_destroy_group(struct task_group *tg);
602	extern void sched_release_group(struct task_group *tg);
603
604	extern void sched_move_task(struct task_struct *tsk, bool for_autogroup);
605
/*
 * Fair group shares/idle setters. Without CONFIG_FAIR_GROUP_SCHED the
 * setters are stubs that return 0.
 */
#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);

extern int sched_group_set_idle(struct task_group *tg, long idle);

extern void set_task_rq_fair(struct sched_entity *se,
			     struct cfs_rq *prev, struct cfs_rq *next);
#else /* !CONFIG_FAIR_GROUP_SCHED: */
static inline int sched_group_set_shares(struct task_group *tg, unsigned long shares) { return 0; }
static inline int sched_group_set_idle(struct task_group *tg, long idle) { return 0; }
#endif /* !CONFIG_FAIR_GROUP_SCHED */
617
618	#else /* !CONFIG_CGROUP_SCHED: */
619
/* Without CONFIG_CGROUP_SCHED there is no bandwidth state to keep. */
struct cfs_bandwidth { };

/* Stub: always reports false; @p is not dereferenced. */
static inline bool cfs_task_bw_constrained(struct task_struct *p) { return false; }
623
624	#endif /* !CONFIG_CGROUP_SCHED */
625
/* RT group lifetime hooks (declarations only). */
extern void unregister_rt_sched_group(struct task_group *tg);
extern void free_rt_sched_group(struct task_group *tg);
extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
629
/*
 * u64_u32_load/u64_u32_store
 *
 * Use a copy of a u64 value to protect against data race. This is only
 * applicable for 32-bits architectures.
 *
 * With CONFIG_64BIT the copy is ignored: a load is a plain read of @var and
 * a store is a plain assignment. Otherwise a store writes @var, issues
 * smp_wmb(), then writes @copy; a load reads @copy, issues smp_rmb(), reads
 * @var, and retries until the two reads agree.
 */
#ifdef CONFIG_64BIT
# define u64_u32_load_copy(var, copy)		var
# define u64_u32_store_copy(var, copy, val)	(var = val)
#else
# define u64_u32_load_copy(var, copy)					\
({									\
	u64 __seen, __shadow;						\
	do {								\
		__shadow = copy;					\
		/*							\
		 * Pairs with the smp_wmb() in u64_u32_store_copy():	\
		 * orders the read of copy before the read of var.	\
		 */							\
		smp_rmb();						\
		__seen = var;						\
	} while (__seen != __shadow);					\
	__seen;								\
})
# define u64_u32_store_copy(var, copy, val)				\
do {									\
	typeof(val) __new = (val);					\
	var = __new;							\
	/*								\
	 * Pairs with the smp_rmb() in u64_u32_load_copy(): orders	\
	 * the write of var before the write of copy.			\
	 */								\
	smp_wmb();							\
	copy = __new;							\
} while (0)
#endif
# define u64_u32_load(var)		u64_u32_load_copy(var, var##_copy)
# define u64_u32_store(var, val)	u64_u32_store_copy(var, var##_copy, val)
668
/* Singly linked list node carrying a callback that takes a runqueue. */
struct balance_callback {
	struct balance_callback *next;
	void (*func)(struct rq *rq);
};
673
674	/ CFS-related fields in a runqueue /
675	struct cfs_rq {
676	struct load_weight load;
677	unsigned int nr_queued;
678	unsigned int h_nr_queued; / SCHED_{NORMAL,BATCH,IDLE} /
679	unsigned int h_nr_runnable; / SCHED_{NORMAL,BATCH,IDLE} /
680	unsigned int h_nr_idle; / SCHED_IDLE /
681
682	s64 avg_vruntime;
683	u64 avg_load;
684
685	u64 min_vruntime;
686	#ifdef CONFIG_SCHED_CORE
687	unsigned int forceidle_seq;
688	u64 min_vruntime_fi;
689	#endif
690
691	struct rb_root_cached tasks_timeline;
692
693	/*
694	* 'curr' points to currently running entity on this cfs_rq.
695	* It is set to NULL otherwise (i.e when none are currently running).
696	*/
697	struct sched_entity *curr;
698	struct sched_entity *next;
699
700	/*
701	* CFS load tracking
702	*/
703	struct sched_avg avg;
704	#ifndef CONFIG_64BIT
705	u64 last_update_time_copy;
706	#endif
707	struct {
708	raw_spinlock_t lock ____cacheline_aligned;
709	int nr;
710	unsigned long load_avg;
711	unsigned long util_avg;
712	unsigned long runnable_avg;
713	} removed;
714
715	#ifdef CONFIG_FAIR_GROUP_SCHED
716	u64 last_update_tg_load_avg;
717	unsigned long tg_load_avg_contrib;
718	long propagate;
719	long prop_runnable_sum;
720
721	/*
722	* h_load = weight * f(tg)
723	*
724	* Where f(tg) is the recursive weight fraction assigned to
725	* this group.
726	*/
727	unsigned long h_load;
728	u64 last_h_load_update;
729	struct sched_entity *h_load_next;
730	#endif /* CONFIG_FAIR_GROUP_SCHED */
731
732	#ifdef CONFIG_FAIR_GROUP_SCHED
733	struct rq rq; /* CPU runqueue to which this cfs_rq is attached /
734
735	/*
736	* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
737	* a hierarchy). Non-leaf lrqs hold other higher schedulable entities
738	* (like users, containers etc.)
739	*
740	* leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
741	* This list is used during load balance.
742	*/
743	int on_list;
744	struct list_head leaf_cfs_rq_list;
745	struct task_group tg; /* group that "owns" this runqueue /
746
747	/ Locally cached copy of our task_group's idle value /
748	int idle;
749
750	#ifdef CONFIG_CFS_BANDWIDTH
751	int runtime_enabled;
752	s64 runtime_remaining;
753
754	u64 throttled_pelt_idle;
755	#ifndef CONFIG_64BIT
756	u64 throttled_pelt_idle_copy;
757	#endif
758	u64 throttled_clock;
759	u64 throttled_clock_pelt;
760	u64 throttled_clock_pelt_time;
761	u64 throttled_clock_self;
762	u64 throttled_clock_self_time;
763	bool throttled:`1`;
764	bool pelt_clock_throttled:`1`;
765	int throttle_count;
766	struct list_head throttled_list;
767	struct list_head throttled_csd_list;
768	struct list_head throttled_limbo_list;
769	#endif /* CONFIG_CFS_BANDWIDTH */
770	#endif /* CONFIG_FAIR_GROUP_SCHED */
771	};
772
#ifdef CONFIG_SCHED_CLASS_EXT
/* scx_rq->flags, protected by the rq lock */
enum scx_rq_flags {
	/*
	 * A hotplugged CPU starts scheduling before rq_online_scx(). Track
	 * ops.cpu_on/offline() state so that ops.enqueue/dispatch() are called
	 * only while the BPF scheduler considers the CPU to be online.
	 */
	SCX_RQ_ONLINE		= 1 << 0,
	SCX_RQ_CAN_STOP_TICK	= 1 << 1,
	SCX_RQ_BAL_PENDING	= 1 << 2, /* balance hasn't run yet */
	SCX_RQ_BAL_KEEP		= 1 << 3, /* balance decided to keep current */
	SCX_RQ_BYPASSING	= 1 << 4,
	SCX_RQ_CLK_VALID	= 1 << 5, /* RQ clock is fresh and valid */

	SCX_RQ_IN_WAKEUP	= 1 << 16,
	SCX_RQ_IN_BALANCE	= 1 << 17,
};

/* Per-runqueue state for the extensible scheduler class. */
struct scx_rq {
	struct scx_dispatch_q	local_dsq;
	struct list_head	runnable_list;		/* runnable tasks on this rq */
	struct list_head	ddsp_deferred_locals;	/* deferred ddsps from enq */
	unsigned long		ops_qseq;
	u64			extra_enq_flags;	/* see move_task_to_local_dsq() */
	u32			nr_running;
	u32			cpuperf_target;		/* [0, SCHED_CAPACITY_SCALE] */
	bool			cpu_released;
	u32			flags;
	u64			clock;			/* current per-rq clock -- see scx_bpf_now() */
	cpumask_var_t		cpus_to_kick;
	cpumask_var_t		cpus_to_kick_if_idle;
	cpumask_var_t		cpus_to_preempt;
	cpumask_var_t		cpus_to_wait;
	unsigned long		pnt_seq;
	struct balance_callback	deferred_bal_cb;
	struct irq_work		deferred_irq_work;
	struct irq_work		kick_cpus_irq_work;
};
#endif /* CONFIG_SCHED_CLASS_EXT */
813
814	static inline int rt_bandwidth_enabled(void)
815	{
816	return sysctl_sched_rt_runtime >= `0`;
817	}
818
/* RT IPI pull logic requires IRQ_WORK */
#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
# define HAVE_RT_PUSH_IPI
#endif
823
824	/ Real-Time classes' related field in a runqueue: /
825	struct rt_rq {
826	struct rt_prio_array active;
827	unsigned int rt_nr_running;
828	unsigned int rr_nr_running;
829	struct {
830	int curr; / highest queued rt task prio /
831	int next; / next highest /
832	} highest_prio;
833	bool overloaded;
834	struct plist_head pushable_tasks;
835
836	int rt_queued;
837
838	#ifdef CONFIG_RT_GROUP_SCHED
839	int rt_throttled;
840	u64 rt_time; / consumed RT time, goes up in update_curr_rt /
841	u64 rt_runtime; / allotted RT time, "slice" from rt_bandwidth, RT sharing/balancing /
842	/ Nests inside the rq lock: /
843	raw_spinlock_t rt_runtime_lock;
844
845	unsigned int rt_nr_boosted;
846
847	struct rq rq; /* this is always top-level rq, cache? /
848	#endif
849	#ifdef CONFIG_CGROUP_SCHED
850	struct task_group tg; /* this tg has "this" rt_rq on given CPU for runnable entities /
851	#endif
852	};
853
854	static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
855	{
856	return rt_rq->rt_queued && rt_rq->rt_nr_running;
857	}
858
859	/ Deadline class' related fields in a runqueue /
860	struct dl_rq {
861	/ runqueue is an rbtree, ordered by deadline /
862	struct rb_root_cached root;
863
864	unsigned int dl_nr_running;
865
866	/*
867	* Deadline values of the currently executing and the
868	* earliest ready task on this rq. Caching these facilitates
869	* the decision whether or not a ready but not running task
870	* should migrate somewhere else.
871	*/
872	struct {
873	u64 curr;
874	u64 next;
875	} earliest_dl;
876
877	bool overloaded;
878
879	/*
880	* Tasks on this rq that can be pushed away. They are kept in
881	* an rb-tree, ordered by tasks' deadlines, with caching
882	* of the leftmost (earliest deadline) element.
883	*/
884	struct rb_root_cached pushable_dl_tasks_root;
885
886	/*
887	* "Active utilization" for this runqueue: increased when a
888	* task wakes up (becomes TASK_RUNNING) and decreased when a
889	* task blocks
890	*/
891	u64 running_bw;
892
893	/*
894	* Utilization of the tasks "assigned" to this runqueue (including
895	* the tasks that are in runqueue and the tasks that executed on this
896	* CPU and blocked). Increased when a task moves to this runqueue, and
897	* decreased when the task moves away (migrates, changes scheduling
898	* policy, or terminates).
899	* This is needed to compute the "inactive utilization" for the
900	* runqueue (inactive utilization = this_bw - running_bw).
901	*/
902	u64 this_bw;
903	u64 extra_bw;
904
905	/*
906	* Maximum available bandwidth for reclaiming by SCHED_FLAG_RECLAIM
907	* tasks of this rq. Used in calculation of reclaimable bandwidth(GRUB).
908	*/
909	u64 max_bw;
910
911	/*
912	* Inverse of the fraction of CPU utilization that can be reclaimed
913	* by the GRUB algorithm.
914	*/
915	u64 bw_ratio;
916	};
917
918	#ifdef CONFIG_FAIR_GROUP_SCHED
919
920	/ An entity is a task if it doesn't "own" a runqueue /
921	#define entity_is_task(se) (!se->my_q)
922
923	static inline void se_update_runnable(struct sched_entity *se)
924	{
925	if (!entity_is_task(se))
926	se->runnable_weight = se->my_q->h_nr_runnable;
927	}
928
929	static inline long se_runnable(struct sched_entity *se)
930	{
931	if (se->sched_delayed)
932	return false;
933
934	if (entity_is_task(se))
935	return !!se->on_rq;
936	else
937	return se->runnable_weight;
938	}
939
940	#else /* !CONFIG_FAIR_GROUP_SCHED: */
941
942	#define entity_is_task(se) 1
943
944	static inline void se_update_runnable(struct sched_entity *se) { }
945
946	static inline long se_runnable(struct sched_entity *se)
947	{
948	if (se->sched_delayed)
949	return false;
950
951	return !!se->on_rq;
952	}
953
954	#endif /* !CONFIG_FAIR_GROUP_SCHED */
955
956	/*
957	* XXX we want to get rid of these helpers and use the full load resolution.
958	*/
959	static inline long se_weight(struct sched_entity *se)
960	{
961	return scale_load_down(se->load.weight);
962	}
963
964
/* True when CPU @a has a strictly higher arch_asym_cpu_priority() than CPU @b. */
static inline bool sched_asym_prefer(int a, int b)
{
	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
}
969
970	struct perf_domain {
971	struct em_perf_domain *em_pd;
972	struct perf_domain *next;
973	struct rcu_head rcu;
974	};
975
976	/*
977	* We add the notion of a root-domain which will be used to define per-domain
978	* variables. Each exclusive cpuset essentially defines an island domain by
979	* fully partitioning the member CPUs from any other cpuset. Whenever a new
980	* exclusive cpuset is created, we also create and attach a new root-domain
981	* object.
982	*
983	*/
984	struct root_domain {
985	atomic_t refcount;
986	atomic_t rto_count;
987	struct rcu_head rcu;
988	cpumask_var_t span;
989	cpumask_var_t online;
990
991	/*
992	* Indicate pullable load on at least one CPU, e.g:
993	* - More than one runnable task
994	* - Running task is misfit
995	*/
996	bool overloaded;
997
998	/ Indicate one or more CPUs over-utilized (tipping point) /
999	bool overutilized;
1000
1001	/*
1002	* The bit corresponding to a CPU gets set here if such CPU has more
1003	* than one runnable -deadline task (as it is below for RT tasks).
1004	*/
1005	cpumask_var_t dlo_mask;
1006	atomic_t dlo_count;
1007	struct dl_bw dl_bw;
1008	struct cpudl cpudl;
1009
1010	/*
1011	* Indicate whether a root_domain's dl_bw has been checked or
1012	* updated. It's monotonously increasing value.
1013	*
1014	* Also, some corner cases, like 'wrap around' is dangerous, but given
1015	* that u64 is 'big enough'. So that shouldn't be a concern.
1016	*/
1017	u64 visit_cookie;
1018
1019	#ifdef HAVE_RT_PUSH_IPI
1020	/*
1021	* For IPI pull requests, loop across the rto_mask.
1022	*/
1023	struct irq_work rto_push_work;
1024	raw_spinlock_t rto_lock;
1025	/ These are only updated and read within rto_lock /
1026	int rto_loop;
1027	int rto_cpu;
1028	/ These atomics are updated outside of a lock /
1029	atomic_t rto_loop_next;
1030	atomic_t rto_loop_start;
1031	#endif /* HAVE_RT_PUSH_IPI */
1032	/*
1033	* The "RT overload" flag: it gets set if a CPU has more than
1034	* one runnable RT task.
1035	*/
1036	cpumask_var_t rto_mask;
1037	struct cpupri cpupri;
1038
1039	/*
1040	* NULL-terminated list of performance domains intersecting with the
1041	* CPUs of the rd. Protected by RCU.
1042	*/
1043	struct perf_domain __rcu *pd;
1044	};
1045
/* Root-domain setup, attachment and reference counting entry points. */
extern void init_defrootdomain(void);
extern int sched_init_domains(const struct cpumask *cpu_map);
extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
extern void sched_get_rd(struct root_domain *rd);
extern void sched_put_rd(struct root_domain *rd);
1051
1052	static inline int get_rd_overloaded(struct root_domain *rd)
1053	{
1054	return READ_ONCE(rd->overloaded);
1055	}
1056
1057	static inline void set_rd_overloaded(struct root_domain rd, int* status)
1058	{
1059	if (get_rd_overloaded(rd) != status)
1060	WRITE_ONCE(rd->overloaded, status);
1061	}
1062
1063	#ifdef HAVE_RT_PUSH_IPI
1064	extern void rto_push_irq_work_func(struct irq_work *work);
1065	#endif
1066
1067	#ifdef CONFIG_UCLAMP_TASK
1068	/*
1069	* struct uclamp_bucket - Utilization clamp bucket
1070	* @value: utilization clamp value for tasks on this clamp bucket
1071	* @tasks: number of RUNNABLE tasks on this clamp bucket
1072	*
1073	* Keep track of how many tasks are RUNNABLE for a given utilization
1074	* clamp value.
1075	*/
1076	struct uclamp_bucket {
1077	unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
1078	unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
1079	};
1080
1081	/*
1082	* struct uclamp_rq - rq's utilization clamp
1083	* @value: currently active clamp values for a rq
1084	* @bucket: utilization clamp buckets affecting a rq
1085	*
1086	* Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
1087	* A clamp value is affecting a rq when there is at least one task RUNNABLE
1088	* (or actually running) with that value.
1089	*
1090	* There are up to UCLAMP_CNT possible different clamp values, currently there
1091	* are only two: minimum utilization and maximum utilization.
1092	*
1093	* All utilization clamping values are MAX aggregated, since:
1094	* - for util_min: we want to run the CPU at least at the max of the minimum
1095	* utilization required by its currently RUNNABLE tasks.
1096	* - for util_max: we want to allow the CPU to run up to the max of the
1097	* maximum utilization allowed by its currently RUNNABLE tasks.
1098	*
1099	* Since on each system we expect only a limited number of different
1100	* utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
1101	* the metrics required to compute all the per-rq utilization clamp values.
1102	*/
1103	struct uclamp_rq {
1104	unsigned int value;
1105	struct uclamp_bucket bucket[UCLAMP_BUCKETS];
1106	};
1107
1108	DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
1109	#endif /* CONFIG_UCLAMP_TASK */
1110
1111	/*
1112	* This is the main, per-CPU runqueue data structure.
1113	*
1114	* Locking rule: those places that want to lock multiple runqueues
1115	* (such as the load balancing or the thread migration code), lock
1116	* acquire operations must be ordered by ascending &runqueue.
1117	*/
1118	struct rq {
1119	/ runqueue lock: /
1120	raw_spinlock_t __lock;
1121
1122	unsigned int nr_running;
1123	#ifdef CONFIG_NUMA_BALANCING
1124	unsigned int nr_numa_running;
1125	unsigned int nr_preferred_running;
1126	unsigned int numa_migrate_on;
1127	#endif
1128	#ifdef CONFIG_NO_HZ_COMMON
1129	unsigned long last_blocked_load_update_tick;
1130	unsigned int has_blocked_load;
1131	call_single_data_t nohz_csd;
1132	unsigned int nohz_tick_stopped;
1133	atomic_t nohz_flags;
1134	#endif /* CONFIG_NO_HZ_COMMON */
1135
1136	unsigned int ttwu_pending;
1137	u64 nr_switches;
1138
1139	#ifdef CONFIG_UCLAMP_TASK
1140	/ Utilization clamp values based on CPU's RUNNABLE tasks /
1141	struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned;
1142	unsigned int uclamp_flags;
1143	#define UCLAMP_FLAG_IDLE 0x01
1144	#endif
1145
1146	struct cfs_rq cfs;
1147	struct rt_rq rt;
1148	struct dl_rq dl;
1149	#ifdef CONFIG_SCHED_CLASS_EXT
1150	struct scx_rq scx;
1151	#endif
1152
1153	struct sched_dl_entity fair_server;
1154
1155	#ifdef CONFIG_FAIR_GROUP_SCHED
1156	/ list of leaf cfs_rq on this CPU: /
1157	struct list_head leaf_cfs_rq_list;
1158	struct list_head *tmp_alone_branch;
1159	#endif /* CONFIG_FAIR_GROUP_SCHED */
1160
1161	/*
1162	* This is part of a global counter where only the total sum
1163	* over all CPUs matters. A task can increase this counter on
1164	* one CPU and if it got migrated afterwards it may decrease
1165	* it on another CPU. Always updated under the runqueue lock:
1166	*/
1167	unsigned long nr_uninterruptible;
1168
1169	#ifdef CONFIG_SCHED_PROXY_EXEC
1170	struct task_struct __rcu donor; /* Scheduling context /
1171	struct task_struct __rcu curr; /* Execution context /
1172	#else
1173	union {
1174	struct task_struct __rcu donor; /* Scheduler context /
1175	struct task_struct __rcu curr; /* Execution context /
1176	};
1177	#endif
1178	struct sched_dl_entity *dl_server;
1179	struct task_struct *idle;
1180	struct task_struct *stop;
1181	unsigned long next_balance;
1182	struct mm_struct *prev_mm;
1183
1184	unsigned int clock_update_flags;
1185	u64 clock;
1186	/ Ensure that all clocks are in the same cache line /
1187	u64 clock_task ____cacheline_aligned;
1188	u64 clock_pelt;
1189	unsigned long lost_idle_time;
1190	u64 clock_pelt_idle;
1191	u64 clock_idle;
1192	#ifndef CONFIG_64BIT
1193	u64 clock_pelt_idle_copy;
1194	u64 clock_idle_copy;
1195	#endif
1196
1197	atomic_t nr_iowait;
1198
1199	u64 last_seen_need_resched_ns;
1200	int ticks_without_resched;
1201
1202	#ifdef CONFIG_MEMBARRIER
1203	int membarrier_state;
1204	#endif
1205
1206	struct root_domain *rd;
1207	struct sched_domain __rcu *sd;
1208
1209	unsigned long cpu_capacity;
1210
1211	struct balance_callback *balance_callback;
1212
1213	unsigned char nohz_idle_balance;
1214	unsigned char idle_balance;
1215
1216	unsigned long misfit_task_load;
1217
1218	/ For active balancing /
1219	int active_balance;
1220	int push_cpu;
1221	struct cpu_stop_work active_balance_work;
1222
1223	/ CPU of this runqueue: /
1224	int cpu;
1225	int online;
1226
1227	struct list_head cfs_tasks;
1228
1229	struct sched_avg avg_rt;
1230	struct sched_avg avg_dl;
1231	#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
1232	struct sched_avg avg_irq;
1233	#endif
1234	#ifdef CONFIG_SCHED_HW_PRESSURE
1235	struct sched_avg avg_hw;
1236	#endif
1237	u64 idle_stamp;
1238	u64 avg_idle;
1239
1240	/ This is used to determine avg_idle's max value /
1241	u64 max_idle_balance_cost;
1242
1243	#ifdef CONFIG_HOTPLUG_CPU
1244	struct rcuwait hotplug_wait;
1245	#endif
1246
1247	#ifdef CONFIG_IRQ_TIME_ACCOUNTING
1248	u64 prev_irq_time;
1249	u64 psi_irq_time;
1250	#endif
1251	#ifdef CONFIG_PARAVIRT
1252	u64 prev_steal_time;
1253	#endif
1254	#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
1255	u64 prev_steal_time_rq;
1256	#endif
1257
1258	/ calc_load related fields /
1259	unsigned long calc_load_update;
1260	long calc_load_active;
1261
1262	#ifdef CONFIG_SCHED_HRTICK
1263	call_single_data_t hrtick_csd;
1264	struct hrtimer hrtick_timer;
1265	ktime_t hrtick_time;
1266	#endif
1267
1268	#ifdef CONFIG_SCHEDSTATS
1269	/ latency stats /
1270	struct sched_info rq_sched_info;
1271	unsigned long long rq_cpu_time;
1272
1273	/ sys_sched_yield() stats /
1274	unsigned int yld_count;
1275
1276	/ schedule() stats /
1277	unsigned int sched_count;
1278	unsigned int sched_goidle;
1279
1280	/ try_to_wake_up() stats /
1281	unsigned int ttwu_count;
1282	unsigned int ttwu_local;
1283	#endif
1284
1285	#ifdef CONFIG_CPU_IDLE
1286	/ Must be inspected within a RCU lock section /
1287	struct cpuidle_state *idle_state;
1288	#endif
1289
1290	unsigned int nr_pinned;
1291	unsigned int push_busy;
1292	struct cpu_stop_work push_work;
1293
1294	#ifdef CONFIG_SCHED_CORE
1295	/ per rq /
1296	struct rq *core;
1297	struct task_struct *core_pick;
1298	struct sched_dl_entity *core_dl_server;
1299	unsigned int core_enabled;
1300	unsigned int core_sched_seq;
1301	struct rb_root core_tree;
1302
1303	/ shared state -- careful with sched_core_cpu_deactivate() /
1304	unsigned int core_task_seq;
1305	unsigned int core_pick_seq;
1306	unsigned long core_cookie;
1307	unsigned int core_forceidle_count;
1308	unsigned int core_forceidle_seq;
1309	unsigned int core_forceidle_occupation;
1310	u64 core_forceidle_start;
1311	#endif /* CONFIG_SCHED_CORE */
1312
1313	/ Scratch cpumask to be temporarily used under rq_lock /
1314	cpumask_var_t scratch_mask;
1315
1316	#ifdef CONFIG_CFS_BANDWIDTH
1317	call_single_data_t cfsb_csd;
1318	struct list_head cfsb_csd_list;
1319	#endif
1320	};
1321
1322	#ifdef CONFIG_FAIR_GROUP_SCHED
1323
1324	/ CPU runqueue to which this cfs_rq is attached /
1325	static inline struct rq rq_of(struct* cfs_rq *cfs_rq)
1326	{
1327	return cfs_rq->rq;
1328	}
1329
1330	#else /* !CONFIG_FAIR_GROUP_SCHED: */
1331
1332	static inline struct rq rq_of(struct* cfs_rq *cfs_rq)
1333	{
1334	return container_of(cfs_rq, struct rq, cfs);
1335	}
1336	#endif /* !CONFIG_FAIR_GROUP_SCHED */
1337
1338	static inline int cpu_of(struct rq *rq)
1339	{
1340	return rq->cpu;
1341	}
1342
1343	#define MDF_PUSH 0x01
1344
1345	static inline bool is_migration_disabled(struct task_struct *p)
1346	{
1347	return p->migration_disabled;
1348	}
1349
1350	DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
1351
/* Accessors for the per-CPU 'runqueues' variable. */
#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
#define this_rq()		this_cpu_ptr(&runqueues)
#define task_rq(p)		cpu_rq(task_cpu(p))
#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
#define raw_rq()		raw_cpu_ptr(&runqueues)
1357
1358	#ifdef CONFIG_SCHED_PROXY_EXEC
1359	static inline void rq_set_donor(struct rq rq, struct* task_struct *t)
1360	{
1361	rcu_assign_pointer(rq->donor, t);
1362	}
1363	#else
/* !CONFIG_SCHED_PROXY_EXEC: intentionally empty. */
static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
{
	/* Do nothing */
}
1368	#endif
1369
1370	#ifdef CONFIG_SCHED_CORE
/* Forward declaration; used by sched_group_cookie_match() before its definition. */
static inline struct cpumask *sched_group_span(struct sched_group *sg);
1372
1373	DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
1374
1375	static inline bool sched_core_enabled(struct rq *rq)
1376	{
1377	return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled;
1378	}
1379
1380	static inline bool sched_core_disabled(void)
1381	{
1382	return !static_branch_unlikely(&__sched_core_enabled);
1383	}
1384
1385	/*
1386	* Be careful with this function; not for general use. The return value isn't
1387	* stable unless you actually hold a relevant rq->__lock.
1388	*/
1389	static inline raw_spinlock_t rq_lockp(struct* rq *rq)
1390	{
1391	if (sched_core_enabled(rq))
1392	return &rq->core->__lock;
1393
1394	return &rq->__lock;
1395	}
1396
1397	static inline raw_spinlock_t __rq_lockp(struct* rq *rq)
1398	{
1399	if (rq->core_enabled)
1400	return &rq->core->__lock;
1401
1402	return &rq->__lock;
1403	}
1404
extern bool
cfs_prio_less(const struct task_struct *a, const struct task_struct *b, bool fi);

extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
1409
1410	/*
1411	* Helpers to check if the CPU's core cookie matches with the task's cookie
1412	* when core scheduling is enabled.
1413	* A special case is that the task's cookie always matches with CPU's core
1414	* cookie if the CPU is in an idle core.
1415	*/
1416	static inline bool sched_cpu_cookie_match(struct rq rq, struct* task_struct *p)
1417	{
1418	/ Ignore cookie match if core scheduler is not enabled on the CPU. /
1419	if (!sched_core_enabled(rq))
1420	return true;
1421
1422	return rq->core->core_cookie == p->core_cookie;
1423	}
1424
1425	static inline bool sched_core_cookie_match(struct rq rq, struct* task_struct *p)
1426	{
1427	bool idle_core = true;
1428	int cpu;
1429
1430	/ Ignore cookie match if core scheduler is not enabled on the CPU. /
1431	if (!sched_core_enabled(rq))
1432	return true;
1433
1434	for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
1435	if (!available_idle_cpu(cpu)) {
1436	idle_core = false;
1437	break;
1438	}
1439	}
1440
1441	/*
1442	* A CPU in an idle core is always the best choice for tasks with
1443	* cookies.
1444	*/
1445	return idle_core \|\| rq->core->core_cookie == p->core_cookie;
1446	}
1447
1448	static inline bool sched_group_cookie_match(struct rq *rq,
1449	struct task_struct *p,
1450	struct sched_group *group)
1451	{
1452	int cpu;
1453
1454	/ Ignore cookie match if core scheduler is not enabled on the CPU. /
1455	if (!sched_core_enabled(rq))
1456	return true;
1457
1458	for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
1459	if (sched_core_cookie_match(cpu_rq(cpu), p))
1460	return true;
1461	}
1462	return false;
1463	}
1464
1465	static inline bool sched_core_enqueued(struct task_struct *p)
1466	{
1467	return !RB_EMPTY_NODE(&p->core_node);
1468	}
1469
extern void sched_core_enqueue(struct rq *rq, struct task_struct *p);
extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags);
1472
1473	extern void sched_core_get(void);
1474	extern void sched_core_put(void);
1475
1476	#else /* !CONFIG_SCHED_CORE: */
1477
/* !CONFIG_SCHED_CORE: core scheduling is never enabled. */
static inline bool sched_core_enabled(struct rq *rq)
{
	(void)rq;

	return false;
}
1482
/* !CONFIG_SCHED_CORE: core scheduling is always disabled. */
static inline bool sched_core_disabled(void)
{
	const bool disabled = true;

	return disabled;
}
1487
1488	static inline raw_spinlock_t rq_lockp(struct* rq *rq)
1489	{
1490	return &rq->__lock;
1491	}
1492
1493	static inline raw_spinlock_t __rq_lockp(struct* rq *rq)
1494	{
1495	return &rq->__lock;
1496	}
1497
/* !CONFIG_SCHED_CORE: there are no cookies to compare, always a match. */
static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
{
	return true;
}
1502
/* !CONFIG_SCHED_CORE: there are no cookies to compare, always a match. */
static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
{
	return true;
}
1507
/* !CONFIG_SCHED_CORE: there are no cookies to compare, always a match. */
static inline bool
sched_group_cookie_match(struct rq *rq, struct task_struct *p,
			 struct sched_group *group)
{
	return true;
}
1514
1515	#endif /* !CONFIG_SCHED_CORE */
1516
1517	#ifdef CONFIG_RT_GROUP_SCHED
1518	# ifdef CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED
1519	DECLARE_STATIC_KEY_FALSE(rt_group_sched);
1520	static inline bool rt_group_sched_enabled(void)
1521	{
1522	return static_branch_unlikely(&rt_group_sched);
1523	}
1524	# else /* !CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED: */
1525	DECLARE_STATIC_KEY_TRUE(rt_group_sched);
1526	static inline bool rt_group_sched_enabled(void)
1527	{
1528	return static_branch_likely(&rt_group_sched);
1529	}
1530	# endif /* !CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED */
1531	#else /* !CONFIG_RT_GROUP_SCHED: */
/* !CONFIG_RT_GROUP_SCHED: constant false. */
# define rt_group_sched_enabled()	(false)
1533	#endif /* !CONFIG_RT_GROUP_SCHED */
1534
/* Lockdep assertion that the lock returned by __rq_lockp(@rq) is held. */
static inline void lockdep_assert_rq_held(struct rq *rq)
{
	lockdep_assert_held(__rq_lockp(rq));
}
1539
extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
extern bool raw_spin_rq_trylock(struct rq *rq);
extern void raw_spin_rq_unlock(struct rq *rq);
1543
/* Take @rq's lock with lockdep subclass 0. */
static inline void raw_spin_rq_lock(struct rq *rq)
{
	raw_spin_rq_lock_nested(rq, 0);
}
1548
/* Disable local interrupts, then take @rq's lock. */
static inline void raw_spin_rq_lock_irq(struct rq *rq)
{
	local_irq_disable();
	raw_spin_rq_lock(rq);
}
1554
/* Drop @rq's lock, then re-enable local interrupts. */
static inline void raw_spin_rq_unlock_irq(struct rq *rq)
{
	raw_spin_rq_unlock(rq);
	local_irq_enable();
}
1560
/*
 * Save and disable local interrupts, take @rq's lock, and return the
 * saved interrupt state to the caller.
 */
static inline unsigned long _raw_spin_rq_lock_irqsave(struct rq *rq)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	raw_spin_rq_lock(rq);

	return irqflags;
}
1570
/* Drop @rq's lock, then restore the interrupt state saved in @flags. */
static inline void raw_spin_rq_unlock_irqrestore(struct rq *rq, unsigned long flags)
{
	raw_spin_rq_unlock(rq);
	local_irq_restore(flags);
}
1576
/* Statement-form wrapper: stores the saved interrupt state into @flags. */
#define raw_spin_rq_lock_irqsave(rq, flags)	\
do {						\
	flags = _raw_spin_rq_lock_irqsave(rq);	\
} while (0)
1581
1582	#ifdef CONFIG_SCHED_SMT
1583	extern void __update_idle_core(struct rq *rq);
1584
1585	static inline void update_idle_core(struct rq *rq)
1586	{
1587	if (static_branch_unlikely(&sched_smt_present))
1588	__update_idle_core(rq);
1589	}
1590
1591	#else /* !CONFIG_SCHED_SMT: */
/* !CONFIG_SCHED_SMT: no-op. */
static inline void update_idle_core(struct rq *rq)
{
}
1593	#endif /* !CONFIG_SCHED_SMT */
1594
1595	#ifdef CONFIG_FAIR_GROUP_SCHED
1596
1597	static inline struct task_struct task_of(struct* sched_entity *se)
1598	{
1599	WARN_ON_ONCE(!entity_is_task(se));
1600	return container_of(se, struct task_struct, se);
1601	}
1602
1603	static inline struct cfs_rq task_cfs_rq(struct* task_struct *p)
1604	{
1605	return p->se.cfs_rq;
1606	}
1607
1608	/ runqueue on which this entity is (to be) queued /
1609	static inline struct cfs_rq cfs_rq_of(const* struct sched_entity *se)
1610	{
1611	return se->cfs_rq;
1612	}
1613
1614	/ runqueue "owned" by this group /
1615	static inline struct cfs_rq group_cfs_rq(struct* sched_entity *grp)
1616	{
1617	return grp->my_q;
1618	}
1619
1620	#else /* !CONFIG_FAIR_GROUP_SCHED: */
1621
/* Map a sched_entity pointer to the task_struct embedding it as 'se'. */
#define task_of(_se)	container_of(_se, struct task_struct, se)
1623
1624	static inline struct cfs_rq task_cfs_rq(const* struct task_struct *p)
1625	{
1626	return &task_rq(p)->cfs;
1627	}
1628
1629	static inline struct cfs_rq cfs_rq_of(const* struct sched_entity *se)
1630	{
1631	const struct task_struct *p = task_of(se);
1632	struct rq *rq = task_rq(p);
1633
1634	return &rq->cfs;
1635	}
1636
/* runqueue "owned" by this group: always NULL without CONFIG_FAIR_GROUP_SCHED */
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{
	return NULL;
}
1642
1643	#endif /* !CONFIG_FAIR_GROUP_SCHED */
1644
1645	extern void update_rq_clock(struct rq *rq);
1646
/*
 * rq::clock_update_flags bits
 *
 * %RQCF_REQ_SKIP - will request skipping of clock update on the next
 *  call to __schedule(). This is an optimisation to avoid
 *  neighbouring rq clock updates.
 *
 * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
 *  in effect and calls to update_rq_clock() are being ignored.
 *
 * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
 *  made to update_rq_clock() since the last time rq::lock was pinned.
 *
 * If inside of __schedule(), clock_update_flags will have been
 * shifted left (a left shift is a cheap operation for the fast path
 * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
 *
 *	if (rq->clock_update_flags >= RQCF_UPDATED)
 *
 * to check if %RQCF_UPDATED is set. It'll never be shifted more than
 * one position though, because the next rq_unpin_lock() will shift it
 * back.
 */
#define RQCF_REQ_SKIP		0x01
#define RQCF_ACT_SKIP		0x02
#define RQCF_UPDATED		0x04
1673
1674	static inline void assert_clock_updated(struct rq *rq)
1675	{
1676	/*
1677	* The only reason for not seeing a clock update since the
1678	* last rq_pin_lock() is if we're currently skipping updates.
1679	*/
1680	WARN_ON_ONCE(rq->clock_update_flags < RQCF_ACT_SKIP);
1681	}
1682
1683	static inline u64 rq_clock(struct rq *rq)
1684	{
1685	lockdep_assert_rq_held(rq);
1686	assert_clock_updated(rq);
1687
1688	return rq->clock;
1689	}
1690
1691	static inline u64 rq_clock_task(struct rq *rq)
1692	{
1693	lockdep_assert_rq_held(rq);
1694	assert_clock_updated(rq);
1695
1696	return rq->clock_task;
1697	}
1698
1699	static inline void rq_clock_skip_update(struct rq *rq)
1700	{
1701	lockdep_assert_rq_held(rq);
1702	rq->clock_update_flags \|= RQCF_REQ_SKIP;
1703	}
1704
1705	/*
1706	* See rt task throttling, which is the only time a skip
1707	* request is canceled.
1708	*/
1709	static inline void rq_clock_cancel_skipupdate(struct rq *rq)
1710	{
1711	lockdep_assert_rq_held(rq);
1712	rq->clock_update_flags &= ~RQCF_REQ_SKIP;
1713	}
1714
1715	/*
1716	* During cpu offlining and rq wide unthrottling, we can trigger
1717	* an update_rq_clock() for several cfs and rt runqueues (Typically
1718	* when using list_for_each_entry_*)
1719	* rq_clock_start_loop_update() can be called after updating the clock
1720	* once and before iterating over the list to prevent multiple update.
1721	* After the iterative traversal, we need to call rq_clock_stop_loop_update()
1722	* to clear RQCF_ACT_SKIP of rq->clock_update_flags.
1723	*/
1724	static inline void rq_clock_start_loop_update(struct rq *rq)
1725	{
1726	lockdep_assert_rq_held(rq);
1727	WARN_ON_ONCE(rq->clock_update_flags & RQCF_ACT_SKIP);
1728	rq->clock_update_flags \|= RQCF_ACT_SKIP;
1729	}
1730
1731	static inline void rq_clock_stop_loop_update(struct rq *rq)
1732	{
1733	lockdep_assert_rq_held(rq);
1734	rq->clock_update_flags &= ~RQCF_ACT_SKIP;
1735	}
1736
1737	struct rq_flags {
1738	unsigned long flags;
1739	struct pin_cookie cookie;
1740	/*
1741	* A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
1742	* current pin context is stashed here in case it needs to be
1743	* restored in rq_repin_lock().
1744	*/
1745	unsigned int clock_update_flags;
1746	};
1747
1748	extern struct balance_callback balance_push_callback;
1749
1750	#ifdef CONFIG_SCHED_CLASS_EXT
extern const struct sched_class ext_sched_class;

DECLARE_STATIC_KEY_FALSE(__scx_enabled);	/* SCX BPF scheduler loaded */
DECLARE_STATIC_KEY_FALSE(__scx_switched_all);	/* all fair class tasks on SCX */

#define scx_enabled()		static_branch_unlikely(&__scx_enabled)
#define scx_switched_all()	static_branch_unlikely(&__scx_switched_all)
1758
1759	static inline void scx_rq_clock_update(struct rq *rq, u64 clock)
1760	{
1761	if (!scx_enabled())
1762	return;
1763	WRITE_ONCE(rq->scx.clock, clock);
1764	smp_store_release(&rq->scx.flags, rq->scx.flags \| SCX_RQ_CLK_VALID);
1765	}
1766
1767	static inline void scx_rq_clock_invalidate(struct rq *rq)
1768	{
1769	if (!scx_enabled())
1770	return;
1771	WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
1772	}
1773
1774	#else /* !CONFIG_SCHED_CLASS_EXT: */
/* !CONFIG_SCHED_CLASS_EXT: both predicates are constant false. */
#define scx_enabled()		(false)
#define scx_switched_all()	(false)
1777
1778	static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {}
1779	static inline void scx_rq_clock_invalidate(struct rq *rq) {}
1780	#endif /* !CONFIG_SCHED_CLASS_EXT */
1781
1782	/*
1783	* Lockdep annotation that avoids accidental unlocks; it's like a
1784	* sticky/continuous lockdep_assert_held().
1785	*
1786	* This avoids code that has access to 'struct rq *rq' (basically everything in
1787	* the scheduler) from accidentally unlocking the rq if they do not also have a
1788	* copy of the (on-stack) 'struct rq_flags rf'.
1789	*
1790	* Also see Documentation/locking/lockdep-design.rst.
1791	*/
1792	static inline void rq_pin_lock(struct rq rq, struct* rq_flags *rf)
1793	{
1794	rf->cookie = lockdep_pin_lock(__rq_lockp(rq));
1795
1796	rq->clock_update_flags &= (RQCF_REQ_SKIP\|RQCF_ACT_SKIP);
1797	rf->clock_update_flags = `0`;
1798	WARN_ON_ONCE(rq->balance_callback && rq->balance_callback != &balance_push_callback);
1799	}
1800
1801	static inline void rq_unpin_lock(struct rq rq, struct* rq_flags *rf)
1802	{
1803	if (rq->clock_update_flags > RQCF_ACT_SKIP)
1804	rf->clock_update_flags = RQCF_UPDATED;
1805
1806	scx_rq_clock_invalidate(rq);
1807	lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
1808	}
1809
1810	static inline void rq_repin_lock(struct rq rq, struct* rq_flags *rf)
1811	{
1812	lockdep_repin_lock(__rq_lockp(rq), rf->cookie);
1813
1814	/*
1815	* Restore the value we stashed in @rf for this pin context.
1816	*/
1817	rq->clock_update_flags \|= rf->clock_update_flags;
1818	}
1819
1820	extern
1821	struct rq __task_rq_lock(struct* task_struct p, struct* rq_flags *rf)
1822	__acquires(rq->lock);
1823
1824	extern
1825	struct rq task_rq_lock(struct* task_struct p, struct* rq_flags *rf)
1826	__acquires(p->pi_lock)
1827	__acquires(rq->lock);
1828
1829	static inline void __task_rq_unlock(struct rq rq, struct* rq_flags *rf)
1830	__releases(rq->lock)
1831	{
1832	rq_unpin_lock(rq, rf);
1833	raw_spin_rq_unlock(rq);
1834	}
1835
1836	static inline void
1837	task_rq_unlock(struct rq rq, struct* task_struct p, struct* rq_flags *rf)
1838	__releases(rq->lock)
1839	__releases(p->pi_lock)
1840	{
1841	rq_unpin_lock(rq, rf);
1842	raw_spin_rq_unlock(rq);
1843	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
1844	}
1845
1846	DEFINE_LOCK_GUARD_1(task_rq_lock, struct task_struct,
1847	_T->rq = task_rq_lock(_T->lock, &_T->rf),
1848	task_rq_unlock(_T->rq, _T->lock, &_T->rf),
1849	struct rq rq; struct* rq_flags rf)
1850
1851	static inline void rq_lock_irqsave(struct rq rq, struct* rq_flags *rf)
1852	__acquires(rq->lock)
1853	{
1854	raw_spin_rq_lock_irqsave(rq, rf->flags);
1855	rq_pin_lock(rq, rf);
1856	}
1857
1858	static inline void rq_lock_irq(struct rq rq, struct* rq_flags *rf)
1859	__acquires(rq->lock)
1860	{
1861	raw_spin_rq_lock_irq(rq);
1862	rq_pin_lock(rq, rf);
1863	}
1864
1865	static inline void rq_lock(struct rq rq, struct* rq_flags *rf)
1866	__acquires(rq->lock)
1867	{
1868	raw_spin_rq_lock(rq);
1869	rq_pin_lock(rq, rf);
1870	}
1871
1872	static inline void rq_unlock_irqrestore(struct rq rq, struct* rq_flags *rf)
1873	__releases(rq->lock)
1874	{
1875	rq_unpin_lock(rq, rf);
1876	raw_spin_rq_unlock_irqrestore(rq, flags: rf->flags);
1877	}
1878
1879	static inline void rq_unlock_irq(struct rq rq, struct* rq_flags *rf)
1880	__releases(rq->lock)
1881	{
1882	rq_unpin_lock(rq, rf);
1883	raw_spin_rq_unlock_irq(rq);
1884	}
1885
1886	static inline void rq_unlock(struct rq rq, struct* rq_flags *rf)
1887	__releases(rq->lock)
1888	{
1889	rq_unpin_lock(rq, rf);
1890	raw_spin_rq_unlock(rq);
1891	}
1892
1893	DEFINE_LOCK_GUARD_1(rq_lock, struct rq,
1894	rq_lock(_T->lock, &_T->rf),
1895	rq_unlock(_T->lock, &_T->rf),
1896	struct rq_flags rf)
1897
1898	DEFINE_LOCK_GUARD_1(rq_lock_irq, struct rq,
1899	rq_lock_irq(_T->lock, &_T->rf),
1900	rq_unlock_irq(_T->lock, &_T->rf),
1901	struct rq_flags rf)
1902
1903	DEFINE_LOCK_GUARD_1(rq_lock_irqsave, struct rq,
1904	rq_lock_irqsave(_T->lock, &_T->rf),
1905	rq_unlock_irqrestore(_T->lock, &_T->rf),
1906	struct rq_flags rf)
1907
1908	static inline struct rq this_rq_lock_irq(struct* rq_flags *rf)
1909	__acquires(rq->lock)
1910	{
1911	struct rq *rq;
1912
1913	local_irq_disable();
1914	rq = this_rq();
1915	rq_lock(rq, rf);
1916
1917	return rq;
1918	}
1919
1920	#ifdef CONFIG_NUMA
1921
/* Kinds of NUMA topology distinguished by the scheduler. */
enum numa_topology_type {
	NUMA_DIRECT		= 0,
	NUMA_GLUELESS_MESH	= 1,
	NUMA_BACKPLANE		= 2,
};
1927
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
extern void sched_init_numa(int offline_node);
extern void sched_update_numa(int cpu, bool online);
extern void sched_domains_numa_masks_set(unsigned int cpu);
extern void sched_domains_numa_masks_clear(unsigned int cpu);
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
1936
1937	#else /* !CONFIG_NUMA: */
1938
/* !CONFIG_NUMA: no-op stubs. */
static inline void sched_init_numa(int offline_node)
{
}

static inline void sched_update_numa(int cpu, bool online)
{
}

static inline void sched_domains_numa_masks_set(unsigned int cpu)
{
}

static inline void sched_domains_numa_masks_clear(unsigned int cpu)
{
}
1943
1944	static inline int sched_numa_find_closest(const struct cpumask cpus, int* cpu)
1945	{
1946	return nr_cpu_ids;
1947	}
1948
1949	#endif /* !CONFIG_NUMA */
1950
1951	#ifdef CONFIG_NUMA_BALANCING
1952
/* The regions in numa_faults array from task_struct */
enum numa_faults_stats {
	NUMA_MEM = 0,
	NUMA_CPU,
	NUMA_MEMBUF,
	NUMA_CPUBUF
};
1960
1961	extern void sched_setnuma(struct task_struct p, int* node);
1962	extern int migrate_task_to(struct task_struct p, int* cpu);
1963	extern int migrate_swap(struct task_struct p, struct* task_struct *t,
1964	int cpu, int scpu);
1965	extern void init_numa_balancing(u64 clone_flags, struct task_struct *p);
1966
1967	#else /* !CONFIG_NUMA_BALANCING: */
1968
1969	static inline void
1970	init_numa_balancing(u64 clone_flags, struct task_struct *p)
1971	{
1972	}
1973
1974	#endif /* !CONFIG_NUMA_BALANCING */
1975
1976	static inline void
1977	queue_balance_callback(struct rq *rq,
1978	struct balance_callback *head,
1979	void (func)(struct* rq *rq))
1980	{
1981	lockdep_assert_rq_held(rq);
1982
1983	/*
1984	* Don't (re)queue an already queued item; nor queue anything when
1985	* balance_push() is active, see the comment with
1986	* balance_push_callback.
1987	*/
1988	if (unlikely(head->next \|\| rq->balance_callback == &balance_push_callback))
1989	return;
1990
1991	head->func = func;
1992	head->next = rq->balance_callback;
1993	rq->balance_callback = head;
1994	}
1995
/* RCU dereference that is also permitted while holding sched_domains_mutex. */
#define rcu_dereference_check_sched_domain(p)				\
	rcu_dereference_check((p), lockdep_is_held(&sched_domains_mutex))
1998
/*
 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
 * See destroy_sched_domains: call_rcu for details.
 *
 * The domain tree of any CPU may only be accessed from within
 * preempt-disabled sections.
 *
 * Walks from @cpu's base domain upwards via ->parent until NULL.
 */
#define for_each_domain(cpu, __sd)						\
	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);	\
	     __sd; __sd = __sd->parent)
2009
2010	/ A mask of all the SD flags that have the SDF_SHARED_CHILD metaflag /
2011	#define SD_FLAG(name, mflags) (name * !!((mflags) & SDF_SHARED_CHILD)) \|
2012	static const unsigned int SD_SHARED_CHILD_MASK =
2013	#include <linux/sched/sd_flags.h>
2014	`0`;
2015	#undef SD_FLAG
2016
2017	/**
2018	* highest_flag_domain - Return highest sched_domain containing flag.
2019	* @cpu: The CPU whose highest level of sched domain is to
2020	* be returned.
2021	* @flag: The flag to check for the highest sched_domain
2022	* for the given CPU.
2023	*
2024	* Returns the highest sched_domain of a CPU which contains @flag. If @flag has
2025	* the SDF_SHARED_CHILD metaflag, all the children domains also have @flag.
2026	*/
2027	static inline struct sched_domain highest_flag_domain(int* cpu, int flag)
2028	{
2029	struct sched_domain sd, hsd = NULL;
2030
2031	for_each_domain(cpu, sd) {
2032	if (sd->flags & flag) {
2033	hsd = sd;
2034	continue;
2035	}
2036
2037	/*
2038	* Stop the search if @flag is known to be shared at lower
2039	* levels. It will not be found further up.
2040	*/
2041	if (flag & SD_SHARED_CHILD_MASK)
2042	break;
2043	}
2044
2045	return hsd;
2046	}
2047
2048	static inline struct sched_domain lowest_flag_domain(int* cpu, int flag)
2049	{
2050	struct sched_domain *sd;
2051
2052	for_each_domain(cpu, sd) {
2053	if (sd->flags & flag)
2054	break;
2055	}
2056
2057	return sd;
2058	}
2059
2060	DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
2061	DECLARE_PER_CPU(int, sd_llc_size);
2062	DECLARE_PER_CPU(int, sd_llc_id);
2063	DECLARE_PER_CPU(int, sd_share_id);
2064	DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
2065	DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
2066	DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
2067	DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
2068
2069	extern struct static_key_false sched_asym_cpucapacity;
2070	extern struct static_key_false sched_cluster_active;
2071
2072	static __always_inline bool sched_asym_cpucap_active(void)
2073	{
2074	return static_branch_unlikely(&sched_asym_cpucapacity);
2075	}
2076
2077	struct sched_group_capacity {
2078	atomic_t ref;
2079	/*
2080	* CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
2081	* for a single CPU.
2082	*/
2083	unsigned long capacity;
2084	unsigned long min_capacity; / Min per-CPU capacity in group /
2085	unsigned long max_capacity; / Max per-CPU capacity in group /
2086	unsigned long next_update;
2087	int imbalance; / XXX unrelated to capacity but shared group state /
2088
2089	int id;
2090
2091	unsigned long cpumask[]; / Balance mask /
2092	};
2093
2094	struct sched_group {
2095	struct sched_group next; /* Must be a circular list /
2096	atomic_t ref;
2097
2098	unsigned int group_weight;
2099	unsigned int cores;
2100	struct sched_group_capacity *sgc;
2101	int asym_prefer_cpu; / CPU of highest priority in group /
2102	int flags;
2103
2104	/*
2105	* The CPUs this group covers.
2106	*
2107	* NOTE: this field is variable length. (Allocated dynamically
2108	* by attaching extra space to the end of the structure,
2109	* depending on how many CPUs the kernel has booted up with)
2110	*/
2111	unsigned long cpumask[];
2112	};
2113
2114	static inline struct cpumask sched_group_span(struct* sched_group *sg)
2115	{
2116	return to_cpumask(sg->cpumask);
2117	}
2118
2119	/*
2120	* See build_balance_mask().
2121	*/
2122	static inline struct cpumask group_balance_mask(struct* sched_group *sg)
2123	{
2124	return to_cpumask(sg->sgc->cpumask);
2125	}
2126
/* Implemented outside this header. */
extern int group_balance_cpu(struct sched_group *sg);

extern void update_sched_domain_debugfs(void);
extern void dirty_sched_domain_sysctl(int cpu);

extern int sched_update_scaling(void);
2133
2134	static inline const struct cpumask task_user_cpus(struct* task_struct *p)
2135	{
2136	if (!p->user_cpus_ptr)
2137	return cpu_possible_mask; / &init_task.cpus_mask /
2138	return p->user_cpus_ptr;
2139	}
2140
2141	#ifdef CONFIG_CGROUP_SCHED
2142
2143	/*
2144	* Return the group to which this tasks belongs.
2145	*
2146	* We cannot use task_css() and friends because the cgroup subsystem
2147	* changes that value before the cgroup_subsys::attach() method is called,
2148	* therefore we cannot pin it and might observe the wrong value.
2149	*
2150	* The same is true for autogroup's p->signal->autogroup->tg, the autogroup
2151	* core changes this before calling sched_move_task().
2152	*
2153	* Instead we use a 'copy' which is updated from sched_move_task() while
2154	* holding both task_struct::pi_lock and rq::lock.
2155	*/
2156	static inline struct task_group task_group(struct* task_struct *p)
2157	{
2158	return p->sched_task_group;
2159	}
2160
/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
{
#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
	struct task_group *tg = task_group(p);
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* Point the fair entity at the group's per-CPU runqueue and parent. */
	set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
	p->se.cfs_rq = tg->cfs_rq[cpu];
	p->se.parent = tg->se[cpu];
	p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0;
#endif

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * p->rt.rt_rq is NULL initially and it is easier to assign
	 * root_task_group's rt_rq than switching in rt_rq_of_se()
	 * Clobbers tg(!)
	 */
	if (!rt_group_sched_enabled())
		tg = &root_task_group;
	p->rt.rt_rq  = tg->rt_rq[cpu];
	p->rt.parent = tg->rt_se[cpu];
#endif /* CONFIG_RT_GROUP_SCHED */
}
2187
2188	#else /* !CONFIG_CGROUP_SCHED: */
2189
/* Without CONFIG_CGROUP_SCHED there is no per-group state to update. */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }

/* Without CONFIG_CGROUP_SCHED a task has no task_group; always NULL. */
static inline struct task_group *task_group(struct task_struct *p)
{
	return NULL;
}
2196
2197	#endif /* !CONFIG_CGROUP_SCHED */
2198
/*
 * Update the per-group runqueue pointers of @p for @cpu and, on SMP,
 * publish the new CPU in the task's thread_info and wake_cpu.
 */
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
	set_task_rq(p, cpu);
#ifdef CONFIG_SMP
	/*
	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
	 * successfully executed on another CPU. We must ensure that updates of
	 * per-task data have been completed by this moment.
	 */
	smp_wmb();
	WRITE_ONCE(task_thread_info(p)->cpu, cpu);
	p->wake_cpu = cpu;
#endif /* CONFIG_SMP */
}
2213
/*
 * Tunables:
 */

/* Turn every SCHED_FEAT() entry of features.h into an enum constant. */
#define SCHED_FEAT(name, enabled)	\
	__SCHED_FEAT_##name ,

enum {
#include "features.h"
	__SCHED_FEAT_NR,
};

#undef SCHED_FEAT
2227
2228	/*
2229	* To support run-time toggling of sched features, all the translation units
2230	* (but core.c) reference the sysctl_sched_features defined in core.c.
2231	*/
2232	extern __read_mostly unsigned int sysctl_sched_features;
2233
2234	#ifdef CONFIG_JUMP_LABEL
2235
2236	#define SCHED_FEAT(name, enabled) \
2237	static __always_inline bool static_branch_##name(struct static_key *key) \
2238	{ \
2239	return static_key_##enabled(key); \
2240	}
2241
2242	#include "features.h"
2243	#undef SCHED_FEAT
2244
2245	extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
2246	#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
2247
2248	#else /* !CONFIG_JUMP_LABEL: */
2249
2250	#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
2251
2252	#endif /* !CONFIG_JUMP_LABEL */
2253
/* Static keys defined outside this header. */
extern struct static_key_false sched_numa_balancing;
extern struct static_key_false sched_schedstats;
2256
2257	static inline u64 global_rt_period(void)
2258	{
2259	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
2260	}
2261
2262	static inline u64 global_rt_runtime(void)
2263	{
2264	if (sysctl_sched_rt_runtime < `0`)
2265	return RUNTIME_INF;
2266
2267	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
2268	}
2269
2270	/*
2271	* Is p the current execution context?
2272	*/
2273	static inline int task_current(struct rq rq, struct* task_struct *p)
2274	{
2275	return rq->curr == p;
2276	}
2277
2278	/*
2279	* Is p the current scheduling context?
2280	*
2281	* Note that it might be the current execution context at the same time if
2282	* rq->curr == rq->donor == p.
2283	*/
2284	static inline int task_current_donor(struct rq rq, struct* task_struct *p)
2285	{
2286	return rq->donor == p;
2287	}
2288
2289	static inline bool task_is_blocked(struct task_struct *p)
2290	{
2291	if (!sched_proxy_exec())
2292	return false;
2293
2294	return !!p->blocked_on;
2295	}
2296
2297	static inline int task_on_cpu(struct rq rq, struct* task_struct *p)
2298	{
2299	return p->on_cpu;
2300	}
2301
2302	static inline int task_on_rq_queued(struct task_struct *p)
2303	{
2304	return READ_ONCE(p->on_rq) == TASK_ON_RQ_QUEUED;
2305	}
2306
2307	static inline int task_on_rq_migrating(struct task_struct *p)
2308	{
2309	return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
2310	}
2311
/* Wake flags. The first three directly map to some SD flag value */
#define WF_EXEC			0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
#define WF_FORK			0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
#define WF_TTWU			0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */

#define WF_SYNC			0x10 /* Waker goes to sleep after wakeup */
#define WF_MIGRATED		0x20 /* Internal use, task got migrated */
#define WF_CURRENT_CPU		0x40 /* Prefer to move the wakee to the current CPU. */
#define WF_RQ_SELECTED		0x80 /* ->select_task_rq() was called */
2321
2322	static_assert(WF_EXEC == SD_BALANCE_EXEC);
2323	static_assert(WF_FORK == SD_BALANCE_FORK);
2324	static_assert(WF_TTWU == SD_BALANCE_WAKE);
2325
2326	/*
2327	* To aid in avoiding the subversion of "niceness" due to uneven distribution
2328	* of tasks with abnormal "nice" values across CPUs the contribution that
2329	* each task makes to its run queue's load is weighted according to its
2330	* scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
2331	* scaled version of the new time slice allocation that they receive on time
2332	* slice expiry etc.
2333	*/
2334
2335	#define WEIGHT_IDLEPRIO 3
2336	#define WMULT_IDLEPRIO 1431655765
2337
2338	extern const int sched_prio_to_weight[`40`];
2339	extern const u32 sched_prio_to_wmult[`40`];
2340
/*
 * {de,en}queue flags:
 *
 * DEQUEUE_SLEEP  - task is no longer runnable
 * ENQUEUE_WAKEUP - task just became runnable
 *
 * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
 *                are in a known state which allows modification. Such pairs
 *                should preserve as much state as possible.
 *
 * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
 *        in the runqueue.
 *
 * NOCLOCK - skip the update_rq_clock() (avoids double updates)
 *
 * MIGRATION - p->on_rq == TASK_ON_RQ_MIGRATING (used for DEADLINE)
 *
 * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
 * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
 * ENQUEUE_MIGRATED  - the task was migrated during wakeup
 * ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
 *
 */

#define DEQUEUE_SLEEP		0x01 /* Matches ENQUEUE_WAKEUP */
#define DEQUEUE_SAVE		0x02 /* Matches ENQUEUE_RESTORE */
#define DEQUEUE_MOVE		0x04 /* Matches ENQUEUE_MOVE */
#define DEQUEUE_NOCLOCK		0x08 /* Matches ENQUEUE_NOCLOCK */
#define DEQUEUE_SPECIAL		0x10
#define DEQUEUE_MIGRATING	0x100 /* Matches ENQUEUE_MIGRATING */
#define DEQUEUE_DELAYED		0x200 /* Matches ENQUEUE_DELAYED */
#define DEQUEUE_THROTTLE	0x800

#define ENQUEUE_WAKEUP		0x01
#define ENQUEUE_RESTORE		0x02
#define ENQUEUE_MOVE		0x04
#define ENQUEUE_NOCLOCK		0x08

#define ENQUEUE_HEAD		0x10
#define ENQUEUE_REPLENISH	0x20
#define ENQUEUE_MIGRATED	0x40
#define ENQUEUE_INITIAL		0x80
#define ENQUEUE_MIGRATING	0x100
#define ENQUEUE_DELAYED		0x200
#define ENQUEUE_RQ_SELECTED	0x400
2386
/* Sentinel pointer value (all bits set); never a valid object address. */
#define RETRY_TASK		((void *)-1UL)
2388
2389	struct affinity_context {
2390	const struct cpumask *new_mask;
2391	struct cpumask *user_mask;
2392	unsigned int flags;
2393	};
2394
2395	extern s64 update_curr_common(struct rq *rq);
2396
2397	struct sched_class {
2398
2399	#ifdef CONFIG_UCLAMP_TASK
2400	int uclamp_enabled;
2401	#endif
2402
2403	void (enqueue_task) (struct* rq rq, struct* task_struct p, int* flags);
2404	bool (dequeue_task) (struct* rq rq, struct* task_struct p, int* flags);
2405	void (yield_task) (struct* rq *rq);
2406	bool (yield_to_task)(struct* rq rq, struct* task_struct *p);
2407
2408	void (wakeup_preempt)(struct* rq rq, struct* task_struct p, int* flags);
2409
2410	int (balance)(struct* rq rq, struct* task_struct prev, struct* rq_flags *rf);
2411	struct task_struct (pick_task)(struct rq *rq);
2412	/*
2413	* Optional! When implemented pick_next_task() should be equivalent to:
2414	*
2415	* next = pick_task();
2416	* if (next) {
2417	* put_prev_task(prev);
2418	* set_next_task_first(next);
2419	* }
2420	*/
2421	struct task_struct (pick_next_task)(struct rq rq, struct* task_struct *prev);
2422
2423	void (put_prev_task)(struct* rq rq, struct* task_struct p, struct* task_struct *next);
2424	void (set_next_task)(struct* rq rq, struct* task_struct *p, bool first);
2425
2426	int (select_task_rq)(struct* task_struct p, int* task_cpu, int flags);
2427
2428	void (migrate_task_rq)(struct* task_struct p, int* new_cpu);
2429
2430	void (task_woken)(struct* rq this_rq, struct* task_struct *task);
2431
2432	void (set_cpus_allowed)(struct* task_struct p, struct* affinity_context *ctx);
2433
2434	void (rq_online)(struct* rq *rq);
2435	void (rq_offline)(struct* rq *rq);
2436
2437	struct rq (find_lock_rq)(struct task_struct p, struct* rq *rq);
2438
2439	void (task_tick)(struct* rq rq, struct* task_struct p, int* queued);
2440	void (task_fork)(struct* task_struct *p);
2441	void (task_dead)(struct* task_struct *p);
2442
2443	/*
2444	* The switched_from() call is allowed to drop rq->lock, therefore we
2445	* cannot assume the switched_from/switched_to pair is serialized by
2446	* rq->lock. They are however serialized by p->pi_lock.
2447	*/
2448	void (switching_to) (struct* rq this_rq, struct* task_struct *task);
2449	void (switched_from)(struct* rq this_rq, struct* task_struct *task);
2450	void (switched_to) (struct* rq this_rq, struct* task_struct *task);
2451	void (reweight_task)(struct* rq this_rq, struct* task_struct *task,
2452	const struct load_weight *lw);
2453	void (prio_changed) (struct* rq this_rq, struct* task_struct *task,
2454	int oldprio);
2455
2456	unsigned int (get_rr_interval)(struct* rq *rq,
2457	struct task_struct *task);
2458
2459	void (update_curr)(struct* rq *rq);
2460
2461	#ifdef CONFIG_FAIR_GROUP_SCHED
2462	void (task_change_group)(struct* task_struct *p);
2463	#endif
2464
2465	#ifdef CONFIG_SCHED_CORE
2466	int (task_is_throttled)(struct* task_struct p, int* cpu);
2467	#endif
2468	};
2469
2470	static inline void put_prev_task(struct rq rq, struct* task_struct *prev)
2471	{
2472	WARN_ON_ONCE(rq->donor != prev);
2473	prev->sched_class->put_prev_task(rq, prev, NULL);
2474	}
2475
2476	static inline void set_next_task(struct rq rq, struct* task_struct *next)
2477	{
2478	next->sched_class->set_next_task(rq, next, false);
2479	}
2480
2481	static inline void
2482	__put_prev_set_next_dl_server(struct rq *rq,
2483	struct task_struct *prev,
2484	struct task_struct *next)
2485	{
2486	prev->dl_server = NULL;
2487	next->dl_server = rq->dl_server;
2488	rq->dl_server = NULL;
2489	}
2490
2491	static inline void put_prev_set_next_task(struct rq *rq,
2492	struct task_struct *prev,
2493	struct task_struct *next)
2494	{
2495	WARN_ON_ONCE(rq->donor != prev);
2496
2497	__put_prev_set_next_dl_server(rq, prev, next);
2498
2499	if (next == prev)
2500	return;
2501
2502	prev->sched_class->put_prev_task(rq, prev, next);
2503	next->sched_class->set_next_task(rq, next, true);
2504	}
2505
/*
 * Helper to define a sched_class instance; each one is placed in a separate
 * section which is ordered by the linker script:
 *
 *   include/asm-generic/vmlinux.lds.h
 *
 * *CAREFUL* they are laid out in *REVERSE* order!!!
 *
 * Also enforce alignment on the instance, not the type, to guarantee layout.
 */
#define DEFINE_SCHED_CLASS(name) \
const struct sched_class name##_sched_class \
	__aligned(__alignof__(struct sched_class)) \
	__section("__" #name "_sched_class")
2520
2521	/ Defined in include/asm-generic/vmlinux.lds.h /
2522	extern struct sched_class __sched_class_highest[];
2523	extern struct sched_class __sched_class_lowest[];
2524
2525	extern const struct sched_class stop_sched_class;
2526	extern const struct sched_class dl_sched_class;
2527	extern const struct sched_class rt_sched_class;
2528	extern const struct sched_class fair_sched_class;
2529	extern const struct sched_class idle_sched_class;
2530
2531	/*
2532	* Iterate only active classes. SCX can take over all fair tasks or be
2533	* completely disabled. If the former, skip fair. If the latter, skip SCX.
2534	*/
2535	static inline const struct sched_class next_active_class(const* struct sched_class *class)
2536	{
2537	class++;
2538	#ifdef CONFIG_SCHED_CLASS_EXT
2539	if (scx_switched_all() && class == &fair_sched_class)
2540	class++;
2541	if (!scx_enabled() && class == &ext_sched_class)
2542	class++;
2543	#endif
2544	return class;
2545	}
2546
/* Visit every class in [_from, _to) by plain pointer increment. */
#define for_class_range(class, _from, _to) \
	for (class = (_from); class < (_to); class++)

#define for_each_class(class) \
	for_class_range(class, __sched_class_highest, __sched_class_lowest)

/* Same range, but advance with next_active_class(); ends when class == _to. */
#define for_active_class_range(class, _from, _to)				\
	for (class = (_from); class != (_to); class = next_active_class(class))

#define for_each_active_class(class)						\
	for_active_class_range(class, __sched_class_highest, __sched_class_lowest)

/* A class ranks above another when it sits at the lower address. */
#define sched_class_above(_a, _b)	((_a) < (_b))
2560
2561	static inline bool sched_stop_runnable(struct rq *rq)
2562	{
2563	return rq->stop && task_on_rq_queued(p: rq->stop);
2564	}
2565
2566	static inline bool sched_dl_runnable(struct rq *rq)
2567	{
2568	return rq->dl.dl_nr_running > `0`;
2569	}
2570
2571	static inline bool sched_rt_runnable(struct rq *rq)
2572	{
2573	return rq->rt.rt_queued > `0`;
2574	}
2575
2576	static inline bool sched_fair_runnable(struct rq *rq)
2577	{
2578	return rq->cfs.nr_queued > `0`;
2579	}
2580
/* Pick helpers implemented outside this header. */
extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
extern struct task_struct *pick_task_idle(struct rq *rq);
2583
/* SCA_* flag bits; each one occupies a distinct bit. */
#define SCA_CHECK		0x01
#define SCA_MIGRATE_DISABLE	0x02
#define SCA_MIGRATE_ENABLE	0x04
#define SCA_USER		0x08
2588
/* Implemented outside this header. */
extern void update_group_capacity(struct sched_domain *sd, int cpu);

extern void sched_balance_trigger(struct rq *rq);

extern int __set_cpus_allowed_ptr(struct task_struct *p, struct affinity_context *ctx);
extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx);
2595
2596	static inline bool task_allowed_on_cpu(struct task_struct p, int* cpu)
2597	{
2598	/ When not in the task's cpumask, no point in looking further. /
2599	if (!cpumask_test_cpu(cpu, cpumask: p->cpus_ptr))
2600	return false;
2601
2602	/ Can @cpu run a user thread? /
2603	if (!(p->flags & PF_KTHREAD) && !task_cpu_possible(cpu, p))
2604	return false;
2605
2606	return true;
2607	}
2608
2609	static inline cpumask_t alloc_user_cpus_ptr(int* node)
2610	{
2611	/*
2612	* See do_set_cpus_allowed() above for the rcu_head usage.
2613	*/
2614	int size = max_t(int, cpumask_size(), sizeof(struct rcu_head));
2615
2616	return kmalloc_node(size, GFP_KERNEL, node);
2617	}
2618
2619	static inline struct task_struct get_push_task(struct* rq *rq)
2620	{
2621	struct task_struct *p = rq->donor;
2622
2623	lockdep_assert_rq_held(rq);
2624
2625	if (rq->push_busy)
2626	return NULL;
2627
2628	if (p->nr_cpus_allowed == `1`)
2629	return NULL;
2630
2631	if (p->migration_disabled)
2632	return NULL;
2633
2634	rq->push_busy = true;
2635	return get_task_struct(t: p);
2636	}
2637
/* Implemented outside this header. */
extern int push_cpu_stop(void *arg);
2639
#ifdef CONFIG_CPU_IDLE

/* Record @idle_state in rq->idle_state. */
static inline void idle_set_state(struct rq *rq,
				  struct cpuidle_state *idle_state)
{
	rq->idle_state = idle_state;
}

/* Return rq->idle_state; warns once if no RCU read lock is held. */
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	return rq->idle_state;
}

#else /* !CONFIG_CPU_IDLE: */

/* No cpuidle support: nothing to record. */
static inline void idle_set_state(struct rq *rq,
				  struct cpuidle_state *idle_state)
{
}

/* No cpuidle support: there is never an idle state. */
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
	return NULL;
}

#endif /* !CONFIG_CPU_IDLE */
2668
2669	extern void schedule_idle(void);
2670	asmlinkage void schedule_user(void);
2671
2672	extern void sysrq_sched_debug_show(void);
2673	extern void sched_init_granularity(void);
2674	extern void update_max_interval(void);
2675
2676	extern void init_sched_dl_class(void);
2677	extern void init_sched_rt_class(void);
2678	extern void init_sched_fair_class(void);
2679
2680	extern void resched_curr(struct rq *rq);
2681	extern void resched_curr_lazy(struct rq *rq);
2682	extern void resched_cpu(int cpu);
2683
2684	extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
2685	extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
2686
2687	extern void init_dl_entity(struct sched_dl_entity *dl_se);
2688
2689	extern void init_cfs_throttle_work(struct task_struct *p);
2690
/* Fixed-point parameters: BW_UNIT is 1 << BW_SHIFT; MAX_BW fills the remaining bits of a u64. */
#define BW_SHIFT		20
#define BW_UNIT			(1 << BW_SHIFT)
#define RATIO_SHIFT		8
#define MAX_BW_BITS		(64 - BW_SHIFT)
#define MAX_BW			((1ULL << MAX_BW_BITS) - 1)
2696
2697	extern unsigned long to_ratio(u64 period, u64 runtime);
2698
2699	extern void init_entity_runnable_average(struct sched_entity *se);
2700	extern void post_init_entity_util_avg(struct task_struct *p);
2701
#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(struct rq *rq);
extern int __init sched_tick_offload_init(void);

/*
 * Tick may be needed by tasks in the runqueue depending on their policy and
 * requirements. If tick is needed, lets send the target an IPI to kick it out of
 * nohz mode if necessary.
 */
static inline void sched_update_tick_dependency(struct rq *rq)
{
	int cpu = cpu_of(rq);

	/* Only nohz_full CPUs carry a scheduler tick dependency. */
	if (!tick_nohz_full_cpu(cpu))
		return;

	if (sched_can_stop_tick(rq))
		tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
	else
		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
}
#else /* !CONFIG_NO_HZ_FULL: */
/* Stubs: without CONFIG_NO_HZ_FULL there is nothing to initialize or update. */
static inline int sched_tick_offload_init(void) { return 0; }
static inline void sched_update_tick_dependency(struct rq *rq) { }
#endif /* !CONFIG_NO_HZ_FULL */
2727
2728	static inline void add_nr_running(struct rq rq, unsigned* count)
2729	{
2730	unsigned prev_nr = rq->nr_running;
2731
2732	rq->nr_running = prev_nr + count;
2733	if (trace_sched_update_nr_running_tp_enabled()) {
2734	call_trace_sched_update_nr_running(rq, count);
2735	}
2736
2737	if (prev_nr < `2` && rq->nr_running >= `2`)
2738	set_rd_overloaded(rd: rq->rd, status: `1`);
2739
2740	sched_update_tick_dependency(rq);
2741	}
2742
2743	static inline void sub_nr_running(struct rq rq, unsigned* count)
2744	{
2745	rq->nr_running -= count;
2746	if (trace_sched_update_nr_running_tp_enabled()) {
2747	call_trace_sched_update_nr_running(rq, count: -count);
2748	}
2749
2750	/ Check if we still need preemption /
2751	sched_update_tick_dependency(rq);
2752	}
2753
2754	static inline void __block_task(struct rq rq, struct* task_struct *p)
2755	{
2756	if (p->sched_contributes_to_load)
2757	rq->nr_uninterruptible++;
2758
2759	if (p->in_iowait) {
2760	atomic_inc(v: &rq->nr_iowait);
2761	delayacct_blkio_start();
2762	}
2763
2764	ASSERT_EXCLUSIVE_WRITER(p->on_rq);
2765
2766	/*
2767	* The moment this write goes through, ttwu() can swoop in and migrate
2768	* this task, rendering our rq->__lock ineffective.
2769	*
2770	* __schedule() try_to_wake_up()
2771	* LOCK rq->__lock LOCK p->pi_lock
2772	* pick_next_task()
2773	* pick_next_task_fair()
2774	* pick_next_entity()
2775	* dequeue_entities()
2776	* __block_task()
2777	* RELEASE p->on_rq = 0 if (p->on_rq && ...)
2778	* break;
2779	*
2780	* ACQUIRE (after ctrl-dep)
2781	*
2782	* cpu = select_task_rq();
2783	* set_task_cpu(p, cpu);
2784	* ttwu_queue()
2785	* ttwu_do_activate()
2786	* LOCK rq->__lock
2787	* activate_task()
2788	* STORE p->on_rq = 1
2789	* UNLOCK rq->__lock
2790	*
2791	* Callers must ensure to not reference @p after this -- we no longer
2792	* own it.
2793	*/
2794	smp_store_release(&p->on_rq, `0`);
2795	}
2796
/* Implemented outside this header. */
extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);

extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
2801
/* Compile-time constant: 8 with CONFIG_PREEMPT_RT, 32 otherwise. */
#ifdef CONFIG_PREEMPT_RT
# define SCHED_NR_MIGRATE_BREAK	8
#else
# define SCHED_NR_MIGRATE_BREAK	32
#endif
2807
2808	extern __read_mostly unsigned int sysctl_sched_nr_migrate;
2809	extern __read_mostly unsigned int sysctl_sched_migration_cost;
2810
2811	extern unsigned int sysctl_sched_base_slice;
2812
2813	extern int sysctl_resched_latency_warn_ms;
2814	extern int sysctl_resched_latency_warn_once;
2815
2816	extern unsigned int sysctl_sched_tunable_scaling;
2817
2818	extern unsigned int sysctl_numa_balancing_scan_delay;
2819	extern unsigned int sysctl_numa_balancing_scan_period_min;
2820	extern unsigned int sysctl_numa_balancing_scan_period_max;
2821	extern unsigned int sysctl_numa_balancing_scan_size;
2822	extern unsigned int sysctl_numa_balancing_hot_threshold;
2823
#ifdef CONFIG_SCHED_HRTICK

/*
 * Use hrtick when:
 *  - enabled by features
 *  - hrtimer is actually high res
 *
 * This base helper additionally returns 0 when the rq's CPU is not active.
 */
static inline int hrtick_enabled(struct rq *rq)
{
	if (!cpu_active(cpu_of(rq)))
		return 0;
	return hrtimer_is_hres_active(&rq->hrtick_timer);
}

/* hrtick_enabled(), gated on the HRTICK scheduler feature. */
static inline int hrtick_enabled_fair(struct rq *rq)
{
	if (!sched_feat(HRTICK))
		return 0;
	return hrtick_enabled(rq);
}

/* hrtick_enabled(), gated on the HRTICK_DL scheduler feature. */
static inline int hrtick_enabled_dl(struct rq *rq)
{
	if (!sched_feat(HRTICK_DL))
		return 0;
	return hrtick_enabled(rq);
}

extern void hrtick_start(struct rq *rq, u64 delay);

#else /* !CONFIG_SCHED_HRTICK: */

/* Without CONFIG_SCHED_HRTICK every variant reports "disabled". */
static inline int hrtick_enabled_fair(struct rq *rq)
{
	return 0;
}

static inline int hrtick_enabled_dl(struct rq *rq)
{
	return 0;
}

static inline int hrtick_enabled(struct rq *rq)
{
	return 0;
}

#endif /* !CONFIG_SCHED_HRTICK */
2872
2873	#ifndef arch_scale_freq_tick
2874	static __always_inline void arch_scale_freq_tick(void) { }
2875	#endif
2876
2877	#ifndef arch_scale_freq_capacity
2878	/**
2879	* arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
2880	* @cpu: the CPU in question.
2881	*
2882	* Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e.
2883	*
2884	* f_curr
2885	* ------ * SCHED_CAPACITY_SCALE
2886	* f_max
2887	*/
2888	static __always_inline
2889	unsigned long arch_scale_freq_capacity(int cpu)
2890	{
2891	return SCHED_CAPACITY_SCALE;
2892	}
2893	#endif
2894
2895	/*
2896	* In double_lock_balance()/double_rq_lock(), we use raw_spin_rq_lock() to
2897	* acquire rq lock instead of rq_lock(). So at the end of these two functions
2898	* we need to call double_rq_clock_clear_update() to clear RQCF_UPDATED of
2899	* rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
2900	*/
2901	static inline void double_rq_clock_clear_update(struct rq rq1, struct* rq *rq2)
2902	{
2903	rq1->clock_update_flags &= (RQCF_REQ_SKIP\|RQCF_ACT_SKIP);
2904	rq2->clock_update_flags &= (RQCF_REQ_SKIP\|RQCF_ACT_SKIP);
2905	}
2906
/*
 * Define a guard class over two locks of the same @type. The generated
 * constructor stores both pointers in the guard object and then evaluates
 * @_lock, which may refer to them as _T->lock and _T->lock2.
 */
#define DEFINE_LOCK_GUARD_2(name, type, _lock, _unlock, ...)				\
__DEFINE_UNLOCK_GUARD(name, type, _unlock, type *lock2; __VA_ARGS__)			\
static inline class_##name##_t class_##name##_constructor(type *lock, type *lock2)	\
{ class_##name##_t _t = { .lock = lock, .lock2 = lock2 }, *_T = &_t;			\
  _lock; return _t; }
2912
2913	static inline bool rq_order_less(struct rq rq1, struct* rq *rq2)
2914	{
2915	#ifdef CONFIG_SCHED_CORE
2916	/*
2917	* In order to not have {0,2},{1,3} turn into into an AB-BA,
2918	* order by core-id first and cpu-id second.
2919	*
2920	* Notably:
2921	*
2922	* double_rq_lock(0,3); will take core-0, core-1 lock
2923	* double_rq_lock(1,2); will take core-1, core-0 lock
2924	*
2925	* when only cpu-id is considered.
2926	*/
2927	if (rq1->core->cpu < rq2->core->cpu)
2928	return true;
2929	if (rq1->core->cpu > rq2->core->cpu)
2930	return false;
2931
2932	/*
2933	* __sched_core_flip() relies on SMT having cpu-id lock order.
2934	*/
2935	#endif /* CONFIG_SCHED_CORE */
2936	return rq1->cpu < rq2->cpu;
2937	}
2938
/* Implemented outside this header. */
extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
2940
2941	#ifdef CONFIG_PREEMPTION
2942
2943	/*
2944	* fair double_lock_balance: Safely acquires both rq->locks in a fair
2945	* way at the expense of forcing extra atomic operations in all
2946	* invocations. This assures that the double_lock is acquired using the
2947	* same underlying policy as the spinlock_t on this architecture, which
2948	* reduces latency compared to the unfair variant below. However, it
2949	* also adds more overhead and therefore may reduce throughput.
2950	*/
2951	static inline int _double_lock_balance(struct rq this_rq, struct* rq *busiest)
2952	__releases(this_rq->lock)
2953	__acquires(busiest->lock)
2954	__acquires(this_rq->lock)
2955	{
2956	raw_spin_rq_unlock(rq: this_rq);
2957	double_rq_lock(rq1: this_rq, rq2: busiest);
2958
2959	return `1`;
2960	}
2961
2962	#else /* !CONFIG_PREEMPTION: */
2963	/*
2964	* Unfair double_lock_balance: Optimizes throughput at the expense of
2965	* latency by eliminating extra atomic operations when the locks are
2966	* already in proper order on entry. This favors lower CPU-ids and will
2967	* grant the double lock to lower CPUs over higher ids under contention,
2968	* regardless of entry order into the function.
2969	*/
2970	static inline int _double_lock_balance(struct rq this_rq, struct* rq *busiest)
2971	__releases(this_rq->lock)
2972	__acquires(busiest->lock)
2973	__acquires(this_rq->lock)
2974	{
2975	if (__rq_lockp(this_rq) == __rq_lockp(busiest) \|\|
2976	likely(raw_spin_rq_trylock(busiest))) {
2977	double_rq_clock_clear_update(this_rq, busiest);
2978	return `0`;
2979	}
2980
2981	if (rq_order_less(this_rq, busiest)) {
2982	raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
2983	double_rq_clock_clear_update(this_rq, busiest);
2984	return `0`;
2985	}
2986
2987	raw_spin_rq_unlock(this_rq);
2988	double_rq_lock(this_rq, busiest);
2989
2990	return `1`;
2991	}
2992
2993	#endif /* !CONFIG_PREEMPTION */
2994
/*
 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
 *
 * Asserts that interrupts are disabled and returns the result of
 * _double_lock_balance().
 */
static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
{
	lockdep_assert_irqs_disabled();

	return _double_lock_balance(this_rq, busiest);
}
3004
3005	static inline void double_unlock_balance(struct rq this_rq, struct* rq *busiest)
3006	__releases(busiest->lock)
3007	{
3008	if (__rq_lockp(rq: this_rq) != __rq_lockp(rq: busiest))
3009	raw_spin_rq_unlock(rq: busiest);
3010	lock_set_subclass(&__rq_lockp(this_rq)->dep_map, `0`, _RET_IP_);
3011	}
3012
3013	static inline void double_lock(spinlock_t l1, spinlock_t l2)
3014	{
3015	if (l1 > l2)
3016	swap(l1, l2);
3017
3018	spin_lock(lock: l1);
3019	spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
3020	}
3021
3022	static inline void double_lock_irq(spinlock_t l1, spinlock_t l2)
3023	{
3024	if (l1 > l2)
3025	swap(l1, l2);
3026
3027	spin_lock_irq(lock: l1);
3028	spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
3029	}
3030
3031	static inline void double_raw_lock(raw_spinlock_t l1, raw_spinlock_t l2)
3032	{
3033	if (l1 > l2)
3034	swap(l1, l2);
3035
3036	raw_spin_lock(l1);
3037	raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
3038	}
3039
3040	static inline void double_raw_unlock(raw_spinlock_t l1, raw_spinlock_t l2)
3041	{
3042	raw_spin_unlock(l1);
3043	raw_spin_unlock(l2);
3044	}
3045
3046	DEFINE_LOCK_GUARD_2(double_raw_spinlock, raw_spinlock_t,
3047	double_raw_lock(_T->lock, _T->lock2),
3048	double_raw_unlock(_T->lock, _T->lock2))
3049
3050	/*
3051	* double_rq_unlock - safely unlock two runqueues
3052	*
3053	* Note this does not restore interrupts like task_rq_unlock,
3054	* you need to do so manually after calling.
3055	*/
3056	static inline void double_rq_unlock(struct rq rq1, struct* rq *rq2)
3057	__releases(rq1->lock)
3058	__releases(rq2->lock)
3059	{
3060	if (__rq_lockp(rq: rq1) != __rq_lockp(rq: rq2))
3061	raw_spin_rq_unlock(rq: rq2);
3062	else
3063	__release(rq2->lock);
3064	raw_spin_rq_unlock(rq: rq1);
3065	}
3066
3067	extern void set_rq_online (struct rq *rq);
3068	extern void set_rq_offline(struct rq *rq);
3069
3070	extern bool sched_smp_initialized;
3071
3072	DEFINE_LOCK_GUARD_2(double_rq_lock, struct rq,
3073	double_rq_lock(_T->lock, _T->lock2),
3074	double_rq_unlock(_T->lock, _T->lock2))
3075
3076	extern struct sched_entity __pick_root_entity(struct* cfs_rq *cfs_rq);
3077	extern struct sched_entity __pick_first_entity(struct* cfs_rq *cfs_rq);
3078	extern struct sched_entity __pick_last_entity(struct* cfs_rq *cfs_rq);
3079
3080	extern bool sched_debug_verbose;
3081
3082	extern void print_cfs_stats(struct seq_file m, int* cpu);
3083	extern void print_rt_stats(struct seq_file m, int* cpu);
3084	extern void print_dl_stats(struct seq_file m, int* cpu);
3085	extern void print_cfs_rq(struct seq_file m, int* cpu, struct cfs_rq *cfs_rq);
3086	extern void print_rt_rq(struct seq_file m, int* cpu, struct rt_rq *rt_rq);
3087	extern void print_dl_rq(struct seq_file m, int* cpu, struct dl_rq *dl_rq);
3088
3089	extern void resched_latency_warn(int cpu, u64 latency);
3090
#ifdef CONFIG_NUMA_BALANCING
/* NUMA statistics printers; only declared when NUMA balancing is built in. */
extern void show_numa_stats(struct task_struct *p, struct seq_file *m);
extern void
print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
		 unsigned long tpf, unsigned long gsf, unsigned long gpf);
#endif /* CONFIG_NUMA_BALANCING */
3097
/* Per-class runqueue initializers; defined outside this header. */
extern void init_cfs_rq(struct cfs_rq *cfs_rq);
extern void init_rt_rq(struct rt_rq *rt_rq);
extern void init_dl_rq(struct dl_rq *dl_rq);

extern void cfs_bandwidth_usage_inc(void);
extern void cfs_bandwidth_usage_dec(void);
3104
#ifdef CONFIG_NO_HZ_COMMON

/* Bit positions of the NOHZ kick flags. */
#define NOHZ_BALANCE_KICK_BIT	0
#define NOHZ_STATS_KICK_BIT	1
#define NOHZ_NEWILB_KICK_BIT	2
#define NOHZ_NEXT_KICK_BIT	3

/* Run sched_balance_domains() */
#define NOHZ_BALANCE_KICK	BIT(NOHZ_BALANCE_KICK_BIT)
/* Update blocked load */
#define NOHZ_STATS_KICK		BIT(NOHZ_STATS_KICK_BIT)
/* Update blocked load when entering idle */
#define NOHZ_NEWILB_KICK	BIT(NOHZ_NEWILB_KICK_BIT)
/* Update nohz.next_balance */
#define NOHZ_NEXT_KICK		BIT(NOHZ_NEXT_KICK_BIT)

/* Note: NOHZ_NEWILB_KICK is not part of this mask. */
#define NOHZ_KICK_MASK	(NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)

/* Address of the nohz_flags member of @cpu's runqueue. */
#define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)

extern void nohz_balance_exit_idle(struct rq *rq);
#else /* !CONFIG_NO_HZ_COMMON: */
static inline void nohz_balance_exit_idle(struct rq *rq) { }
#endif /* !CONFIG_NO_HZ_COMMON */
3129
/* Real implementation only with CONFIG_NO_HZ_COMMON; otherwise a no-op. */
#ifdef CONFIG_NO_HZ_COMMON
extern void nohz_run_idle_balance(int cpu);
#else
static inline void nohz_run_idle_balance(int cpu) { }
#endif
3135
3136	#include "stats.h"
3137
3138	#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
3139
extern void __sched_core_account_forceidle(struct rq *rq);

/* Call __sched_core_account_forceidle() only when schedstats are enabled. */
static inline void sched_core_account_forceidle(struct rq *rq)
{
	if (schedstat_enabled())
		__sched_core_account_forceidle(rq);
}
3147
extern void __sched_core_tick(struct rq *rq);

/*
 * Call __sched_core_tick() only when core scheduling is enabled for @rq
 * and schedstats are enabled.
 */
static inline void sched_core_tick(struct rq *rq)
{
	if (sched_core_enabled(rq) && schedstat_enabled())
		__sched_core_tick(rq);
}
3155
3156	#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
3157
/* No-op stubs when CONFIG_SCHED_CORE and CONFIG_SCHEDSTATS are not both set. */
static inline void sched_core_account_forceidle(struct rq *rq) { }

static inline void sched_core_tick(struct rq *rq) { }
3161
3162	#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
3163
3164	#ifdef CONFIG_IRQ_TIME_ACCOUNTING
3165
3166	struct irqtime {
3167	u64 total;
3168	u64 tick_delta;
3169	u64 irq_start_time;
3170	struct u64_stats_sync sync;
3171	};
3172
3173	DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
3174	extern int sched_clock_irqtime;
3175
3176	static inline int irqtime_enabled(void)
3177	{
3178	return sched_clock_irqtime;
3179	}
3180
3181	/*
3182	* Returns the irqtime minus the softirq time computed by ksoftirqd.
3183	* Otherwise ksoftirqd's sum_exec_runtime is subtracted its own runtime
3184	* and never move forward.
3185	*/
3186	static inline u64 irq_time_read(int cpu)
3187	{
3188	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
3189	unsigned int seq;
3190	u64 total;
3191
3192	do {
3193	seq = __u64_stats_fetch_begin(&irqtime->sync);
3194	total = irqtime->total;
3195	} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
3196
3197	return total;
3198	}
3199
3200	#else /* !CONFIG_IRQ_TIME_ACCOUNTING: */
3201
/* Without CONFIG_IRQ_TIME_ACCOUNTING, IRQ time accounting is never enabled. */
static inline int irqtime_enabled(void)
{
	return 0;
}
3206
3207	#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
3208
3209	#ifdef CONFIG_CPU_FREQ
3210
3211	DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
3212
3213	/**
3214	* cpufreq_update_util - Take a note about CPU utilization changes.
3215	* @rq: Runqueue to carry out the update for.
3216	* @flags: Update reason flags.
3217	*
3218	* This function is called by the scheduler on the CPU whose utilization is
3219	* being updated.
3220	*
3221	* It can only be called from RCU-sched read-side critical sections.
3222	*
3223	* The way cpufreq is currently arranged requires it to evaluate the CPU
3224	* performance state (frequency/voltage) on a regular basis to prevent it from
3225	* being stuck in a completely inadequate performance level for too long.
3226	* That is not guaranteed to happen if the updates are only triggered from CFS
3227	* and DL, though, because they may not be coming in if only RT tasks are
3228	* active all the time (or there are RT tasks only).
3229	*
3230	* As a workaround for that issue, this function is called periodically by the
3231	* RT sched class to trigger extra cpufreq updates to prevent it from stalling,
3232	* but that really is a band-aid. Going forward it should be replaced with
3233	* solutions targeted more specifically at RT tasks.
3234	*/
3235	static inline void cpufreq_update_util(struct rq rq, unsigned* int flags)
3236	{
3237	struct update_util_data *data;
3238
3239	data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
3240	cpu_of(rq)));
3241	if (data)
3242	data->func(data, rq_clock(rq), flags);
3243	}
3244	#else /* !CONFIG_CPU_FREQ: */
/* No-op when CONFIG_CPU_FREQ is not set. */
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { }
3246	#endif /* !CONFIG_CPU_FREQ */
3247
/*
 * arch_scale_freq_invariant() defaults to true when the architecture
 * provides arch_scale_freq_capacity (unless the architecture defines its
 * own), and to false otherwise.
 */
#ifdef arch_scale_freq_capacity
# ifndef arch_scale_freq_invariant
#  define arch_scale_freq_invariant()	true
# endif
#else
# define arch_scale_freq_invariant()	false
#endif
3255
/* Utilization helpers defined outside this header. */
unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
				 unsigned long *min,
				 unsigned long *max);

unsigned long sugov_effective_cpu_perf(int cpu, unsigned long actual,
				 unsigned long min,
				 unsigned long max);
3263
3264
3265	/*
3266	* Verify the fitness of task @p to run on @cpu taking into account the
3267	* CPU original capacity and the runtime/deadline ratio of the task.
3268	*
3269	* The function will return true if the original capacity of @cpu is
3270	* greater than or equal to task's deadline density right shifted by
3271	* (BW_SHIFT - SCHED_CAPACITY_SHIFT) and false otherwise.
3272	*/
3273	static inline bool dl_task_fits_capacity(struct task_struct p, int* cpu)
3274	{
3275	unsigned long cap = arch_scale_cpu_capacity(cpu);
3276
3277	return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT);
3278	}
3279
3280	static inline unsigned long cpu_bw_dl(struct rq *rq)
3281	{
3282	return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
3283	}
3284
3285	static inline unsigned long cpu_util_dl(struct rq *rq)
3286	{
3287	return READ_ONCE(rq->avg_dl.util_avg);
3288	}
3289
3290
/* CFS utilization of @cpu; defined outside this header. */
extern unsigned long cpu_util_cfs(int cpu);
extern unsigned long cpu_util_cfs_boost(int cpu);
3293
3294	static inline unsigned long cpu_util_rt(struct rq *rq)
3295	{
3296	return READ_ONCE(rq->avg_rt.util_avg);
3297	}
3298
3299	#ifdef CONFIG_UCLAMP_TASK
3300
/* Defined outside this header when CONFIG_UCLAMP_TASK is set. */
unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
3302
3303	/*
3304	* When uclamp is compiled in, the aggregation at rq level is 'turned off'
3305	* by default in the fast path and only gets turned on once userspace performs
3306	* an operation that requires it.
3307	*
3308	* Returns true if userspace opted-in to use uclamp and aggregation at rq level
3309	* hence is active.
3310	*/
3311	static inline bool uclamp_is_used(void)
3312	{
3313	return static_branch_likely(&sched_uclamp_used);
3314	}
3315
3316	/*
3317	* Enabling static branches would get the cpus_read_lock(),
3318	* check whether uclamp_is_used before enable it to avoid always
3319	* calling cpus_read_lock(). Because we never disable this
3320	* static key once enable it.
3321	*/
3322	static inline void sched_uclamp_enable(void)
3323	{
3324	if (!uclamp_is_used())
3325	static_branch_enable(&sched_uclamp_used);
3326	}
3327
3328	static inline unsigned long uclamp_rq_get(struct rq *rq,
3329	enum uclamp_id clamp_id)
3330	{
3331	return READ_ONCE(rq->uclamp[clamp_id].value);
3332	}
3333
3334	static inline void uclamp_rq_set(struct rq rq, enum* uclamp_id clamp_id,
3335	unsigned int value)
3336	{
3337	WRITE_ONCE(rq->uclamp[clamp_id].value, value);
3338	}
3339
3340	static inline bool uclamp_rq_is_idle(struct rq *rq)
3341	{
3342	return rq->uclamp_flags & UCLAMP_FLAG_IDLE;
3343	}
3344
3345	/ Is the rq being capped/throttled by uclamp_max? /
3346	static inline bool uclamp_rq_is_capped(struct rq *rq)
3347	{
3348	unsigned long rq_util;
3349	unsigned long max_util;
3350
3351	if (!uclamp_is_used())
3352	return false;
3353
3354	rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq);
3355	max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
3356
3357	return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util;
3358	}
3359
/* Iterate @clamp_id over 0 .. UCLAMP_CNT - 1. */
#define for_each_clamp_id(clamp_id) \
	for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++)

extern unsigned int sysctl_sched_uclamp_util_min_rt_default;
3364
3365
3366	static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
3367	{
3368	if (clamp_id == UCLAMP_MIN)
3369	return `0`;
3370	return SCHED_CAPACITY_SCALE;
3371	}
3372
3373	/ Integer rounded range for each bucket /
3374	#define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
3375
3376	static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
3377	{
3378	return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - `1`);
3379	}
3380
3381	static inline void
3382	uclamp_se_set(struct uclamp_se uc_se, unsigned* int value, bool user_defined)
3383	{
3384	uc_se->value = value;
3385	uc_se->bucket_id = uclamp_bucket_id(value);
3386	uc_se->user_defined = user_defined;
3387	}
3388
3389	#else /* !CONFIG_UCLAMP_TASK: */
3390
3391	static inline unsigned long
3392	uclamp_eff_value(struct task_struct p, enum* uclamp_id clamp_id)
3393	{
3394	if (clamp_id == UCLAMP_MIN)
3395	return `0`;
3396
3397	return SCHED_CAPACITY_SCALE;
3398	}
3399
/* Without CONFIG_UCLAMP_TASK a runqueue is never capped. */
static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; }
3401
/* Without CONFIG_UCLAMP_TASK uclamp is never in use. */
static inline bool uclamp_is_used(void)
{
	return false;
}
3406
/* No-op without CONFIG_UCLAMP_TASK. */
static inline void sched_uclamp_enable(void) {}
3408
3409	static inline unsigned long
3410	uclamp_rq_get(struct rq rq, enum* uclamp_id clamp_id)
3411	{
3412	if (clamp_id == UCLAMP_MIN)
3413	return `0`;
3414
3415	return SCHED_CAPACITY_SCALE;
3416	}
3417
3418	static inline void
3419	uclamp_rq_set(struct rq rq, enum* uclamp_id clamp_id, unsigned int value)
3420	{
3421	}
3422
/* Without CONFIG_UCLAMP_TASK the uclamp idle flag is never reported. */
static inline bool uclamp_rq_is_idle(struct rq *rq)
{
	return false;
}
3427
3428	#endif /* !CONFIG_UCLAMP_TASK */
3429
3430	#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
3431
3432	static inline unsigned long cpu_util_irq(struct rq *rq)
3433	{
3434	return READ_ONCE(rq->avg_irq.util_avg);
3435	}
3436
/*
 * Scale @util by the fraction of @max that is not taken by @irq:
 * util * (max - irq) / max, in integer arithmetic.
 *
 * @max is used as a divisor and is not checked here.
 */
static inline
unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
{
	util *= (max - irq);
	util /= max;

	return util;

}
3446
3447	#else /* !CONFIG_HAVE_SCHED_AVG_IRQ: */
3448
/* Without CONFIG_HAVE_SCHED_AVG_IRQ no IRQ utilization is tracked. */
static inline unsigned long cpu_util_irq(struct rq *rq)
{
	return 0;
}
3453
/* Without CONFIG_HAVE_SCHED_AVG_IRQ, @util is returned unscaled. */
static inline
unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
{
	return util;
}
3459
3460	#endif /* !CONFIG_HAVE_SCHED_AVG_IRQ */
3461
extern void __setparam_fair(struct task_struct *p, const struct sched_attr *attr);
3463
3464	#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
3465
3466	#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
3467
3468	DECLARE_STATIC_KEY_FALSE(sched_energy_present);
3469
3470	static inline bool sched_energy_enabled(void)
3471	{
3472	return static_branch_unlikely(&sched_energy_present);
3473	}
3474
3475	#else /* !(CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL): */
3476
/* Stubs when the energy model or the schedutil governor is not built in. */
#define perf_domain_span(pd) NULL

static inline bool sched_energy_enabled(void) { return false; }
3480
3481	#endif /* !(CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
3482
3483	#ifdef CONFIG_MEMBARRIER
3484
3485	/*
3486	* The scheduler provides memory barriers required by membarrier between:
3487	* - prior user-space memory accesses and store to rq->membarrier_state,
3488	* - store to rq->membarrier_state and following user-space memory accesses.
3489	* In the same way it provides those guarantees around store to rq->curr.
3490	*/
3491	static inline void membarrier_switch_mm(struct rq *rq,
3492	struct mm_struct *prev_mm,
3493	struct mm_struct *next_mm)
3494	{
3495	int membarrier_state;
3496
3497	if (prev_mm == next_mm)
3498	return;
3499
3500	membarrier_state = atomic_read(v: &next_mm->membarrier_state);
3501	if (READ_ONCE(rq->membarrier_state) == membarrier_state)
3502	return;
3503
3504	WRITE_ONCE(rq->membarrier_state, membarrier_state);
3505	}
3506
3507	#else /* !CONFIG_MEMBARRIER: */
3508
/* No-op without CONFIG_MEMBARRIER. */
static inline void membarrier_switch_mm(struct rq *rq,
					struct mm_struct *prev_mm,
					struct mm_struct *next_mm)
{
}
3514
3515	#endif /* !CONFIG_MEMBARRIER */
3516
3517	static inline bool is_per_cpu_kthread(struct task_struct *p)
3518	{
3519	if (!(p->flags & PF_KTHREAD))
3520	return false;
3521
3522	if (p->nr_cpus_allowed != `1`)
3523	return false;
3524
3525	return true;
3526	}
3527
extern void swake_up_all_locked(struct swait_queue_head *q);
extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);

extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags);
3532
#ifdef CONFIG_PREEMPT_DYNAMIC
extern int preempt_dynamic_mode;
extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif
/* Declared regardless of CONFIG_PREEMPT_DYNAMIC. */
extern const char *preempt_modes[];
3539
3540	#ifdef CONFIG_SCHED_MM_CID
3541
3542	#define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
3543	#define MM_CID_SCAN_DELAY 100 /* 100ms */
3544
3545	extern raw_spinlock_t cid_lock;
3546	extern int use_cid_lock;
3547
3548	extern void sched_mm_cid_migrate_from(struct task_struct *t);
3549	extern void sched_mm_cid_migrate_to(struct rq dst_rq, struct* task_struct *t);
3550	extern void task_tick_mm_cid(struct rq rq, struct* task_struct *curr);
3551	extern void init_sched_mm_cid(struct task_struct *t);
3552
/* Clear @cid in @mm's cid mask; negative values are ignored. */
static inline void __mm_cid_put(struct mm_struct *mm, int cid)
{
	if (cid < 0)
		return;
	cpumask_clear_cpu(cid, mm_cidmask(mm));
}
3559
3560	/*
3561	* The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
3562	* the MM_CID_UNSET state without holding the rq lock, but the rq lock needs to
3563	* be held to transition to other states.
3564	*
3565	* State transitions synchronized with cmpxchg or try_cmpxchg need to be
3566	* consistent across CPUs, which prevents use of this_cpu_cmpxchg.
3567	*/
3568	static inline void mm_cid_put_lazy(struct task_struct *t)
3569	{
3570	struct mm_struct *mm = t->mm;
3571	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
3572	int cid;
3573
3574	lockdep_assert_irqs_disabled();
3575	cid = __this_cpu_read(pcpu_cid->cid);
3576	if (!mm_cid_is_lazy_put(cid) \|\|
3577	!try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
3578	return;
3579	__mm_cid_put(mm, cid: mm_cid_clear_lazy_put(cid));
3580	}
3581
3582	static inline int mm_cid_pcpu_unset(struct mm_struct *mm)
3583	{
3584	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
3585	int cid, res;
3586
3587	lockdep_assert_irqs_disabled();
3588	cid = __this_cpu_read(pcpu_cid->cid);
3589	for (;;) {
3590	if (mm_cid_is_unset(cid))
3591	return MM_CID_UNSET;
3592	/*
3593	* Attempt transition from valid or lazy-put to unset.
3594	*/
3595	res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET);
3596	if (res == cid)
3597	break;
3598	cid = res;
3599	}
3600	return cid;
3601	}
3602
3603	static inline void mm_cid_put(struct mm_struct *mm)
3604	{
3605	int cid;
3606
3607	lockdep_assert_irqs_disabled();
3608	cid = mm_cid_pcpu_unset(mm);
3609	if (cid == MM_CID_UNSET)
3610	return;
3611	__mm_cid_put(mm, cid: mm_cid_clear_lazy_put(cid));
3612	}
3613
3614	static inline int __mm_cid_try_get(struct task_struct t, struct* mm_struct *mm)
3615	{
3616	struct cpumask *cidmask = mm_cidmask(mm);
3617	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
3618	int cid, max_nr_cid, allowed_max_nr_cid;
3619
3620	/*
3621	* After shrinking the number of threads or reducing the number
3622	* of allowed cpus, reduce the value of max_nr_cid so expansion
3623	* of cid allocation will preserve cache locality if the number
3624	* of threads or allowed cpus increase again.
3625	*/
3626	max_nr_cid = atomic_read(v: &mm->max_nr_cid);
3627	while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed),
3628	atomic_read(&mm->mm_users))),
3629	max_nr_cid > allowed_max_nr_cid) {
3630	/ atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. /
3631	if (atomic_try_cmpxchg(v: &mm->max_nr_cid, old: &max_nr_cid, new: allowed_max_nr_cid)) {
3632	max_nr_cid = allowed_max_nr_cid;
3633	break;
3634	}
3635	}
3636	/ Try to re-use recent cid. This improves cache locality. /
3637	cid = __this_cpu_read(pcpu_cid->recent_cid);
3638	if (!mm_cid_is_unset(cid) && cid < max_nr_cid &&
3639	!cpumask_test_and_set_cpu(cpu: cid, cpumask: cidmask))
3640	return cid;
3641	/*
3642	* Expand cid allocation if the maximum number of concurrency
3643	* IDs allocated (max_nr_cid) is below the number cpus allowed
3644	* and number of threads. Expanding cid allocation as much as
3645	* possible improves cache locality.
3646	*/
3647	cid = max_nr_cid;
3648	while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(v: &mm->mm_users)) {
3649	/ atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. /
3650	if (!atomic_try_cmpxchg(v: &mm->max_nr_cid, old: &cid, new: cid + `1`))
3651	continue;
3652	if (!cpumask_test_and_set_cpu(cpu: cid, cpumask: cidmask))
3653	return cid;
3654	}
3655	/*
3656	* Find the first available concurrency id.
3657	* Retry finding first zero bit if the mask is temporarily
3658	* filled. This only happens during concurrent remote-clear
3659	* which owns a cid without holding a rq lock.
3660	*/
3661	for (;;) {
3662	cid = cpumask_first_zero(srcp: cidmask);
3663	if (cid < READ_ONCE(mm->nr_cpus_allowed))
3664	break;
3665	cpu_relax();
3666	}
3667	if (cpumask_test_and_set_cpu(cpu: cid, cpumask: cidmask))
3668	return -`1`;
3669
3670	return cid;
3671	}
3672
3673	/*
3674	* Save a snapshot of the current runqueue time of this cpu
3675	* with the per-cpu cid value, allowing to estimate how recently it was used.
3676	*/
3677	static inline void mm_cid_snapshot_time(struct rq rq, struct* mm_struct *mm)
3678	{
3679	struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq));
3680
3681	lockdep_assert_rq_held(rq);
3682	WRITE_ONCE(pcpu_cid->time, rq->clock);
3683	}
3684
3685	static inline int __mm_cid_get(struct rq rq, struct* task_struct *t,
3686	struct mm_struct *mm)
3687	{
3688	int cid;
3689
3690	/*
3691	* All allocations (even those using the cid_lock) are lock-free. If
3692	* use_cid_lock is set, hold the cid_lock to perform cid allocation to
3693	* guarantee forward progress.
3694	*/
3695	if (!READ_ONCE(use_cid_lock)) {
3696	cid = __mm_cid_try_get(t, mm);
3697	if (cid >= `0`)
3698	goto end;
3699	raw_spin_lock(&cid_lock);
3700	} else {
3701	raw_spin_lock(&cid_lock);
3702	cid = __mm_cid_try_get(t, mm);
3703	if (cid >= `0`)
3704	goto unlock;
3705	}
3706
3707	/*
3708	* cid concurrently allocated. Retry while forcing following
3709	* allocations to use the cid_lock to ensure forward progress.
3710	*/
3711	WRITE_ONCE(use_cid_lock, `1`);
3712	/*
3713	* Set use_cid_lock before allocation. Only care about program order
3714	* because this is only required for forward progress.
3715	*/
3716	barrier();
3717	/*
3718	* Retry until it succeeds. It is guaranteed to eventually succeed once
3719	* all newcoming allocations observe the use_cid_lock flag set.
3720	*/
3721	do {
3722	cid = __mm_cid_try_get(t, mm);
3723	cpu_relax();
3724	} while (cid < `0`);
3725	/*
3726	* Allocate before clearing use_cid_lock. Only care about
3727	* program order because this is for forward progress.
3728	*/
3729	barrier();
3730	WRITE_ONCE(use_cid_lock, `0`);
3731	unlock:
3732	raw_spin_unlock(&cid_lock);
3733	end:
3734	mm_cid_snapshot_time(rq, mm);
3735
3736	return cid;
3737	}
3738
3739	static inline int mm_cid_get(struct rq rq, struct* task_struct *t,
3740	struct mm_struct *mm)
3741	{
3742	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
3743	struct cpumask *cpumask;
3744	int cid;
3745
3746	lockdep_assert_rq_held(rq);
3747	cpumask = mm_cidmask(mm);
3748	cid = __this_cpu_read(pcpu_cid->cid);
3749	if (mm_cid_is_valid(cid)) {
3750	mm_cid_snapshot_time(rq, mm);
3751	return cid;
3752	}
3753	if (mm_cid_is_lazy_put(cid)) {
3754	if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
3755	__mm_cid_put(mm, cid: mm_cid_clear_lazy_put(cid));
3756	}
3757	cid = __mm_cid_get(rq, t, mm);
3758	__this_cpu_write(pcpu_cid->cid, cid);
3759	__this_cpu_write(pcpu_cid->recent_cid, cid);
3760
3761	return cid;
3762	}
3763
3764	static inline void switch_mm_cid(struct rq *rq,
3765	struct task_struct *prev,
3766	struct task_struct *next)
3767	{
3768	/*
3769	* Provide a memory barrier between rq->curr store and load of
3770	* {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition.
3771	*
3772	* Should be adapted if context_switch() is modified.
3773	*/
3774	if (!next->mm) { // to kernel
3775	/*
3776	* user -> kernel transition does not guarantee a barrier, but
3777	* we can use the fact that it performs an atomic operation in
3778	* mmgrab().
3779	*/
3780	if (prev->mm) // from user
3781	smp_mb__after_mmgrab();
3782	/*
3783	* kernel -> kernel transition does not change rq->curr->mm
3784	* state. It stays NULL.
3785	*/
3786	} else { // to user
3787	/*
3788	* kernel -> user transition does not provide a barrier
3789	* between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
3790	* Provide it here.
3791	*/
3792	if (!prev->mm) { // from kernel
3793	smp_mb();
3794	} else { // from user
3795	/*
3796	* user->user transition relies on an implicit
3797	* memory barrier in switch_mm() when
3798	* current->mm changes. If the architecture
3799	* switch_mm() does not have an implicit memory
3800	* barrier, it is emitted here. If current->mm
3801	* is unchanged, no barrier is needed.
3802	*/
3803	smp_mb__after_switch_mm();
3804	}
3805	}
3806	if (prev->mm_cid_active) {
3807	mm_cid_snapshot_time(rq, mm: prev->mm);
3808	mm_cid_put_lazy(t: prev);
3809	prev->mm_cid = -`1`;
3810	}
3811	if (next->mm_cid_active)
3812	next->last_mm_cid = next->mm_cid = mm_cid_get(rq, t: next, mm: next->mm);
3813	}
3814
3815	#else /* !CONFIG_SCHED_MM_CID: */
/* No-op stubs without CONFIG_SCHED_MM_CID. */
static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { }
static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
static inline void init_sched_mm_cid(struct task_struct *t) { }
3821	#endif /* !CONFIG_SCHED_MM_CID */
3822
3823	extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
3824	extern int entity_eligible(struct cfs_rq cfs_rq, struct* sched_entity *se);
3825	static inline
3826	void move_queued_task_locked(struct rq src_rq, struct* rq dst_rq, struct* task_struct *task)
3827	{
3828	lockdep_assert_rq_held(rq: src_rq);
3829	lockdep_assert_rq_held(rq: dst_rq);
3830
3831	deactivate_task(rq: src_rq, p: task, flags: `0`);
3832	set_task_cpu(p: task, cpu: dst_rq->cpu);
3833	activate_task(rq: dst_rq, p: task, flags: `0`);
3834	}
3835
3836	static inline
3837	bool task_is_pushable(struct rq rq, struct* task_struct p, int* cpu)
3838	{
3839	if (!task_on_cpu(rq, p) &&
3840	cpumask_test_cpu(cpu, cpumask: &p->cpus_mask))
3841	return true;
3842
3843	return false;
3844	}
3845
3846	#ifdef CONFIG_RT_MUTEXES
3847
3848	static inline int __rt_effective_prio(struct task_struct pi_task, int* prio)
3849	{
3850	if (pi_task)
3851	prio = min(prio, pi_task->prio);
3852
3853	return prio;
3854	}
3855
/*
 * Apply __rt_effective_prio() using the task returned by
 * rt_mutex_get_top_task(p).
 */
static inline int rt_effective_prio(struct task_struct *p, int prio)
{
	struct task_struct *pi_task = rt_mutex_get_top_task(p);

	return __rt_effective_prio(pi_task, prio);
}
3862
3863	#else /* !CONFIG_RT_MUTEXES: */
3864
/* Without CONFIG_RT_MUTEXES, @prio is returned unchanged. */
static inline int rt_effective_prio(struct task_struct *p, int prio)
{
	return prio;
}
3869
3870	#endif /* !CONFIG_RT_MUTEXES */
3871
/* Scheduler core entry points defined outside this header. */
extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi);
extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
extern const struct sched_class *__setscheduler_class(int policy, int prio);
extern void set_load_weight(struct task_struct *p, bool update_load);
extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);

extern void check_class_changing(struct rq *rq, struct task_struct *p,
				 const struct sched_class *prev_class);
extern void check_class_changed(struct rq *rq, struct task_struct *p,
				const struct sched_class *prev_class,
				int oldprio);

extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
3887
#ifdef CONFIG_SCHED_CLASS_EXT
/*
 * Used by SCX in the enable/disable paths to move tasks between sched_classes
 * and establish invariants.
 */
struct sched_enq_and_set_ctx {
	struct task_struct	*p;
	int			queue_flags;
	bool			queued;
	bool			running;
};

/* The ctx filled in by the first call is the one handed to the second. */
void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
			    struct sched_enq_and_set_ctx *ctx);
void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);

#endif /* CONFIG_SCHED_CLASS_EXT */
3905
3906	#include "ext.h"
3907
3908	#endif /* _KERNEL_SCHED_SCHED_H */
3909

/* Browse the source code of Linux/kernel/sched/sched.h */